habanalabs: add RMWREG32_SHIFTED to set a val within a mask
[platform/kernel/linux-starfive.git] drivers/misc/habanalabs/gaudi2/gaudi2.c
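The commit named in the title introduces a read-modify-write helper for setting a value inside a register field. A minimal sketch of what such a macro pair can look like, assuming the driver's existing RREG32()/WREG32() register accessors and the kernel's __ffs() helper (the actual definition belongs in the common habanalabs header, not in gaudi2.c):

	/* Write a value that is already shifted into the field covered by mask */
	#define RMWREG32_SHIFTED(reg, val, mask) \
		WREG32(reg, (RREG32(reg) & ~(mask)) | (val))

	/* Shift a plain field value into position before the masked write */
	#define RMWREG32(reg, val, mask) \
		RMWREG32_SHIFTED(reg, (val) << __ffs(mask), mask)

With this split, a caller that already holds a pre-shifted value can use RMWREG32_SHIFTED() directly, while RMWREG32() keeps accepting an unshifted field value.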
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11 #include "../include/hw_ip/mmu/mmu_v2_0.h"
12 #include "../include/gaudi2/gaudi2_packets.h"
13 #include "../include/gaudi2/gaudi2_reg_map.h"
14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
16
17 #include <linux/module.h>
18 #include <linux/pci.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21
22 #define GAUDI2_DMA_POOL_BLK_SIZE                SZ_256          /* 256 bytes */
23
24 #define GAUDI2_RESET_TIMEOUT_MSEC               2000            /* 2000ms */
25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC          50000           /* 50ms */
26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC         25000           /* 25s */
27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC         25000           /* 25s */
28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC     3000000         /* 3s */
29 #define GAUDI2_RESET_POLL_CNT                   3
30 #define GAUDI2_RESET_WAIT_MSEC                  1               /* 1ms */
31 #define GAUDI2_CPU_RESET_WAIT_MSEC              100             /* 100ms */
32 #define GAUDI2_PLDM_RESET_WAIT_MSEC             1000            /* 1s */
33 #define GAUDI2_CB_POOL_CB_CNT                   512
34 #define GAUDI2_CB_POOL_CB_SIZE                  SZ_128K         /* 128KB */
35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC          4000000         /* 4s */
36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC         25000000        /* 25s */
37 #define GAUDI2_TEST_QUEUE_WAIT_USEC             100000          /* 100ms */
38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC        1000000         /* 1s */
39
40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT          3
41
42 /*
43  * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
44  * and relies on that value (for array sizes etc.), we define a separate value for the
45  * maximum number of faulty TPCs which reflects the cluster binning requirements.
46  */
47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS         1
48 #define MAX_FAULTY_XBARS                        1
49 #define MAX_FAULTY_EDMAS                        1
50 #define MAX_FAULTY_DECODERS                     1
51
52 #define GAUDI2_TPC_FULL_MASK                    0x1FFFFFF
53 #define GAUDI2_HIF_HMMU_FULL_MASK               0xFFFF
54 #define GAUDI2_DECODER_FULL_MASK                0x3FF
55
56 #define GAUDI2_NUM_OF_QM_ERR_CAUSE              18
57 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE          25
58 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE          3
59 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE         14
60 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE         3
61 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE          2
62 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE             22
63 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE            30
64 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE             25
65 #define GAUDI2_NUM_OF_MME_ERR_CAUSE             16
66 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE        5
67 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE         7
68 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE       8
69 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE         19
70 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE             9
71 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE          3
72 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE   3
73 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE      2
74 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE       2
75 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE       2
76 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE          5
77
78 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC       (MMU_CONFIG_TIMEOUT_USEC * 10)
79 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC            (MMU_CONFIG_TIMEOUT_USEC * 200)
80 #define GAUDI2_ARB_WDT_TIMEOUT                  (0x1000000)
81
82 #define GAUDI2_VDEC_TIMEOUT_USEC                10000           /* 10ms */
83 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC           (GAUDI2_VDEC_TIMEOUT_USEC * 100)
84
85 #define KDMA_TIMEOUT_USEC                       USEC_PER_SEC
86
87 #define IS_DMA_IDLE(dma_core_idle_ind_mask)     \
88         (!((dma_core_idle_ind_mask) &           \
89         ((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
90         (DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
91
92 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
93
94 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
95
96 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
97         ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
98         (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
99         (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
100
101 #define PCIE_DEC_EN_MASK                        0x300
102 #define DEC_WORK_STATE_IDLE                     0
103 #define DEC_WORK_STATE_PEND                     3
104 #define IS_DEC_IDLE(dec_swreg15) \
105         (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
106         ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
107
108 /* HBM MMU address scrambling parameters */
109 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE            SZ_8M
110 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT           26
111 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT           0
112 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK        DRAM_VA_HINT_MASK
113 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR  16
114 #define MMU_RANGE_INV_VA_LSB_SHIFT              12
115 #define MMU_RANGE_INV_VA_MSB_SHIFT              44
116 #define MMU_RANGE_INV_EN_SHIFT                  0
117 #define MMU_RANGE_INV_ASID_EN_SHIFT             1
118 #define MMU_RANGE_INV_ASID_SHIFT                2
119
120 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because it
121  * has only a 2-entry FIFO, and hence it is not enabled for it.
122  */
123 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK         GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
124 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK         GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
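/*
 * With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE = 19, the PMMU mask above enables
 * cause bits 0..17 (0x3FFFF) and the HMMU mask enables bits 0..18 (0x7FFFF).
 */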
125
126 #define GAUDI2_MAX_STRING_LEN                   64
127
128 #define GAUDI2_VDEC_MSIX_ENTRIES                (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
129                                                         GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
130
131 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
132
133 enum hl_pmmu_fatal_cause {
134         LATENCY_RD_OUT_FIFO_OVERRUN,
135         LATENCY_WR_OUT_FIFO_OVERRUN,
136 };
137
138 enum hl_pcie_drain_ind_cause {
139         LBW_AXI_DRAIN_IND,
140         HBW_AXI_DRAIN_IND
141 };
142
143 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
144         [HBM_ID0] = 0xFFFC,
145         [HBM_ID1] = 0xFFCF,
146         [HBM_ID2] = 0xF7F7,
147         [HBM_ID3] = 0x7F7F,
148         [HBM_ID4] = 0xFCFF,
149         [HBM_ID5] = 0xCFFF,
150 };
151
152 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
153         [0] = HBM_ID0,
154         [1] = HBM_ID1,
155         [2] = HBM_ID4,
156         [3] = HBM_ID5,
157 };
158
159 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
160         [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
161         [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
162         [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
163         [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
164         [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
165         [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
166         [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
167         [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
168 };
169
170 static const int gaudi2_qman_async_event_id[] = {
171         [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
172         [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
173         [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
174         [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
175         [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
176         [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
177         [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
178         [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
179         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
180         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
181         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
182         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
183         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
184         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
185         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
186         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
187         [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
188         [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
189         [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
190         [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
191         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
192         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
193         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
194         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
195         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
196         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
197         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
198         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
199         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
200         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
201         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
202         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
203         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
204         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
205         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
206         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
207         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
208         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
209         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
210         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
211         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
212         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
213         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
214         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
215         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
216         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
217         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
218         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
219         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
220         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
221         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
222         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
223         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
224         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
225         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
226         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
227         [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
228         [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
229         [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
230         [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
231         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
232         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
233         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
234         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
235         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
236         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
237         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
238         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
239         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
240         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
241         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
242         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
243         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
244         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
245         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
246         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
247         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
248         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
249         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
250         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
251         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
252         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
253         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
254         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
255         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
256         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
257         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
258         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
259         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
260         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
261         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
262         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
263         [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
264         [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
265         [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
266         [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
267         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
268         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
269         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
270         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
271         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
272         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
273         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
274         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
275         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
276         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
277         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
278         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
279         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
280         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
281         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
282         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
283         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
284         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
285         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
286         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
287         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
288         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
289         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
290         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
291         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
292         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
293         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
294         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
295         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
296         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
297         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
298         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
299         [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
300         [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
301         [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
302         [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
303         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
304         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
305         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
306         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
307         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
308         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
309         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
310         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
311         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
312         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
313         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
314         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
315         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
316         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
317         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
318         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
319         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
320         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
321         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
322         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
323         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
324         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
325         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
326         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
327         [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
328         [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
329         [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
330         [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
331         [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
332         [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
333         [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
334         [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
335         [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
336         [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
337         [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
338         [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
339         [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
340         [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
341         [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
342         [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
343         [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
344         [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
345         [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
346         [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
347         [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
348         [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
349         [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
350         [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
351         [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
352         [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
353         [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
354         [GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
355         [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
356         [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
357         [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
358         [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
359         [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
360         [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
361         [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
362         [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
363         [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
364         [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
365         [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
366         [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
367         [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
368         [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
369         [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
370         [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
371         [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
372         [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
373         [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
374         [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
375         [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
376         [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
377         [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
378         [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
379         [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
380         [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
381         [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
382         [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
383         [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
384         [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
385         [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
386         [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
387         [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
388         [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
389         [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
390         [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
391         [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
392         [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
393         [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
394         [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
395         [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
396         [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
397         [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
398         [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
399         [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
400         [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
401         [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
402         [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
403         [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
404         [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
405         [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
406         [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
407         [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
408         [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
409         [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
410         [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
411         [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
412         [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
413         [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
414         [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
415         [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
416         [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
417         [GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
418         [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
419         [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
420         [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
421         [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
422         [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
423         [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
424         [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
425         [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
426         [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
427         [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
428         [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
429         [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
430         [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
431 };
432
433 static const int gaudi2_dma_core_async_event_id[] = {
434         [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
435         [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
436         [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
437         [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
438         [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
439         [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
440         [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
441         [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
442         [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
443         [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
444         [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
445 };
446
447 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
448         "qman sei intr",
449         "arc sei intr"
450 };
451
452 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
453         "AXI_TERMINATOR WR",
454         "AXI_TERMINATOR RD",
455         "AXI SPLIT SEI Status"
456 };
457
458 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
459         "cbu_bresp_sei_intr_cause",
460         "cbu_rresp_sei_intr_cause",
461         "lbu_bresp_sei_intr_cause",
462         "lbu_rresp_sei_intr_cause",
463         "cbu_axi_split_intr_cause",
464         "lbu_axi_split_intr_cause",
465         "arc_ip_excptn_sei_intr_cause",
466         "dmi_bresp_sei_intr_cause",
467         "aux2apb_err_sei_intr_cause",
468         "cfg_lbw_wr_terminated_intr_cause",
469         "cfg_lbw_rd_terminated_intr_cause",
470         "cfg_dccm_wr_terminated_intr_cause",
471         "cfg_dccm_rd_terminated_intr_cause",
472         "cfg_hbw_rd_terminated_intr_cause"
473 };
474
475 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
476         "msix_vcd_hbw_sei",
477         "msix_l2c_hbw_sei",
478         "msix_nrm_hbw_sei",
479         "msix_abnrm_hbw_sei",
480         "msix_vcd_lbw_sei",
481         "msix_l2c_lbw_sei",
482         "msix_nrm_lbw_sei",
483         "msix_abnrm_lbw_sei",
484         "apb_vcd_lbw_sei",
485         "apb_l2c_lbw_sei",
486         "apb_nrm_lbw_sei",
487         "apb_abnrm_lbw_sei",
488         "dec_sei",
489         "dec_apb_sei",
490         "trc_apb_sei",
491         "lbw_mstr_if_sei",
492         "axi_split_bresp_err_sei",
493         "hbw_axi_wr_viol_sei",
494         "hbw_axi_rd_viol_sei",
495         "lbw_axi_wr_viol_sei",
496         "lbw_axi_rd_viol_sei",
497         "vcd_spi",
498         "l2c_spi",
499         "nrm_spi",
500         "abnrm_spi",
501 };
502
503 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
504         "PQ AXI HBW error",
505         "CQ AXI HBW error",
506         "CP AXI HBW error",
507         "CP error due to undefined OPCODE",
508         "CP encountered STOP OPCODE",
509         "CP AXI LBW error",
510         "CP WRREG32 or WRBULK returned error",
511         "N/A",
512         "FENCE 0 inc over max value and clipped",
513         "FENCE 1 inc over max value and clipped",
514         "FENCE 2 inc over max value and clipped",
515         "FENCE 3 inc over max value and clipped",
516         "FENCE 0 dec under min value and clipped",
517         "FENCE 1 dec under min value and clipped",
518         "FENCE 2 dec under min value and clipped",
519         "FENCE 3 dec under min value and clipped",
520         "CPDMA Up overflow",
521         "PQC L2H error"
522 };
523
524 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
525         "RSVD0",
526         "CQ AXI HBW error",
527         "CP AXI HBW error",
528         "CP error due to undefined OPCODE",
529         "CP encountered STOP OPCODE",
530         "CP AXI LBW error",
531         "CP WRREG32 or WRBULK returned error",
532         "N/A",
533         "FENCE 0 inc over max value and clipped",
534         "FENCE 1 inc over max value and clipped",
535         "FENCE 2 inc over max value and clipped",
536         "FENCE 3 inc over max value and clipped",
537         "FENCE 0 dec under min value and clipped",
538         "FENCE 1 dec under min value and clipped",
539         "FENCE 2 dec under min value and clipped",
540         "FENCE 3 dec under min value and clipped",
541         "CPDMA Up overflow",
542         "RSVD17",
543         "CQ_WR_IFIFO_CI_ERR",
544         "CQ_WR_CTL_CI_ERR",
545         "ARC_CQF_RD_ERR",
546         "ARC_CQ_WR_IFIFO_CI_ERR",
547         "ARC_CQ_WR_CTL_CI_ERR",
548         "ARC_AXI_ERR",
549         "CP_SWITCH_WDT_ERR"
550 };
551
552 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
553         "Choice push while full error",
554         "Choice Q watchdog error",
555         "MSG AXI LBW returned with error"
556 };
557
558 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
559         "qm_axi_err",
560         "qm_trace_fence_events",
561         "qm_sw_err",
562         "qm_cp_sw_stop",
563         "lbw_mstr_rresp_err",
564         "lbw_mstr_bresp_err",
565         "lbw_msg_slverr",
566         "hbw_msg_slverr",
567         "wbc_slverr",
568         "hbw_mstr_rresp_err",
569         "hbw_mstr_bresp_err",
570         "sb_resp_intr",
571         "mrsb_resp_intr",
572         "core_dw_status_0",
573         "core_dw_status_1",
574         "core_dw_status_2",
575         "core_dw_status_3",
576         "core_dw_status_4",
577         "core_dw_status_5",
578         "core_dw_status_6",
579         "core_dw_status_7",
580         "async_arc2cpu_sei_intr",
581 };
582
583 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
584         "tpc_address_exceed_slm",
585         "tpc_div_by_0",
586         "tpc_spu_mac_overflow",
587         "tpc_spu_addsub_overflow",
588         "tpc_spu_abs_overflow",
589         "tpc_spu_fma_fp_dst_nan",
590         "tpc_spu_fma_fp_dst_inf",
591         "tpc_spu_convert_fp_dst_nan",
592         "tpc_spu_convert_fp_dst_inf",
593         "tpc_spu_fp_dst_denorm",
594         "tpc_vpu_mac_overflow",
595         "tpc_vpu_addsub_overflow",
596         "tpc_vpu_abs_overflow",
597         "tpc_vpu_convert_fp_dst_nan",
598         "tpc_vpu_convert_fp_dst_inf",
599         "tpc_vpu_fma_fp_dst_nan",
600         "tpc_vpu_fma_fp_dst_inf",
601         "tpc_vpu_fp_dst_denorm",
602         "tpc_assertions",
603         "tpc_illegal_instruction",
604         "tpc_pc_wrap_around",
605         "tpc_qm_sw_err",
606         "tpc_hbw_rresp_err",
607         "tpc_hbw_bresp_err",
608         "tpc_lbw_rresp_err",
609         "tpc_lbw_bresp_err",
610         "st_unlock_already_locked",
611         "invalid_lock_access",
612         "LD_L protection violation",
613         "ST_L protection violation",
614 };
615
616 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
617         "agu_resp_intr",
618         "qman_axi_err",
619         "wap sei (wbc axi err)",
620         "arc sei",
621         "cfg access error",
622         "qm_sw_err",
623         "sbte_dbg_intr_0",
624         "sbte_dbg_intr_1",
625         "sbte_dbg_intr_2",
626         "sbte_dbg_intr_3",
627         "sbte_dbg_intr_4",
628         "sbte_prtn_intr_0",
629         "sbte_prtn_intr_1",
630         "sbte_prtn_intr_2",
631         "sbte_prtn_intr_3",
632         "sbte_prtn_intr_4",
633 };
634
635 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
636         "i0",
637         "i1",
638         "i2",
639         "i3",
640         "i4",
641 };
642
643 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
644         "WBC ERR RESP_0",
645         "WBC ERR RESP_1",
646         "AP SOURCE POS INF",
647         "AP SOURCE NEG INF",
648         "AP SOURCE NAN",
649         "AP RESULT POS INF",
650         "AP RESULT NEG INF",
651 };
652
653 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
654         "HBW Read returned with error RRESP",
655         "HBW write returned with error BRESP",
656         "LBW write returned with error BRESP",
657         "descriptor_fifo_overflow",
658         "KDMA SB LBW Read returned with error",
659         "KDMA WBC LBW Write returned with error",
660         "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
661         "WRONG CFG FOR COMMIT IN LIN DMA"
662 };
663
664 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
665         "HBW/LBW Read returned with error RRESP",
666         "HBW/LBW write returned with error BRESP",
667         "LBW write returned with error BRESP",
668         "descriptor_fifo_overflow",
669         "KDMA SB LBW Read returned with error",
670         "KDMA WBC LBW Write returned with error",
671         "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
672         "WRONG CFG FOR COMMIT IN LIN DMA"
673 };
674
675 struct gaudi2_sm_sei_cause_data {
676         const char *cause_name;
677         const char *log_name;
678         u32 log_mask;
679 };
680
681 static const struct gaudi2_sm_sei_cause_data
682 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
683         {"calculated SO value overflow/underflow", "SOB group ID", 0x7FF},
684         {"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF},
685         {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF},
686 };
687
688 static const char * const
689 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
690         "LATENCY_RD_OUT_FIFO_OVERRUN",
691         "LATENCY_WR_OUT_FIFO_OVERRUN",
692 };
693
694 static const char * const
695 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
696         "LATENCY_RD_OUT_FIFO_OVERRUN",
697         "LATENCY_WR_OUT_FIFO_OVERRUN",
698 };
699
700 static const char * const
701 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
702         "AXI drain HBW",
703         "AXI drain LBW",
704 };
705
706 static const char * const
707 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
708         "HBW error response",
709         "LBW error response",
710         "TLP is blocked by RR"
711 };
712
713 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
714         [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
715         [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
716         [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
717         [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
718         [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
719         [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
720         [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
721         [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
722         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
723         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
724         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
725         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
726         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
727         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
728         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
729         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
730         [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
731         [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
732         [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
733         [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
734         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
735         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
736         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
737         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
738         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
739         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
740         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
741         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
742         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
743         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
744         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
745         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
746         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
747         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
748         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
749         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
750         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
751         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
752         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
753         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
754         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
755         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
756         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
757         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
758         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
759         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
760         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
761         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
762         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
763         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
764         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
765         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
766         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
767         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
768         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
769         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
770         [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
771         [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
772         [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
773         [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
774         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
775         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
776         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
777         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
778         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
779         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
780         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
781         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
782         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
783         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
784         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
785         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
786         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
787         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
788         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
789         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
790         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
791         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
792         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
793         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
794         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
795         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
796         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
797         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
798         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
799         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
800         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
801         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
802         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
803         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
804         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
805         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
806         [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
807         [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
808         [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
809         [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
810         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
811         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
812         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
813         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
814         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
815         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
816         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
817         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
818         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
819         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
820         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
821         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
822         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
823         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
824         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
825         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
826         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
827         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
828         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
829         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
830         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
831         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
832         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
833         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
834         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
835         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
836         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
837         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
838         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
839         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
840         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
841         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
842         [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
843         [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
844         [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
845         [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
846         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
847         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
848         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
849         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
850         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
851         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
852         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
853         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
854         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
855         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
856         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
857         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
858         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
859         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
860         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
861         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
862         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
863         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
864         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
865         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
866         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
867         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
868         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
869         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
870         [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
871         [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
872         [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
873         [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
874         [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
875         [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
876         [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
877         [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
878         [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
879         [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
880         [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
881         [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
882         [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
883         [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
884         [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
885         [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
886         [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
887         [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
888         [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
889         [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
890         [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
891         [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
892         [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
893         [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
894         [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
895         [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
896         [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
897         [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
898         [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
899         [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
900         [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
901         [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
902         [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
903         [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
904         [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
905         [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
906         [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
907         [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
908         [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
909         [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
910         [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
911         [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
912         [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
913         [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
914         [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
915         [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
916         [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
917         [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
918         [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
919         [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
920         [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
921         [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
922         [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
923         [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
924         [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
925         [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
926         [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
927         [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
928         [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
929         [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
930         [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
931         [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
932         [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
933         [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
934         [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
935         [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
936         [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
937         [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
938         [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
939         [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
940         [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
941         [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
942         [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
943         [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
944         [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
945         [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
946         [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
947         [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
948         [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
949         [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
950         [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
951         [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
952         [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
953         [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
954         [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
955         [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
956         [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
957         [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
958         [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
959         [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
960         [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
961         [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
962         [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
963         [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
964         [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
965         [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
966         [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
967         [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
968         [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
969         [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
970         [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
971         [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
972         [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
973         [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
974 };
975
976 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
977         [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
978         [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
979         [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
980         [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
981         [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
982         [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
983         [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
984         [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
985         [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
986         [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
987         [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
988         [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
989         [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
990         [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
991         [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
992         [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
993         [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
994         [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
995         [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
996         [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
997         [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
998         [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
999         [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1000         [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1001         [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1002         [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1003         [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1004         [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1005         [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1006         [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1007         [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1008         [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1009         [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1010         [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1011         [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1012         [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1013         [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1014         [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1015         [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1016         [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1017         [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1018         [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1019         [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1020         [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1021         [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1022         [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1023         [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1024         [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1025         [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1026         [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1027         [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1028         [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1029         [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1030         [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1031         [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1032         [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1033         [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1034         [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1035         [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1036         [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1037         [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1038         [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1039         [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1040         [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1041         [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1042         [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1043         [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1044         [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1045         [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1046 };
1047
1048 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1049         [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1050         [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1051         [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1052         [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1053         [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1054         [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1055         [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1056         [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1057         [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1058         [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1059         [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1060         [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1061         [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1062         [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1063         [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1064         [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1065         [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1066         [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1067         [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1068         [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1069         [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1070         [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1071         [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1072         [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1073         [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1074         [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1075         [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1076         [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1077         [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1078         [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1079         [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1080         [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1081         [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1082         [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1083         [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1084         [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1085         [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1086         [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1087         [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1088         [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1089         [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1090         [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1091         [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1092         [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1093         [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1094         [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1095         [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1096         [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1097         [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1098         [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1099         [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1100         [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1101         [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1102         [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1103         [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1104         [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1105         [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1106         [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1107         [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1108         [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1109         [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1110         [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1111         [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1112         [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1113         [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1114         [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1115         [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1116         [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1117         [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1118 };
1119
1120 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1121         [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1122         [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1123         [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1124         [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1125 };
1126
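/* Every QMAN exposes four streams, so all four queue IDs of a given QMAN map
 * to the same ARC CPU. Note that the DCORE1 and DCORE3 MME queues are served
 * by scheduler ARCs (CPU_ID_SCHED_ARC4/5) rather than by MME QMAN ARCs.
 */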
1127 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1128         [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1129         [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1130         [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1131         [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1132         [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1133         [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1134         [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1135         [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1136         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1137         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1138         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1139         [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1140         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1141         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1142         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1143         [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1144         [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1145         [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1146         [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1147         [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1148         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1149         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1150         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1151         [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1152         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1153         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1154         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1155         [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1156         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1157         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1158         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1159         [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1160         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1161         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1162         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1163         [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1164         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1165         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1166         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1167         [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1168         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1169         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1170         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1171         [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1172         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1173         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1174         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1175         [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1176         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1177         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1178         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1179         [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1180         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1181         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1182         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1183         [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1184         [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1185         [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1186         [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1187         [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1188         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1189         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1190         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1191         [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1192         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1193         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1194         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1195         [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1196         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1197         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1198         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1199         [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1200         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1201         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1202         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1203         [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1204         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1205         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1206         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1207         [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1208         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1209         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1210         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1211         [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1212         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1213         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1214         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1215         [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1216         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1217         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1218         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1219         [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1220         [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1221         [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1222         [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1223         [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1224         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1225         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1226         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1227         [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1228         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1229         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1230         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1231         [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1232         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1233         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1234         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1235         [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1236         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1237         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1238         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1239         [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1240         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1241         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1242         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1243         [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1244         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1245         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1246         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1247         [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1248         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1249         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1250         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1251         [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1252         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1253         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1254         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1255         [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1256         [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1257         [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1258         [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1259         [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1260         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1261         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1262         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1263         [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1264         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1265         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1266         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1267         [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1268         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1269         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1270         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1271         [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1272         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1273         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1274         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1275         [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1276         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1277         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1278         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1279         [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1280         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1281         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1282         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1283         [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1284         [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1285         [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1286         [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1287         [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1288         [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1289         [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1290         [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1291         [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1292         [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1293         [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1294         [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1295         [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1296         [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1297         [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1298         [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1299         [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1300         [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1301         [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1302         [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1303         [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1304         [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1305         [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1306         [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1307         [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1308         [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1309         [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1310         [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1311         [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1312         [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1313         [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1314         [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1315         [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1316         [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1317         [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1318         [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1319         [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1320         [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1321         [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1322         [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1323         [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1324         [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1325         [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1326         [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1327         [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1328         [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1329         [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1330         [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1331         [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1332         [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1333         [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1334         [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1335         [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1336         [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1337         [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1338         [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1339         [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1340         [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1341         [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1342         [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1343         [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1344         [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1345         [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1346         [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1347         [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1348         [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1349         [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1350         [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1351         [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1352         [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1353         [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1354         [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1355         [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1356         [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1357         [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1358         [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1359         [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1360         [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1361         [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1362         [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1363         [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1364         [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1365         [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1366         [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1367         [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1368         [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1369         [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1370         [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1371         [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1372         [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1373         [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1374         [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1375         [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1376         [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1377         [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1378         [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1379         [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1380         [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1381         [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1382         [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1383         [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1384         [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1385         [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1386         [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1387         [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1388 };
1389
1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1391         [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1392         [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1393         [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1394         [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1395         [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1396         [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1397         [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1398         [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1399         [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1400         [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1401         [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1402 };
1403
1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1405         [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1406         [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1407         [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1408         [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1409 };
1410
1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1412         [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1413         [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1414         [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1415         [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1416         [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1417         [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1418         [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1419         [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1420         [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1421         [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1422         [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1423         [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1424         [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1425         [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1426         [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1427         [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1428         [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1429         [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1430         [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1431         [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1432         [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1433         [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1434         [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1435         [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1436         [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1437 };
1438
1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1440         [ROTATOR_ID_0] = mmROT0_BASE,
1441         [ROTATOR_ID_1] = mmROT1_BASE
1442 };
1443
1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1445         [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1446         [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1447         [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1448         [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1449         [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1450         [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1451         [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1452         [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1453         [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1454         [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1455         [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1456         [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1457         [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1458         [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1459         [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1460         [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1461         [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1462         [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1463         [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1464         [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1465         [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1466         [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1467         [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1468         [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1469         [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1470 };
1471
1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1473         [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1474         [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1475 };
1476
1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1478         GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1479         GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1480         GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1481         GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1482         GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1483         GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1484         GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1485         GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1486 };
1487
1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1489         "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1490         "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1491         "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1492         "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1493         "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1494         "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1495         "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1496         "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1497         "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1498         "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1499 };
1500
1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1502         RTR_ID_X_Y(2, 4),
1503         RTR_ID_X_Y(3, 4),
1504         RTR_ID_X_Y(4, 4),
1505         RTR_ID_X_Y(5, 4),
1506         RTR_ID_X_Y(6, 4),
1507         RTR_ID_X_Y(7, 4),
1508         RTR_ID_X_Y(8, 4),
1509         RTR_ID_X_Y(9, 4),
1510         RTR_ID_X_Y(10, 4),
1511         RTR_ID_X_Y(11, 4),
1512         RTR_ID_X_Y(12, 4),
1513         RTR_ID_X_Y(13, 4),
1514         RTR_ID_X_Y(14, 4),
1515         RTR_ID_X_Y(15, 4),
1516         RTR_ID_X_Y(16, 4),
1517         RTR_ID_X_Y(17, 4),
1518         RTR_ID_X_Y(2, 11),
1519         RTR_ID_X_Y(3, 11),
1520         RTR_ID_X_Y(4, 11),
1521         RTR_ID_X_Y(5, 11),
1522         RTR_ID_X_Y(6, 11),
1523         RTR_ID_X_Y(7, 11),
1524         RTR_ID_X_Y(8, 11),
1525         RTR_ID_X_Y(9, 11),
1526         RTR_ID_X_Y(0, 0),/* 24 no id */
1527         RTR_ID_X_Y(0, 0),/* 25 no id */
1528         RTR_ID_X_Y(0, 0),/* 26 no id */
1529         RTR_ID_X_Y(0, 0),/* 27 no id */
1530         RTR_ID_X_Y(14, 11),
1531         RTR_ID_X_Y(15, 11),
1532         RTR_ID_X_Y(16, 11),
1533         RTR_ID_X_Y(17, 11)
1534 };
1535
1536 enum rtr_id {
1537         DCORE0_RTR0,
1538         DCORE0_RTR1,
1539         DCORE0_RTR2,
1540         DCORE0_RTR3,
1541         DCORE0_RTR4,
1542         DCORE0_RTR5,
1543         DCORE0_RTR6,
1544         DCORE0_RTR7,
1545         DCORE1_RTR0,
1546         DCORE1_RTR1,
1547         DCORE1_RTR2,
1548         DCORE1_RTR3,
1549         DCORE1_RTR4,
1550         DCORE1_RTR5,
1551         DCORE1_RTR6,
1552         DCORE1_RTR7,
1553         DCORE2_RTR0,
1554         DCORE2_RTR1,
1555         DCORE2_RTR2,
1556         DCORE2_RTR3,
1557         DCORE2_RTR4,
1558         DCORE2_RTR5,
1559         DCORE2_RTR6,
1560         DCORE2_RTR7,
1561         DCORE3_RTR0,
1562         DCORE3_RTR1,
1563         DCORE3_RTR2,
1564         DCORE3_RTR3,
1565         DCORE3_RTR4,
1566         DCORE3_RTR5,
1567         DCORE3_RTR6,
1568         DCORE3_RTR7,
1569 };
1570
1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1572         DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1573         DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1574         DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1575         DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1576         DCORE0_RTR0
1577 };
1578
1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1580         DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1581         DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1582 };
1583
1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1585         DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1586         DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1587 };
1588
1589 struct sft_info {
1590         u8 interface_id;
1591         u8 dcore_id;
1592 };
1593
1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1595         {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3},
1596 };
1597
1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1599         DCORE0_RTR0, DCORE0_RTR0
1600 };
1601
1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1603         DCORE2_RTR0, DCORE3_RTR7
1604 };
1605
1606 struct mme_initiators_rtr_id {
1607         u32 wap0;
1608         u32 wap1;
1609         u32 write;
1610         u32 read;
1611         u32 sbte0;
1612         u32 sbte1;
1613         u32 sbte2;
1614         u32 sbte3;
1615         u32 sbte4;
1616 };
1617
1618 enum mme_initiators {
1619         MME_WAP0 = 0,
1620         MME_WAP1,
1621         MME_WRITE,
1622         MME_READ,
1623         MME_SBTE0,
1624         MME_SBTE1,
1625         MME_SBTE2,
1626         MME_SBTE3,
1627         MME_SBTE4,
1628         MME_INITIATORS_MAX
1629 };
1630
1631 static const struct mme_initiators_rtr_id
1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1633         { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1634         .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1635         { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1636         .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1637         { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1638         .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1639         { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1640         .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1641 };
1642
1643 enum razwi_event_sources {
1644         RAZWI_TPC,
1645         RAZWI_MME,
1646         RAZWI_EDMA,
1647         RAZWI_PDMA,
1648         RAZWI_NIC,
1649         RAZWI_DEC,
1650         RAZWI_ROT
1651 };
1652
1653 struct hbm_mc_error_causes {
1654         u32 mask;
1655         char cause[50];
1656 };
1657
1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1659         {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1660         {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1661         {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1662         {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1663         {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1664 };
1665
1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1667         [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1668         [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1669         [HBM_SEI_READ_ERR] = "SEI read data error",
1670         [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1671         [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1672         [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1673         [HBM_SEI_DFI] = "SEI DFI error",
1674         [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1675         [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1676 };
1677
1678 struct mmu_spi_sei_cause {
1679         char cause[50];
1680         int clear_bit;
1681 };
1682
1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1684         {"page fault", 1},              /* INTERRUPT_CLR[1] */
1685         {"page access", 1},             /* INTERRUPT_CLR[1] */
1686         {"bypass ddr", 2},              /* INTERRUPT_CLR[2] */
1687         {"multi hit", 2},               /* INTERRUPT_CLR[2] */
1688         {"mmu rei0", -1},               /* no clear register bit */
1689         {"mmu rei1", -1},               /* no clear register bit */
1690         {"stlb rei0", -1},              /* no clear register bit */
1691         {"stlb rei1", -1},              /* no clear register bit */
1692         {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
1693         {"rr privileged read hit", 2},  /* INTERRUPT_CLR[2] */
1694         {"rr secure write hit", 2},     /* INTERRUPT_CLR[2] */
1695         {"rr secure read hit", 2},      /* INTERRUPT_CLR[2] */
1696         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
1697         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
1698         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
1699         {"bist_fail no use", 2},        /* INTERRUPT_CLR[2] */
1700         {"slave error", 16},            /* INTERRUPT_CLR[16] */
1701         {"dec error", 17},              /* INTERRUPT_CLR[17] */
1702         {"burst fifo full", 2}          /* INTERRUPT_CLR[2] */
1703 };
1704
1705 struct gaudi2_cache_invld_params {
1706         u64 start_va;
1707         u64 end_va;
1708         u32 inv_start_val;
1709         u32 flags;
1710         bool range_invalidation;
1711 };
1712
1713 struct gaudi2_tpc_idle_data {
1714         struct engines_data *e;
1715         unsigned long *mask;
1716         bool *is_idle;
1717         const char *tpc_fmt;
1718 };
1719
1720 struct gaudi2_tpc_mmu_data {
1721         u32 rw_asid;
1722 };
1723
1724 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1725
1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1733                                                                                 bool is_memset);
1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1735
1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1737 {
1738         /* intentionally left empty */
1739 }
1740
1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1742 {
1743         return sizeof(struct packet_msg_short);
1744 }
1745
1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1747 {
1748         return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1749 }
1750
1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1752 {
1753         struct asic_fixed_properties *prop = &hdev->asic_prop;
1754         int dcore, inst, tpc_seq;
1755         u32 offset;
1756
1757         /* init the return code */
1758         ctx->rc = 0;
1759
1760         for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1761                 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1762                         tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1763
1764                         if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1765                                 continue;
1766
1767                         offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1768
1769                         ctx->fn(hdev, dcore, inst, offset, ctx);
1770                         if (ctx->rc) {
1771                                 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1772                                                         dcore, inst);
1773                                 return;
1774                         }
1775                 }
1776         }
1777
1778         if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1779                 return;
1780
1781         /* special check for PCI TPC (DCORE0_TPC6) */
1782         offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1783         ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1784         if (ctx->rc)
1785                 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1786 }
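
/*
 * Usage sketch (illustration only, not driver code): a hypothetical per-TPC
 * callback for gaudi2_iterate_tpcs(). The fn and rc fields are taken from the
 * calls above; the generic data pointer used here is an assumption about
 * struct iterate_module_ctx. The caller fills a context with .fn and a data
 * pointer, calls gaudi2_iterate_tpcs(hdev, &ctx) and then checks ctx.rc.
 *
 *	static void example_count_tpcs(struct hl_device *hdev, int dcore,
 *					int inst, u32 offset,
 *					struct iterate_module_ctx *ctx)
 *	{
 *		u32 *count = ctx->data;
 *
 *		(*count)++;
 *	}
 */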
1787
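/* A host physical address is considered valid when it lies below the end of
 * the first host window (HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) or at/above
 * HOST_PHYS_BASE_1, i.e. anywhere outside the gap between the two windows.
 */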
1788 static bool gaudi2_host_phys_addr_valid(u64 addr)
1789 {
1790         if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1791                 return true;
1792
1793         return false;
1794 }
1795
1796 static int set_number_of_functional_hbms(struct hl_device *hdev)
1797 {
1798         struct asic_fixed_properties *prop = &hdev->asic_prop;
1799         u8 faulty_hbms = hweight64(hdev->dram_binning);
1800
1801         /* check if all HBMs should be used */
1802         if (!faulty_hbms) {
1803                 dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1804                 prop->num_functional_hbms = GAUDI2_HBM_NUM;
1805                 return 0;
1806         }
1807
1808         /*
1809          * Check for the error condition in which the number of binning
1810          * candidates is higher than the maximum supported by the
1811          * driver, in which case the supplied binning mask is rejected
1812          * and an error is returned.
1813          */
1814         if (faulty_hbms > MAX_FAULTY_HBMS) {
1815                 dev_err(hdev->dev,
1816                         "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1817                         MAX_FAULTY_HBMS, hdev->dram_binning);
1818                 return -EINVAL;
1819         }
1820
1821         /*
1822          * When binning is applied, the number of functional HBMs is reduced
1823          * by the number of faulty HBMs (at most MAX_FAULTY_HBMS).
1824          */
1825         prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1826         return 0;
1827 }
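
/*
 * Example (illustration only): a dram_binning mask of 0x4 marks a single
 * faulty HBM, so hweight64() returns 1 and num_functional_hbms becomes
 * GAUDI2_HBM_NUM - 1. A mask with more bits set than MAX_FAULTY_HBMS is
 * rejected with -EINVAL.
 */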
1828
1829 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1830 {
1831         struct asic_fixed_properties *prop = &hdev->asic_prop;
1832         u32 basic_hbm_page_size;
1833         int rc;
1834
1835         rc = set_number_of_functional_hbms(hdev);
1836         if (rc)
1837                 return -EINVAL;
1838
1839         /*
1840          * Due to a HW bug in which the TLB is x16 smaller than expected, we use
1841          * a workaround of an x16 larger page size, so that the entire HBM range
1842          * can still be mapped in the TLB.
1843          */
1844         basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1845         prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1846         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1847         prop->dram_size = prop->num_functional_hbms * SZ_16G;
1848         prop->dram_base_address = DRAM_PHYS_BASE;
1849         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1850         prop->dram_supports_virtual_memory = true;
1851
1852         prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1853         prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1854         prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1855         prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1856
1857         /* Since the DRAM page size differs from the DMMU page size, we need to allocate
1858          * DRAM memory in units of dram_page_size and map this memory in
1859          * units of DMMU page size. We overcome this size mismatch using a
1860          * scrambling routine which takes a DRAM page and converts it to a DMMU
1861          * page.
1862          * We therefore:
1863          * 1. partition the virtual address space to DRAM-page (whole) pages.
1864          *    (suppose we get n such pages)
1865          * 2. limit the amount of virtual address space we got from 1 above to
1866          *    a multiple of 64M as we don't want the scrambled address to cross
1867          *    the DRAM virtual address space.
1868          *    (m = (n * DRAM_page_size) / DMMU_page_size).
1869          * 3. determine the end address accordingly:
1870          *    end_addr = start_addr + m * 48M
1871          *
1872          *    the DRAM address MSBs (63:48) are not part of the roundup calculation
1873          */
1874         prop->dmmu.start_addr = prop->dram_base_address +
1875                         (prop->dram_page_size *
1876                                 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1877
1878         prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1879                         div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1880
1881         return 0;
1882 }
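
/*
 * Worked example for the sizes computed above (illustration only, assuming
 * GAUDI2_HBM_NUM == 6, no HBM binning and an x16 TLB compensation factor):
 *	basic_hbm_page_size = 6 * 8MB   = 48MB
 *	dram_page_size      = 16 * 48MB = 768MB
 *	dram_size           = 6 * 16GB  = 96GB
 * With a single binned HBM the same formulas give 40MB, 640MB and 80GB.
 */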
1883
1884 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1885 {
1886         struct asic_fixed_properties *prop = &hdev->asic_prop;
1887         struct hw_queue_properties *q_props;
1888         u32 num_sync_stream_queues = 0;
1889         int i;
1890
1891         prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1892         prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1893                                         GFP_KERNEL);
1894
1895         if (!prop->hw_queues_props)
1896                 return -ENOMEM;
1897
1898         q_props = prop->hw_queues_props;
1899
1900         for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1901                 q_props[i].type = QUEUE_TYPE_HW;
1902                 q_props[i].driver_only = 0;
1903
1904                 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1905                         q_props[i].supports_sync_stream = 0;
1906                 } else {
1907                         q_props[i].supports_sync_stream = 1;
1908                         num_sync_stream_queues++;
1909                 }
1910
1911                 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1912         }
1913
1914         q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1915         q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1916         q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1917
1918         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1919         prop->cfg_base_address = CFG_BASE;
1920         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1921         prop->host_base_address = HOST_PHYS_BASE_0;
1922         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1923         prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1924         prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1925         prop->user_dec_intr_count = NUMBER_OF_DEC;
1926         prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1927         prop->completion_mode = HL_COMPLETION_MODE_CS;
1928         prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1929         prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1930
1931         prop->sram_base_address = SRAM_BASE_ADDR;
1932         prop->sram_size = SRAM_SIZE;
1933         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1934         prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1935
1936         prop->hints_range_reservation = true;
1937
1938         if (hdev->pldm)
1939                 prop->mmu_pgt_size = 0x800000; /* 8MB */
1940         else
1941                 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1942
1943         prop->mmu_pte_size = HL_PTE_SIZE;
1944         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1945         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1946
1947         prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1948         prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1949         prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1950         prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1951         prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1952         prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1953         prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1954         prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1955         prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1956         prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1957         prop->dmmu.page_size = PAGE_SIZE_1GB;
1958         prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1959         prop->dmmu.last_mask = LAST_MASK;
1960         prop->dmmu.host_resident = 1;
1961         /* TODO: will be duplicated until implementing per-MMU props */
1962         prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1963         prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1964
1965         /*
1966          * This is done in order to be able to validate the FW descriptor (i.e. to verify
1967          * that the addresses and allocated space for the FW image do not cross memory bounds).
1968          * For this reason we set the DRAM size to the minimum possible, and later it will
1969          * be modified according to what is reported in the cpucp info packet.
1970          */
1971         prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1972
1973         hdev->pmmu_huge_range = true;
1974         prop->pmmu.host_resident = 1;
1975         prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1976         prop->pmmu.last_mask = LAST_MASK;
1977         /* TODO: will be duplicated until implementing per-MMU props */
1978         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1979         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1980
1981         prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1982         prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1983         prop->hints_host_hpage_reserved_va_range.start_addr =
1984                         RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1985         prop->hints_host_hpage_reserved_va_range.end_addr =
1986                         RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1987
1988         if (PAGE_SIZE == SZ_64K) {
1989                 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1990                 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1991                 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1992                 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1993                 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1994                 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1995                 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1996                 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1997                 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1998                 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1999                 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2000                 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2001                 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2002                 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2003                 prop->pmmu.page_size = PAGE_SIZE_64KB;
2004
2005                 /* shifts and masks are the same in PMMU and HPMMU */
2006                 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2007                 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2008                 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2009                 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2010         } else {
2011                 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2012                 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2013                 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2014                 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2015                 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2016                 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2017                 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2018                 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2019                 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2020                 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2021                 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2022                 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2023                 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2024                 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2025                 prop->pmmu.page_size = PAGE_SIZE_4KB;
2026
2027                 /* shifts and masks are the same in PMMU and HPMMU */
2028                 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2029                 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2030                 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2031                 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2032         }
2033
2034         prop->num_engine_cores = CPU_ID_MAX;
2035         prop->cfg_size = CFG_SIZE;
2036         prop->max_asid = MAX_ASID;
2037         prop->num_of_events = GAUDI2_EVENT_SIZE;
2038
2039         prop->dc_power_default = DC_POWER_DEFAULT;
2040
2041         prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2042         prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2043         prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2044         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2045
2046         strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2047
2048         prop->mme_master_slave_mode = 1;
2049
2050         prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2051                                         (num_sync_stream_queues * HL_RSVD_SOBS);
2052
2053         prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2054                                         (num_sync_stream_queues * HL_RSVD_MONS);
2055
2056         prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2057
2058         prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2059
2060         prop->fw_cpu_boot_dev_sts0_valid = false;
2061         prop->fw_cpu_boot_dev_sts1_valid = false;
2062         prop->hard_reset_done_by_fw = false;
2063         prop->gic_interrupts_enable = true;
2064
2065         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2066
2067         prop->max_dec = NUMBER_OF_DEC;
2068
2069         prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2070
2071         prop->dma_mask = 64;
2072
2073         return 0;
2074 }
2075
2076 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2077 {
2078         static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2079         bool is_wc[3] = {false, false, true};
2080         int rc;
2081
2082         rc = hl_pci_bars_map(hdev, name, is_wc);
2083         if (rc)
2084                 return rc;
2085
2086         hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2087
2088         return 0;
2089 }
2090
2091 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2092 {
2093         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2094         struct hl_inbound_pci_region pci_region;
2095         u64 old_addr = addr;
2096         int rc;
2097
2098         if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2099                 return old_addr;
2100
2101         if (hdev->asic_prop.iatu_done_by_fw)
2102                 return U64_MAX;
2103
2104         /* Inbound Region 2 - Bar 4 - Point to DRAM */
2105         pci_region.mode = PCI_BAR_MATCH_MODE;
2106         pci_region.bar = DRAM_BAR_ID;
2107         pci_region.addr = addr;
2108         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2109         if (rc)
2110                 return U64_MAX;
2111
2112         if (gaudi2) {
2113                 old_addr = gaudi2->dram_bar_cur_addr;
2114                 gaudi2->dram_bar_cur_addr = addr;
2115         }
2116
2117         return old_addr;
2118 }
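
/*
 * Usage sketch (illustration only, not driver code): a caller that needs
 * direct access to a DRAM address outside the current window typically moves
 * the BAR, accesses the memory through it, and restores the previous window
 * using the returned value. dram_addr below is a hypothetical, BAR-aligned
 * DRAM address.
 *
 *	u64 old_base = gaudi2_set_hbm_bar_base(hdev, dram_addr);
 *
 *	if (old_base == U64_MAX)
 *		return -EIO;
 *	...access DRAM through the BAR...
 *	gaudi2_set_hbm_bar_base(hdev, old_base);
 */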
2119
2120 static int gaudi2_init_iatu(struct hl_device *hdev)
2121 {
2122         struct hl_inbound_pci_region inbound_region;
2123         struct hl_outbound_pci_region outbound_region;
2124         u32 bar_addr_low, bar_addr_high;
2125         int rc;
2126
2127         if (hdev->asic_prop.iatu_done_by_fw)
2128                 return 0;
2129
2130         /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2131          * We must map this region in BAR match mode in order to
2132          * fetch BAR physical base address
2133          */
2134         inbound_region.mode = PCI_BAR_MATCH_MODE;
2135         inbound_region.bar = SRAM_CFG_BAR_ID;
2136         /* Base address must be aligned to Bar size which is 256 MB */
2137         inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2138         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2139         if (rc)
2140                 return rc;
2141
2142         /* Fetch physical BAR address */
2143         bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2144         bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2145
2146         hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2147
2148         /* Inbound Region 0 - Bar 0 - Point to CFG */
2149         inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2150         inbound_region.bar = SRAM_CFG_BAR_ID;
2151         inbound_region.offset_in_bar = 0;
2152         inbound_region.addr = STM_FLASH_BASE_ADDR;
2153         inbound_region.size = CFG_REGION_SIZE;
2154         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2155         if (rc)
2156                 return rc;
2157
2158         /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2159         inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2160         inbound_region.bar = SRAM_CFG_BAR_ID;
2161         inbound_region.offset_in_bar = CFG_REGION_SIZE;
2162         inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2163         inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2164         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2165         if (rc)
2166                 return rc;
2167
2168         /* Inbound Region 2 - Bar 4 - Point to DRAM */
2169         inbound_region.mode = PCI_BAR_MATCH_MODE;
2170         inbound_region.bar = DRAM_BAR_ID;
2171         inbound_region.addr = DRAM_PHYS_BASE;
2172         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2173         if (rc)
2174                 return rc;
2175
2176         /* Outbound Region 0 - Point to Host */
2177         outbound_region.addr = HOST_PHYS_BASE_0;
2178         outbound_region.size = HOST_PHYS_SIZE_0;
2179         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2180
2181         return rc;
2182 }
2183
2184 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2185 {
2186         return RREG32(mmHW_STATE);
2187 }
2188
2189 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2190 {
2191         struct asic_fixed_properties *prop = &hdev->asic_prop;
2192
2193         /*
2194          * check for the error condition in which the number of binning candidates
2195          * is higher than the maximum supported by the driver
2196          */
2197         if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2198                 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2199                                         MAX_CLUSTER_BINNING_FAULTY_TPCS,
2200                                         hdev->tpc_binning);
2201                 return -EINVAL;
2202         }
2203
2204         prop->tpc_binning_mask = hdev->tpc_binning;
2205         prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2206
2207         return 0;
2208 }
2209
2210 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2211 {
2212         struct asic_fixed_properties *prop = &hdev->asic_prop;
2213         struct hw_queue_properties *q_props = prop->hw_queues_props;
2214         u64 tpc_binning_mask;
2215         u8 subst_idx = 0;
2216         int i, rc;
2217
2218         rc = gaudi2_tpc_binning_init_prop(hdev);
2219         if (rc)
2220                 return rc;
2221
2222         tpc_binning_mask = prop->tpc_binning_mask;
2223
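             /*
              * For each faulty TPC in the binning mask, remove a fixed substitute TPC
              * (DCORE0_TPC6 first, then DCORE3_TPC5) from the enabled mask and mark the
              * substitute's queues as binned.
              */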
2224         for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2225                 u8 subst_seq, binned, qid_base;
2226
2227                 if (tpc_binning_mask == 0)
2228                         break;
2229
2230                 if (subst_idx == 0) {
2231                         subst_seq = TPC_ID_DCORE0_TPC6;
2232                         qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2233                 } else {
2234                         subst_seq = TPC_ID_DCORE3_TPC5;
2235                         qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2236                 }
2237
2239                 /* clear bit from mask */
2240                 binned = __ffs(tpc_binning_mask);
2241                 /*
2242                  * Coverity complains about a possible out-of-bounds access in
2243                  * clear_bit()
2244                  */
2245                 if (binned >= TPC_ID_SIZE) {
2246                         dev_err(hdev->dev,
2247                                 "Invalid binned TPC (binning mask: %llx)\n",
2248                                 tpc_binning_mask);
2249                         return -EINVAL;
2250                 }
2251                 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2252
2253                 /* also clear replacing TPC bit from enabled mask */
2254                 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2255
2256                 /* bin the substitute TPC's queues */
2257                 q_props[qid_base].binned = 1;
2258                 q_props[qid_base + 1].binned = 1;
2259                 q_props[qid_base + 2].binned = 1;
2260                 q_props[qid_base + 3].binned = 1;
2261
2262                 subst_idx++;
2263         }
2264
2265         return 0;
2266 }
2267
2268 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2269 {
2270         struct asic_fixed_properties *prop = &hdev->asic_prop;
2271         u8 num_faulty;
2272
2273         num_faulty = hweight32(hdev->decoder_binning);
2274
2275         /*
2276          * check for the error condition in which the number of binning candidates
2277          * is higher than the maximum supported by the driver
2278          */
2279         if (num_faulty > MAX_FAULTY_DECODERS) {
2280                 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2281                                                 hdev->decoder_binning);
2282                 return -EINVAL;
2283         }
2284
2285         prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2286
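             /* If any decoder is binned, the fixed substitute (PCIE_VDEC1) is removed from the enabled mask */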
2287         if (prop->decoder_binning_mask)
2288                 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2289         else
2290                 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2291
2292         return 0;
2293 }
2294
2295 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2296 {
2297         struct asic_fixed_properties *prop = &hdev->asic_prop;
2298
2299         /* check if we should override default binning */
2300         if (!hdev->dram_binning) {
2301                 prop->dram_binning_mask = 0;
2302                 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2303                 return;
2304         }
2305
2306         /* set DRAM binning constraints */
2307         prop->faulty_dram_cluster_map |= hdev->dram_binning;
2308         prop->dram_binning_mask = hdev->dram_binning;
2309         prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2310 }
2311
2312 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2313 {
2314         struct asic_fixed_properties *prop = &hdev->asic_prop;
2315         struct hw_queue_properties *q_props;
2316         u8 seq, num_faulty;
2317
2318         num_faulty = hweight32(hdev->edma_binning);
2319
2320         /*
2321          * check for the error condition in which the number of binning candidates
2322          * is higher than the maximum supported by the driver
2323          */
2324         if (num_faulty > MAX_FAULTY_EDMAS) {
2325                 dev_err(hdev->dev,
2326                         "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2327                         hdev->edma_binning);
2328                 return -EINVAL;
2329         }
2330
2331         if (!hdev->edma_binning) {
2332                 prop->edma_binning_mask = 0;
2333                 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2334                 return 0;
2335         }
2336
2337         seq = __ffs((unsigned long)hdev->edma_binning);
2338
2339         /* set binning constraints */
2340         prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2341         prop->edma_binning_mask = hdev->edma_binning;
2342         prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2343
2344         /* bin substitute EDMA's queue */
2345         q_props = prop->hw_queues_props;
2346         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2347         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2348         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2349         q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2350
2351         return 0;
2352 }
2353
2354 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2355 {
2356         struct asic_fixed_properties *prop = &hdev->asic_prop;
2357         u8 num_faulty, seq;
2358
2359         /* check if we should override default binning */
2360         if (!xbar_edge_iso_mask) {
2361                 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2362                 return 0;
2363         }
2364
2365         /*
2366          * Note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2367          * only the FW can set a redundancy value). For the user it will always be 0.
2368          */
2369         num_faulty = hweight32(xbar_edge_iso_mask);
2370
2371         /*
2372          * check for the error condition in which the number of binning candidates
2373          * is higher than the maximum supported by the driver
2374          */
2375         if (num_faulty > MAX_FAULTY_XBARS) {
2376                 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2377                                                                         MAX_FAULTY_XBARS);
2378                 return -EINVAL;
2379         }
2380
2381         seq = __ffs((unsigned long)xbar_edge_iso_mask);
2382
2383         /* set binning constraints */
2384         prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2385         prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2386
2387         return 0;
2388 }
2389
2390 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2391 {
2392         int rc;
2393
2394         /*
2395          * Mark all clusters as good; each component will "fail" a cluster
2396          * based on eFuse/user values.
2397          * If more than a single cluster is faulty, the chip is unusable.
2398          */
2399         hdev->asic_prop.faulty_dram_cluster_map = 0;
2400
2401         gaudi2_set_dram_binning_masks(hdev);
2402
2403         rc = gaudi2_set_edma_binning_masks(hdev);
2404         if (rc)
2405                 return rc;
2406
2407         rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2408         if (rc)
2409                 return rc;
2410
2412         /* always initially set to full mask */
2413         hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2414
2415         return 0;
2416 }
2417
2418 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2419 {
2420         struct asic_fixed_properties *prop = &hdev->asic_prop;
2421         int rc;
2422
2423         rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2424         if (rc)
2425                 return rc;
2426
2427         /* if DRAM binning is reported by the FW, we should perform cluster configuration */
2428         if (prop->faulty_dram_cluster_map) {
2429                 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2430
2431                 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2432         }
2433
2434         return 0;
2435 }
2436
2437 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2438 {
2439         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2440         struct asic_fixed_properties *prop = &hdev->asic_prop;
2441         long max_power;
2442         u64 dram_size;
2443         int rc;
2444
2445         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2446                 return 0;
2447
2448         /* No point in asking for this information again when not doing a hard reset, as the
2449          * device CPU hasn't been reset
2450          */
2451         if (hdev->reset_info.in_compute_reset)
2452                 return 0;
2453
2454         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2455                                                                                 mmCPU_BOOT_ERR1);
2456         if (rc)
2457                 return rc;
2458
2459         dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2460         if (dram_size) {
2461                 /* we can have either 5 or 6 HBMs. other values are invalid */
2462
2463                 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2464                                         (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2465                         dev_err(hdev->dev,
2466                                 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2467                                 dram_size, prop->dram_size);
2468                         dram_size = prop->dram_size;
2469                 }
2470
2471                 prop->dram_size = dram_size;
2472                 prop->dram_end_address = prop->dram_base_address + dram_size;
2473         }
2474
2475         if (!strlen(prop->cpucp_info.card_name))
2476                 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2477
2478         /* Overwrite binning masks with the actual binning values from F/W */
2479         hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2480         hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2481         hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2482         hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2483
2484         /*
2485          * at this point the DRAM parameters need to be updated according to data obtained
2486          * from the FW
2487          */
2488         rc = gaudi2_set_dram_properties(hdev);
2489         if (rc)
2490                 return rc;
2491
2492         rc = gaudi2_set_cluster_binning_masks(hdev);
2493         if (rc)
2494                 return rc;
2495
2496         rc = gaudi2_set_tpc_binning_masks(hdev);
2497         if (rc)
2498                 return rc;
2499
2500         rc = gaudi2_set_dec_binning_masks(hdev);
2501         if (rc)
2502                 return rc;
2503
2504         max_power = hl_fw_get_max_power(hdev);
2505         if (max_power < 0)
2506                 return max_power;
2507
2508         prop->max_power_default = (u64) max_power;
2509
2510         return 0;
2511 }
2512
2513 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2514 {
2515         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2516         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2517         int rc;
2518
2519         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2520                 return 0;
2521
2522         rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2523         if (rc)
2524                 return rc;
2525
2526         hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2527
2528         return 0;
2529 }
2530
2531 static int gaudi2_early_init(struct hl_device *hdev)
2532 {
2533         struct asic_fixed_properties *prop = &hdev->asic_prop;
2534         struct pci_dev *pdev = hdev->pdev;
2535         resource_size_t pci_bar_size;
2536         int rc;
2537
2538         rc = gaudi2_set_fixed_properties(hdev);
2539         if (rc)
2540                 return rc;
2541
2542         /* Check BAR sizes */
2543         pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2544
2545         if (pci_bar_size != CFG_BAR_SIZE) {
2546                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2547                         SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2548                 rc = -ENODEV;
2549                 goto free_queue_props;
2550         }
2551
2552         pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2553         if (pci_bar_size != MSIX_BAR_SIZE) {
2554                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2555                         MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2556                 rc = -ENODEV;
2557                 goto free_queue_props;
2558         }
2559
2560         prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2561         hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2562
2563         /*
2564          * Only on pldm does the driver configure the iATU
2565          */
2566         if (hdev->pldm)
2567                 hdev->asic_prop.iatu_done_by_fw = false;
2568         else
2569                 hdev->asic_prop.iatu_done_by_fw = true;
2570
2571         rc = hl_pci_init(hdev);
2572         if (rc)
2573                 goto free_queue_props;
2574
2575         /* Before continuing with the initialization, we need to read the preboot
2576          * version to determine whether we run with a security-enabled firmware
2577          */
2578         rc = hl_fw_read_preboot_status(hdev);
2579         if (rc) {
2580                 if (hdev->reset_on_preboot_fail)
2581                         hdev->asic_funcs->hw_fini(hdev, true, false);
2582                 goto pci_fini;
2583         }
2584
2585         if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2586                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2587                 hdev->asic_funcs->hw_fini(hdev, true, false);
2588         }
2589
2590         return 0;
2591
2592 pci_fini:
2593         hl_pci_fini(hdev);
2594 free_queue_props:
2595         kfree(hdev->asic_prop.hw_queues_props);
2596         return rc;
2597 }
2598
2599 static int gaudi2_early_fini(struct hl_device *hdev)
2600 {
2601         kfree(hdev->asic_prop.hw_queues_props);
2602         hl_pci_fini(hdev);
2603
2604         return 0;
2605 }
2606
2607 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2608 {
2609         switch (arc_id) {
2610         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2611                 return true;
2612         default:
2613                 return false;
2614         }
2615 }
2616
2617 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2618 {
2619         switch (arc_id) {
2620         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2621                 return true;
2622         default:
2623                 return false;
2624         }
2625 }
2626
2627 static void gaudi2_init_arcs(struct hl_device *hdev)
2628 {
2629         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2630         u64 arc_id;
2631         u32 i;
2632
2633         for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2634                 if (gaudi2_is_arc_enabled(hdev, i))
2635                         continue;
2636
2637                 gaudi2_set_arc_id_cap(hdev, i);
2638         }
2639
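             /*
              * Mark the ARC of every enabled engine queue, skipping NIC/TPC ARCs whose
              * underlying engines are masked off.
              */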
2640         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2641                 if (!gaudi2_is_queue_enabled(hdev, i))
2642                         continue;
2643
2644                 arc_id = gaudi2_queue_id_to_arc_id[i];
2645                 if (gaudi2_is_arc_enabled(hdev, arc_id))
2646                         continue;
2647
2648                 if (gaudi2_is_arc_nic_owned(arc_id) &&
2649                                 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2650                         continue;
2651
2652                 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2653                                                         BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2654                         continue;
2655
2656                 gaudi2_set_arc_id_cap(hdev, arc_id);
2657         }
2658 }
2659
2660 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2661 {
2662         u32 reg_base, reg_val;
2663         int rc;
2664
2665         switch (cpu_id) {
2666         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2667                 /* Each ARC scheduler has 2 consecutive DCCM blocks */
2668                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2669                                                 ARC_DCCM_BLOCK_SIZE * 2, true);
2670                 if (rc)
2671                         return rc;
2672                 break;
2673         case CPU_ID_SCHED_ARC4:
2674         case CPU_ID_SCHED_ARC5:
2675         case CPU_ID_MME_QMAN_ARC0:
2676         case CPU_ID_MME_QMAN_ARC1:
2677                 reg_base = gaudi2_arc_blocks_bases[cpu_id];
2678
2679                 /* Scrub lower DCCM block */
2680                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2681                                                 ARC_DCCM_BLOCK_SIZE, true);
2682                 if (rc)
2683                         return rc;
2684
2685                 /* Switch to upper DCCM block */
2686                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2687                 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2688
2689                 /* Scrub upper DCCM block */
2690                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2691                                                 ARC_DCCM_BLOCK_SIZE, true);
2692                 if (rc)
2693                         return rc;
2694
2695                 /* Switch to lower DCCM block */
2696                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2697                 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2698                 break;
2699         default:
2700                 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2701                                                 ARC_DCCM_BLOCK_SIZE, true);
2702                 if (rc)
2703                         return rc;
2704         }
2705
2706         return 0;
2707 }
2708
2709 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2710 {
2711         u16 arc_id;
2712
2713         for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2714                 if (!gaudi2_is_arc_enabled(hdev, arc_id))
2715                         continue;
2716
2717                 gaudi2_scrub_arc_dccm(hdev, arc_id);
2718         }
2719 }
2720
2721 static int gaudi2_late_init(struct hl_device *hdev)
2722 {
2723         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2724         int rc;
2725
2726         hdev->asic_prop.supports_advanced_cpucp_rc = true;
2727
2728         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2729                                         gaudi2->virt_msix_db_dma_addr);
2730         if (rc) {
2731                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2732                 return rc;
2733         }
2734
2735         rc = gaudi2_fetch_psoc_frequency(hdev);
2736         if (rc) {
2737                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2738                 goto disable_pci_access;
2739         }
2740
2741         gaudi2_init_arcs(hdev);
2742         gaudi2_scrub_arcs_dccm(hdev);
2743         gaudi2_init_security(hdev);
2744
2745         return 0;
2746
2747 disable_pci_access:
2748         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2749
2750         return rc;
2751 }
2752
2753 static void gaudi2_late_fini(struct hl_device *hdev)
2754 {
2755         hl_hwmon_release_resources(hdev);
2756 }
2757
2758 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2759 {
2760         struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2761
2762         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2763         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2764         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2765         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2766         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2767         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2768         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2769         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2770         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2771         HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2772 }
2773
2774 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2775 {
2776         struct gaudi2_device *gaudi2 = hdev->asic_specific;
2777         struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2778         u32 block_size, umr_start_idx, num_umr_blocks;
2779         int i;
2780
2781         for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2782                 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2783                         block_size = ARC_DCCM_BLOCK_SIZE * 2;
2784                 else
2785                         block_size = ARC_DCCM_BLOCK_SIZE;
2786
2787                 blocks[i].address = gaudi2_arc_dccm_bases[i];
2788                 blocks[i].size = block_size;
2789         }
2790
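             /* The ACP engine blocks of the ARC farm and MME QMAN ARCs follow the DCCM blocks */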
2791         blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2792         blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2793
2794         blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2795         blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2796
2797         blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2798         blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2799
2800         blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2801         blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2802
2803         blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2804         blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2805
2806         blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2807         blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2808
2809         blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2810         blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2811
2812         blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2813         blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2814
2815         umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2816         num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2817         for (i = 0 ; i < num_umr_blocks ; i++) {
2818                 u8 nic_id, umr_block_id;
2819
2820                 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2821                 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2822
2823                 blocks[umr_start_idx + i].address =
2824                         mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2825                         (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2826                         (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2827                         umr_block_id * NIC_UMR_OFFSET;
2828                 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2829         }
2830
2831         /* Expose decoder HW configuration block to user */
2832         gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2833
2834         for (i = 1; i < NUM_OF_DCORES; ++i) {
2835                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2836                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2837
2838                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2839                                                 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2840
2841                 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2842                                                 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2843         }
2844 }
2845
2846 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2847 {
2848         dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2849         void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2850         int i, j, rc = 0;
2851
2852         /* The device ARC works with 32-bit addresses, and because there is a single HW register
2853          * that holds the extension bits (49..28), these bits must be identical across the entire
2854          * allocated range.
2855          */
2856
2857         for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2858                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2859                                                         &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2860                 if (!virt_addr_arr[i]) {
2861                         rc = -ENOMEM;
2862                         goto free_dma_mem_arr;
2863                 }
2864
2865                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2866                 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2867                         break;
2868         }
2869
2870         if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2871                 dev_err(hdev->dev,
2872                         "MSB of ARC accessible DMA memory is not identical across the allocated range\n");
2873                 rc = -EFAULT;
2874                 goto free_dma_mem_arr;
2875         }
2876
2877         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2878         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2879
2880 free_dma_mem_arr:
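             /* Free the trial allocations below index i; on success, index i itself is kept */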
2881         for (j = 0 ; j < i ; j++)
2882                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2883                                                 dma_addr_arr[j]);
2884
2885         return rc;
2886 }
2887
2888 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2889 {
2890         struct asic_fixed_properties *prop = &hdev->asic_prop;
2891         struct pci_mem_region *region;
2892
2893         /* CFG */
2894         region = &hdev->pci_mem_region[PCI_REGION_CFG];
2895         region->region_base = CFG_BASE;
2896         region->region_size = CFG_SIZE;
2897         region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2898         region->bar_size = CFG_BAR_SIZE;
2899         region->bar_id = SRAM_CFG_BAR_ID;
2900         region->used = 1;
2901
2902         /* SRAM */
2903         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2904         region->region_base = SRAM_BASE_ADDR;
2905         region->region_size = SRAM_SIZE;
2906         region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2907         region->bar_size = CFG_BAR_SIZE;
2908         region->bar_id = SRAM_CFG_BAR_ID;
2909         region->used = 1;
2910
2911         /* DRAM */
2912         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2913         region->region_base = DRAM_PHYS_BASE;
2914         region->region_size = hdev->asic_prop.dram_size;
2915         region->offset_in_bar = 0;
2916         region->bar_size = prop->dram_pci_bar_size;
2917         region->bar_id = DRAM_BAR_ID;
2918         region->used = 1;
2919 }
2920
2921 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2922 {
2923         struct asic_fixed_properties *prop = &hdev->asic_prop;
2924         int i, j, k;
2925
2926         /* Initialize common user CQ interrupt */
2927         HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2928                                 HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2929
2930         /* Initialize common decoder interrupt */
2931         HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2932                                 HL_COMMON_DEC_INTERRUPT_ID, true);
2933
2934         /* The user interrupts structure holds both decoder and user interrupts from various engines.
2935          * We first initialize the decoder interrupts and then we add the user interrupts.
2936          * The only limitation is that the last decoder interrupt id must be smaller
2937          * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2938          */
2939
2940         /* Initialize decoder interrupts; expose only the normal interrupts,
2941          * as the error interrupts are handled by the driver
2942          */
2943         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2944                                                                                 i += 2, j++)
2945                 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2946
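             /* Initialize the user interrupts, which follow the decoder entries in the same array */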
2947         for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2948                 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2949 }
2950
2951 static inline int gaudi2_get_non_zero_random_int(void)
2952 {
2953         int rand = get_random_u32();
2954
2955         return rand ? rand : 1;
2956 }
2957
2958 static int gaudi2_sw_init(struct hl_device *hdev)
2959 {
2960         struct asic_fixed_properties *prop = &hdev->asic_prop;
2961         struct gaudi2_device *gaudi2;
2962         int i, rc;
2963
2964         /* Allocate device structure */
2965         gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2966         if (!gaudi2)
2967                 return -ENOMEM;
2968
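             /*
              * Build the list of valid H/W event IDs, skipping map entries that are
              * flagged as messages or that are not valid.
              */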
2969         for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2970                 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2971                         continue;
2972
2973                 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2974                         dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2975                                 GAUDI2_EVENT_SIZE);
2976                         rc = -EINVAL;
2977                         goto free_gaudi2_device;
2978                 }
2979
2980                 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2981         }
2982
2983         for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2984                 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2985
2986         gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
2987
2988         hdev->asic_specific = gaudi2;
2989
2990         /* Create DMA pool for small allocations.
2991          * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
2992          * PI/CI registers allocated from this pool have this restriction
2993          */
2994         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
2995                                         GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
2996         if (!hdev->dma_pool) {
2997                 dev_err(hdev->dev, "failed to create DMA pool\n");
2998                 rc = -ENOMEM;
2999                 goto free_gaudi2_device;
3000         }
3001
3002         rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3003         if (rc)
3004                 goto free_dma_pool;
3005
3006         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3007         if (!hdev->cpu_accessible_dma_pool) {
3008                 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3009                 rc = -ENOMEM;
3010                 goto free_cpu_dma_mem;
3011         }
3012
3013         rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3014                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3015         if (rc) {
3016                 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3017                 rc = -EFAULT;
3018                 goto free_cpu_accessible_dma_pool;
3019         }
3020
3021         gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3022                                                                 &gaudi2->virt_msix_db_dma_addr);
3023         if (!gaudi2->virt_msix_db_cpu_addr) {
3024                 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3025                 rc = -ENOMEM;
3026                 goto free_cpu_accessible_dma_pool;
3027         }
3028
3029         spin_lock_init(&gaudi2->hw_queues_lock);
3030
3031         gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3032                                                         &gaudi2->scratchpad_bus_address,
3033                                                         GFP_KERNEL | __GFP_ZERO);
3034         if (!gaudi2->scratchpad_kernel_address) {
3035                 rc = -ENOMEM;
3036                 goto free_virt_msix_db_mem;
3037         }
3038
3039         gaudi2_user_mapped_blocks_init(hdev);
3040
3041         /* Initialize user interrupts */
3042         gaudi2_user_interrupt_setup(hdev);
3043
3044         hdev->supports_coresight = true;
3045         hdev->supports_sync_stream = true;
3046         hdev->supports_cb_mapping = true;
3047         hdev->supports_wait_for_multi_cs = false;
3048
3049         prop->supports_compute_reset = true;
3050
3051         hdev->asic_funcs->set_pci_memory_regions(hdev);
3052
3053         return 0;
3054
3055 free_virt_msix_db_mem:
3056         hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3057 free_cpu_accessible_dma_pool:
3058         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3059 free_cpu_dma_mem:
3060         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3061                                         hdev->cpu_accessible_dma_address);
3062 free_dma_pool:
3063         dma_pool_destroy(hdev->dma_pool);
3064 free_gaudi2_device:
3065         kfree(gaudi2);
3066         return rc;
3067 }
3068
3069 static int gaudi2_sw_fini(struct hl_device *hdev)
3070 {
3071         struct asic_fixed_properties *prop = &hdev->asic_prop;
3072         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3073
3074         hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3075
3076         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3077
3078         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3079                                                 hdev->cpu_accessible_dma_address);
3080
3081         hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3082                                         gaudi2->scratchpad_bus_address);
3083
3084         dma_pool_destroy(hdev->dma_pool);
3085
3086         kfree(gaudi2);
3087
3088         return 0;
3089 }
3090
3091 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3092 {
3093         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3094                                                 QM_GLBL_CFG1_CQF_STOP |
3095                                                 QM_GLBL_CFG1_CP_STOP);
3096
3097         /* also stop the ARC */
3098         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3099 }
3100
3101 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3102 {
3103         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3104                                                 QM_GLBL_CFG1_CQF_FLUSH |
3105                                                 QM_GLBL_CFG1_CP_FLUSH);
3106 }
3107
3108 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3109 {
3110         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3111 }
3112
3113 /**
3114  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3115  *
3116  * @hdev: pointer to the habanalabs device structure
3117  * @queue_id: queue whose fence counters should be cleared
3118  * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3119  *              getting stuck on any fence value; otherwise set all fence
3120  *              counters to 0 (standard clear of fence counters)
3121  */
3122 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3123                                                 bool skip_fence)
3124 {
3125         u32 size, reg_base;
3126         u32 addr, val;
3127
3128         reg_base = gaudi2_qm_blocks_bases[queue_id];
3129
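             /* The fence counters occupy the register range from CP_FENCE0_CNT_0 up to CP_BARRIER_CFG */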
3130         addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3131         size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3132
3133         /*
3134          * In case we want to make sure that a QM that is stuck on a fence will
3135          * be released, we should set the fence counter to a higher value than
3136          * the value the QM is waiting for. To comply with any fence counter of
3137          * any value, we set the maximum fence value in all counters
3138          */
3139         val = skip_fence ? U32_MAX : 0;
3140         gaudi2_memset_device_lbw(hdev, addr, size, val);
3141 }
3142
3143 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3144 {
3145         u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3146
3147         gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3148         gaudi2_flush_qman_common(hdev, reg_base);
3149         gaudi2_flush_qman_arc_common(hdev, reg_base);
3150 }
3151
3152 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3153 {
3154         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3155         int dcore, inst;
3156
3157         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3158                 goto stop_edma_qmans;
3159
3160         /* Stop CPs of PDMA QMANs */
3161         gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3162         gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3163
3164 stop_edma_qmans:
3165         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3166                 return;
3167
3168         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3169                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3170                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3171                         u32 qm_base;
3172
3173                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3174                                 continue;
3175
3176                         qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3177                                         inst * DCORE_EDMA_OFFSET;
3178
3179                         /* Stop CPs of EDMA QMANs */
3180                         gaudi2_stop_qman_common(hdev, qm_base);
3181                 }
3182         }
3183 }
3184
3185 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3186 {
3187         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3188         u32 offset, i;
3189
3190         offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3191
3192         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3193                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3194                         continue;
3195
3196                 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3197         }
3198 }
3199
3200 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3201 {
3202         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3203         u32 reg_base;
3204         int i;
3205
3206         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3207                 return;
3208
3209         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3210                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3211                         continue;
3212
3213                 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3214                 gaudi2_stop_qman_common(hdev, reg_base);
3215         }
3216 }
3217
3218 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3219 {
3220         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3221         u32 reg_base;
3222         int i;
3223
3224         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3225                 return;
3226
3227         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3228                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3229                         continue;
3230
3231                 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3232                 gaudi2_stop_qman_common(hdev, reg_base);
3233         }
3234 }
3235
3236 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3237 {
3238         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3239         u32 reg_base, queue_id;
3240         int i;
3241
3242         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3243                 return;
3244
3245         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3246
3247         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3248                 if (!(hdev->nic_ports_mask & BIT(i)))
3249                         continue;
3250
3251                 reg_base = gaudi2_qm_blocks_bases[queue_id];
3252                 gaudi2_stop_qman_common(hdev, reg_base);
3253         }
3254 }
3255
3256 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3257 {
3258         u32 reg_val;
3259
3260         reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3261         WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3262 }
3263
3264 static void gaudi2_dma_stall(struct hl_device *hdev)
3265 {
3266         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3267         int dcore, inst;
3268
3269         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3270                 goto stall_edma;
3271
3272         gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3273         gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3274
3275 stall_edma:
3276         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3277                 return;
3278
3279         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3280                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3281                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3282                         u32 core_base;
3283
3284                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3285                                 continue;
3286
3287                         core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3288                                         inst * DCORE_EDMA_OFFSET;
3289
3290                         /* Stall the EDMA cores */
3291                         gaudi2_stall_dma_common(hdev, core_base);
3292                 }
3293         }
3294 }
3295
3296 static void gaudi2_mme_stall(struct hl_device *hdev)
3297 {
3298         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3299         u32 offset, i;
3300
3301         offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3302
3303         for (i = 0 ; i < NUM_OF_DCORES ; i++)
3304                 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3305                         WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3306 }
3307
3308 static void gaudi2_tpc_stall(struct hl_device *hdev)
3309 {
3310         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3311         u32 reg_base;
3312         int i;
3313
3314         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3315                 return;
3316
3317         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3318                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3319                         continue;
3320
3321                 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3322                 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3323         }
3324 }
3325
3326 static void gaudi2_rotator_stall(struct hl_device *hdev)
3327 {
3328         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3329         u32 reg_val;
3330         int i;
3331
3332         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3333                 return;
3334
3335         reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3336                         FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3337                         FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3338
3339         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3340                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3341                         continue;
3342
3343                 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3344         }
3345 }
3346
3347 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3348 {
3349         WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3350 }
3351
3352 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3353 {
3354         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3355         int dcore, inst;
3356
3357         if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3358                 goto stop_edma_qmans;
3359
3360         gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3361         gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3362
3363 stop_edma_qmans:
3364         if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3365                 return;
3366
3367         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3368                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3369                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3370                         u32 qm_base;
3371
3372                         if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3373                                 continue;
3374
3375                         qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3376                                         inst * DCORE_EDMA_OFFSET;
3377
3378                         /* Disable CPs of EDMA QMANs */
3379                         gaudi2_disable_qman_common(hdev, qm_base);
3380                 }
3381         }
3382 }
3383
3384 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3385 {
3386         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3387         u32 offset, i;
3388
3389         offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3390
3391         for (i = 0 ; i < NUM_OF_DCORES ; i++)
3392                 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3393                         gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3394 }
3395
3396 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3397 {
3398         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3399         u32 reg_base;
3400         int i;
3401
3402         if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3403                 return;
3404
3405         for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3406                 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3407                         continue;
3408
3409                 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3410                 gaudi2_disable_qman_common(hdev, reg_base);
3411         }
3412 }
3413
3414 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3415 {
3416         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3417         u32 reg_base;
3418         int i;
3419
3420         if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3421                 return;
3422
3423         for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3424                 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3425                         continue;
3426
3427                 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3428                 gaudi2_disable_qman_common(hdev, reg_base);
3429         }
3430 }
3431
3432 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3433 {
3434         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3435         u32 reg_base, queue_id;
3436         int i;
3437
3438         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3439                 return;
3440
3441         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3442
3443         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3444                 if (!(hdev->nic_ports_mask & BIT(i)))
3445                         continue;
3446
3447                 reg_base = gaudi2_qm_blocks_bases[queue_id];
3448                 gaudi2_disable_qman_common(hdev, reg_base);
3449         }
3450 }
3451
3452 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3453 {
3454         /* Disable the timestamp counter */
3455         WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3456
3457         /* Zero the lower/upper parts of the 64-bit counter */
3458         WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3459         WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3460
3461         /* Enable the counter */
3462         WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3463 }
3464
3465 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3466 {
3467         /* Disable the timestamp counter */
3468         WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3469 }
3470
3471 static const char *gaudi2_irq_name(u16 irq_number)
3472 {
3473         switch (irq_number) {
3474         case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3475                 return "gaudi2 cpu eq";
3476         case GAUDI2_IRQ_NUM_COMPLETION:
3477                 return "gaudi2 completion";
3478         case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3479                 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3480         case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3481                 return "gaudi2 user completion";
3482         default:
3483                 return "invalid";
3484         }
3485 }
3486
3487 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3488 {
3489         int i, irq, relative_idx;
3490         struct hl_dec *dec;
3491
3492         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3493                 irq = pci_irq_vector(hdev->pdev, i);
3494                 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3495
3496                 dec = hdev->dec + relative_idx / 2;
3497
3498                 /* We pass different structures depending on the irq handler. For the abnormal
3499                  * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3500                  * user_interrupt entry
3501                  */
3502                 free_irq(irq, ((relative_idx % 2) ?
3503                                 (void *) dec :
3504                                 (void *) &hdev->user_interrupt[dec->core_id]));
3505         }
3506 }
3507
3508 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3509 {
3510         int rc, i, irq_init_cnt, irq, relative_idx;
3511         irq_handler_t irq_handler;
3512         struct hl_dec *dec;
3513
3514         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3515                         i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3516                         i++, irq_init_cnt++) {
3517
3518                 irq = pci_irq_vector(hdev->pdev, i);
3519                 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3520
3521                 irq_handler = (relative_idx % 2) ?
3522                                 hl_irq_handler_dec_abnrm :
3523                                 hl_irq_handler_user_interrupt;
3524
3525                 dec = hdev->dec + relative_idx / 2;
3526
3527                 /* We pass different structures depending on the irq handler. For the abnormal
3528                  * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3529                  * user_interrupt entry
3530                  */
3531                 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3532                                 ((relative_idx % 2) ?
3533                                 (void *) dec :
3534                                 (void *) &hdev->user_interrupt[dec->core_id]));
3535                 if (rc) {
3536                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3537                         goto free_dec_irqs;
3538                 }
3539         }
3540
3541         return 0;
3542
3543 free_dec_irqs:
3544         gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3545         return rc;
3546 }
3547
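/* Allocate all MSI-X vectors and request the driver-owned interrupts:
 * the CS completion queue, the CPU event queue, the decoder normal/abnormal
 * pairs and finally the user interrupts.
 */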
3548 static int gaudi2_enable_msix(struct hl_device *hdev)
3549 {
3550         struct asic_fixed_properties *prop = &hdev->asic_prop;
3551         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3552         int rc, irq, i, j, user_irq_init_cnt;
3553         irq_handler_t irq_handler;
3554         struct hl_cq *cq;
3555
3556         if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3557                 return 0;
3558
3559         rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3560                                         PCI_IRQ_MSIX);
3561         if (rc < 0) {
3562                 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3563                         GAUDI2_MSIX_ENTRIES, rc);
3564                 return rc;
3565         }
3566
3567         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3568         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3569         rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3570         if (rc) {
3571                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3572                 goto free_irq_vectors;
3573         }
3574
3575         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3576         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3577                         &hdev->event_queue);
3578         if (rc) {
3579                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3580                 goto free_completion_irq;
3581         }
3582
3583         rc = gaudi2_dec_enable_msix(hdev);
3584         if (rc) {
3585                 dev_err(hdev->dev, "Failed to enable decoder IRQ\n");
3586                 goto free_event_irq;
3587         }
3588
3589         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3590                         user_irq_init_cnt < prop->user_interrupt_count;
3591                         i++, j++, user_irq_init_cnt++) {
3592
3593                 irq = pci_irq_vector(hdev->pdev, i);
3594                 irq_handler = hl_irq_handler_user_interrupt;
3595
3596                 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3597                 if (rc) {
3598                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3599                         goto free_user_irq;
3600                 }
3601         }
3602
3603         gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3604
3605         return 0;
3606
3607 free_user_irq:
3608         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3609                         i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3610
3611                 irq = pci_irq_vector(hdev->pdev, i);
3612                 free_irq(irq, &hdev->user_interrupt[j]);
3613         }
3614
3615         gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3616
3617 free_event_irq:
3618         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3619         free_irq(irq, &hdev->event_queue);
3620
3621 free_completion_irq:
3622         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3623         free_irq(irq, cq);
3624
3625 free_irq_vectors:
3626         pci_free_irq_vectors(hdev->pdev);
3627
3628         return rc;
3629 }
3630
3631 static void gaudi2_sync_irqs(struct hl_device *hdev)
3632 {
3633         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3634         int i, j;
3635         int irq;
3636
3637         if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3638                 return;
3639
3640         /* Wait for all pending IRQs to finish */
3641         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3642
3643         for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3644                 irq = pci_irq_vector(hdev->pdev, i);
3645                 synchronize_irq(irq);
3646         }
3647
3648         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3649                                                                                 i++, j++) {
3650                 irq = pci_irq_vector(hdev->pdev, i);
3651                 synchronize_irq(irq);
3652         }
3653
3654         synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3655 }
3656
3657 static void gaudi2_disable_msix(struct hl_device *hdev)
3658 {
3659         struct asic_fixed_properties *prop = &hdev->asic_prop;
3660         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3661         struct hl_cq *cq;
3662         int irq, i, j, k;
3663
3664         if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3665                 return;
3666
3667         gaudi2_sync_irqs(hdev);
3668
3669         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3670         free_irq(irq, &hdev->event_queue);
3671
3672         gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3673
3674         for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3675                         k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3676
3677                 irq = pci_irq_vector(hdev->pdev, i);
3678                 free_irq(irq, &hdev->user_interrupt[j]);
3679         }
3680
3681         irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3682         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3683         free_irq(irq, cq);
3684
3685         pci_free_irq_vectors(hdev->pdev);
3686
3687         gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3688 }
3689
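/* Request a graceful stop from every enabled decoder in the dcore and poll the
 * GRACEFUL_PEND bit until the graceful stop completes.
 */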
3690 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3691 {
3692         u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3693         u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3694         u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3695         int rc;
3696
3697         if (hdev->pldm)
3698                 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3699         else
3700                 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3701
3702         for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3703                 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3704                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3705                         continue;
3706
3707                 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3708
3709                 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3710
3711                 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3712
3713                 /* Wait until all traffic from the decoder stops
3714                  * before applying core reset.
3715                  */
3716                 rc = hl_poll_timeout(
3717                                 hdev,
3718                                 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3719                                 graceful,
3720                                 (graceful & graceful_pend_mask),
3721                                 100,
3722                                 timeout_usec);
3723                 if (rc)
3724                         dev_err(hdev->dev,
3725                                 "Failed to stop traffic from DCORE%d Decoder %d\n",
3726                                 dcore_id, dec_id);
3727         }
3728 }
3729
3730 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3731 {
3732         u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3733         u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3734         u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3735         int rc;
3736
3737         if (hdev->pldm)
3738                 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3739         else
3740                 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3741
3742         for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3743                 dec_bit = PCIE_DEC_SHIFT + dec_id;
3744                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3745                         continue;
3746
3747                 offset = dec_id * PCIE_VDEC_OFFSET;
3748
3749                 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3750
3751                 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3752
3753                 /* Wait until all traffic from the decoder stops
3754                  * before applying core reset.
3755                  */
3756                 rc = hl_poll_timeout(
3757                                 hdev,
3758                                 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3759                                 graceful,
3760                                 (graceful & graceful_pend_mask),
3761                                 100,
3762                                 timeout_usec);
3763                 if (rc)
3764                         dev_err(hdev->dev,
3765                                 "Failed to stop traffic from PCIe Decoder %d\n",
3766                                 dec_id);
3767         }
3768 }
3769
3770 static void gaudi2_stop_dec(struct hl_device *hdev)
3771 {
3772         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3773         int dcore_id;
3774
3775         if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3776                 return;
3777
3778         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3779                 gaudi2_stop_dcore_dec(hdev, dcore_id);
3780
3781         gaudi2_stop_pcie_dec(hdev);
3782 }
3783
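/* Request RUN or HALT for a single ARC core via its AUX RUN_HALT_REQ register.
 * The request is acknowledged asynchronously; see gaudi2_verify_arc_running_mode().
 */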
3784 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3785 {
3786         u32 reg_base, reg_val;
3787
3788         reg_base = gaudi2_arc_blocks_bases[cpu_id];
3789         if (run_mode == HL_ENGINE_CORE_RUN)
3790                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3791         else
3792                 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3793
3794         WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3795 }
3796
3797 static void gaudi2_halt_arcs(struct hl_device *hdev)
3798 {
3799         u16 arc_id;
3800
3801         for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3802                 if (gaudi2_is_arc_enabled(hdev, arc_id))
3803                         gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3804         }
3805 }
3806
3807 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3808 {
3809         int rc;
3810         u32 reg_base, val, ack_mask, timeout_usec = 100000;
3811
3812         if (hdev->pldm)
3813                 timeout_usec *= 100;
3814
3815         reg_base = gaudi2_arc_blocks_bases[cpu_id];
3816         if (run_mode == HL_ENGINE_CORE_RUN)
3817                 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3818         else
3819                 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3820
3821         rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3822                                 val, ((val & ack_mask) == ack_mask),
3823                                 1000, timeout_usec);
3824
3825         if (!rc) {
3826                 /* Clear */
3827                 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3828                 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3829         }
3830
3831         return rc;
3832 }
3833
3834 static void gaudi2_reset_arcs(struct hl_device *hdev)
3835 {
3836         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3837         u16 arc_id;
3838
3839         if (!gaudi2)
3840                 return;
3841
3842         for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3843                 if (gaudi2_is_arc_enabled(hdev, arc_id))
3844                         gaudi2_clr_arc_id_cap(hdev, arc_id);
3845 }
3846
3847 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3848 {
3849         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3850         u32 queue_id;
3851         int i;
3852
3853         if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3854                 return;
3855
3856         queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3857
3858         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3859                 if (!(hdev->nic_ports_mask & BIT(i)))
3860                         continue;
3861
3862                 gaudi2_qman_manual_flush_common(hdev, queue_id);
3863         }
3864 }
3865
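/* Two passes over the requested cores: first issue the RUN/HALT request to all
 * enabled ARCs, then verify that each of them acknowledged the new mode.
 */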
3866 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3867                                         u32 num_cores, u32 core_command)
3868 {
3869         int i, rc;
3870
3871
3872         for (i = 0 ; i < num_cores ; i++) {
3873                 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3874                         gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3875         }
3876
3877         for (i = 0 ; i < num_cores ; i++) {
3878                 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3879                         rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3880
3881                         if (rc) {
3882                                 dev_err(hdev->dev, "failed to %s arc: %d\n",
3883                                         (core_command == HL_ENGINE_CORE_HALT) ?
3884                                         "HALT" : "RUN", core_ids[i]);
3885                                 return -1;
3886                         }
3887                 }
3888         }
3889
3890         return 0;
3891 }
3892
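/* Engine shutdown order: stop all QMANs, stall the compute engines and ARCs,
 * stop the decoders, and finally disable the QMANs and the timestamp counter.
 * On hard reset MSI-X is disabled as well; otherwise only pending IRQs are synced.
 */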
3893 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3894 {
3895         u32 wait_timeout_ms;
3896
3897         if (hdev->pldm)
3898                 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3899         else
3900                 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3901
3902         if (fw_reset)
3903                 goto skip_engines;
3904
3905         gaudi2_stop_dma_qmans(hdev);
3906         gaudi2_stop_mme_qmans(hdev);
3907         gaudi2_stop_tpc_qmans(hdev);
3908         gaudi2_stop_rot_qmans(hdev);
3909         gaudi2_stop_nic_qmans(hdev);
3910         msleep(wait_timeout_ms);
3911
3912         gaudi2_halt_arcs(hdev);
3913         gaudi2_dma_stall(hdev);
3914         gaudi2_mme_stall(hdev);
3915         gaudi2_tpc_stall(hdev);
3916         gaudi2_rotator_stall(hdev);
3917
3918         msleep(wait_timeout_ms);
3919
3920         gaudi2_stop_dec(hdev);
3921
3922         /*
3923          * In case of soft reset, do a manual flush for QMANs (currently called
3924          * only for NIC QMANs).
3925          */
3926         if (!hard_reset)
3927                 gaudi2_nic_qmans_manual_flush(hdev);
3928
3929         gaudi2_disable_dma_qmans(hdev);
3930         gaudi2_disable_mme_qmans(hdev);
3931         gaudi2_disable_tpc_qmans(hdev);
3932         gaudi2_disable_rot_qmans(hdev);
3933         gaudi2_disable_nic_qmans(hdev);
3934         gaudi2_disable_timestamp(hdev);
3935
3936 skip_engines:
3937         if (hard_reset) {
3938                 gaudi2_disable_msix(hdev);
3939                 return;
3940         }
3941
3942         gaudi2_sync_irqs(hdev);
3943 }
3944
3945 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3946 {
3947         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3948
3949         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3950         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3951         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3952         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3953         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3954         pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3955 }
3956
3957 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3958 {
3959         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3960         struct dynamic_fw_load_mgr *dynamic_loader;
3961         struct cpu_dyn_regs *dyn_regs;
3962
3963         /* fill common fields */
3964         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3965         fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3966         fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3967         fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3968         fw_loader->skip_bmc = false;
3969         fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3970         fw_loader->dram_bar_id = DRAM_BAR_ID;
3971         fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3972
3973         /* Here we update initial values for a few specific dynamic regs (before
3974          * reading the first descriptor from FW, those values have to be
3975          * hard-coded). In later stages of the protocol those values will be
3976          * updated automatically by reading the FW descriptor, so the data there
3977          * will always be up-to-date.
3978          */
3979         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3980         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3981         dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3982         dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3983         dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3984 }
3985
3986 static int gaudi2_init_cpu(struct hl_device *hdev)
3987 {
3988         struct gaudi2_device *gaudi2 = hdev->asic_specific;
3989         int rc;
3990
3991         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3992                 return 0;
3993
3994         if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
3995                 return 0;
3996
3997         rc = hl_fw_init_cpu(hdev);
3998         if (rc)
3999                 return rc;
4000
4001         gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4002
4003         return 0;
4004 }
4005
4006 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4007 {
4008         struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4009         struct asic_fixed_properties *prop = &hdev->asic_prop;
4010         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4011         struct cpu_dyn_regs *dyn_regs;
4012         struct hl_eq *eq;
4013         u32 status;
4014         int err;
4015
4016         if (!hdev->cpu_queues_enable)
4017                 return 0;
4018
4019         if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4020                 return 0;
4021
4022         eq = &hdev->event_queue;
4023
4024         dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4025
4026         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4027         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4028
4029         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4030         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4031
4032         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4033         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4034
4035         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4036         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4037         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4038
4039         /* Used for EQ CI */
4040         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4041
4042         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4043
4044         WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4045
4046         /* Let the ARC know we are ready as it is now handling those queues */
4047
4048         WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4049                 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4050
4051         err = hl_poll_timeout(
4052                 hdev,
4053                 mmCPU_IF_QUEUE_INIT,
4054                 status,
4055                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4056                 1000,
4057                 cpu_timeout);
4058
4059         if (err) {
4060                 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4061                 return -EIO;
4062         }
4063
4064         /* update FW application security bits */
4065         if (prop->fw_cpu_boot_dev_sts0_valid)
4066                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4067
4068         if (prop->fw_cpu_boot_dev_sts1_valid)
4069                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4070
4071         gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4072         return 0;
4073 }
4074
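/* Program the base address and size of every PQ in the QMAN and reset its
 * producer/consumer indices.
 */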
4075 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4076                                 u32 queue_id_base)
4077 {
4078         struct hl_hw_queue *q;
4079         u32 pq_id, pq_offset;
4080
4081         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4082                 q = &hdev->kernel_queues[queue_id_base + pq_id];
4083                 pq_offset = pq_id * 4;
4084
4085                 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4086                                 lower_32_bits(q->bus_address));
4087                 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4088                                 upper_32_bits(q->bus_address));
4089                 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4090                 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4091                 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4092         }
4093 }
4094
4095 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4096 {
4097         u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4098
4099         mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4100         mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4101         so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4102         so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4103
4104         for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4105                 cp_offset = cp_id * 4;
4106
4107                 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4108                 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4109                 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4110                 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4111         }
4112
4113         /* allow QMANs to accept work from ARC CQF */
4114         WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4115 }
4116
4117 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4118                                 u32 queue_id_base)
4119 {
4120         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4121         u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4122
4123         so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4124         so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4125
4126         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4127                 pq_offset = pq_id * 4;
4128
4129                 /* Configure QMAN HBW to scratchpad as it is not needed */
4130                 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4131                                 lower_32_bits(gaudi2->scratchpad_bus_address));
4132                 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4133                                 upper_32_bits(gaudi2->scratchpad_bus_address));
4134                 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4135                                 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4136
4137                 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4138                 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4139                 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4140                 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4141         }
4142
4143         /* Enable QMAN H/W completion */
4144         WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4145 }
4146
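/* Return the GIC IRQ control register (from the FW dynamic regs) that matches
 * the engine class of the given queue ID, used here as the QMAN error message
 * address.
 */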
4147 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4148 {
4149         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4150         u32 sp_reg_addr;
4151
4152         switch (queue_id_base) {
4153         case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4154                 fallthrough;
4155         case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4156                 fallthrough;
4157         case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4158                 fallthrough;
4159         case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4160                 fallthrough;
4161         case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4162                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4163                 break;
4164         case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4165                 fallthrough;
4166         case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4167                 fallthrough;
4168         case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4169                 fallthrough;
4170         case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4171                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4172                 break;
4173         case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4174                 fallthrough;
4175         case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4176                 fallthrough;
4177         case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4178                 fallthrough;
4179         case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4180                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4181                 break;
4182         case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4183                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4184                 break;
4185         case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4186                 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4187                 break;
4188         default:
4189                 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4190                 return 0;
4191         }
4192
4193         return sp_reg_addr;
4194 }
4195
4196 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4197                                         u32 queue_id_base)
4198 {
4199         u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4200         int map_table_entry;
4201
4202         WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4203
4204         irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4205         WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4206         WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4207
4208         map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4209         WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4210                 gaudi2_irq_map_table[map_table_entry].cpu_id);
4211
4212         WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4213
4214         WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4215         WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4216         WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4217
4218         /* Enable the QMAN channel.
4219          * PDMA QMAN configuration is different, as we do not allow user to
4220          * access some of the CPs.
4221          * PDMA0: CP2/3 are reserved for the ARC usage.
4222          * PDMA1: CP1/2/3 are reserved for the ARC usage.
4223          */
4224         if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4225                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4226         else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4227                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4228         else
4229                 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4230 }
4231
4232 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4233                 u32 queue_id_base)
4234 {
4235         u32 pq_id;
4236
4237         for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4238                 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4239
4240         gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4241         gaudi2_init_qman_cp(hdev, reg_base);
4242         gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4243         gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4244 }
4245
4246 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4247                                 u32 dma_core_id, bool is_secure)
4248 {
4249         u32 prot, irq_handler_offset;
4250         struct cpu_dyn_regs *dyn_regs;
4251         int map_table_entry;
4252
4253         prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4254         if (is_secure)
4255                 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4256
4257         WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4258
4259         dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4260         irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4261
4262         WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4263                         lower_32_bits(CFG_BASE + irq_handler_offset));
4264
4265         WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4266                         upper_32_bits(CFG_BASE + irq_handler_offset));
4267
4268         map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4269         WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4270                 gaudi2_irq_map_table[map_table_entry].cpu_id);
4271
4272         /* Enable the DMA channel */
4273         WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4274 }
4275
4276 static void gaudi2_init_kdma(struct hl_device *hdev)
4277 {
4278         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4279         u32 reg_base;
4280
4281         if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4282                 return;
4283
4284         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4285
4286         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4287
4288         gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4289 }
4290
4291 static void gaudi2_init_pdma(struct hl_device *hdev)
4292 {
4293         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4294         u32 reg_base;
4295
4296         if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4297                 return;
4298
4299         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4300         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4301
4302         reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4303         gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4304
4305         reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4306         gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4307
4308         reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4309         gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4310
4311         gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4312 }
4313
4314 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4315 {
4316         u32 reg_base, base_edma_core_id, base_edma_qman_id;
4317
4318         base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4319         base_edma_qman_id = edma_stream_base[seq];
4320
4321         reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4322         gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4323
4324         reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4325         gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4326 }
4327
4328 static void gaudi2_init_edma(struct hl_device *hdev)
4329 {
4330         struct asic_fixed_properties *prop = &hdev->asic_prop;
4331         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4332         int dcore, inst;
4333
4334         if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4335                 return;
4336
4337         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4338                 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4339                         u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4340
4341                         if (!(prop->edma_enabled_mask & BIT(seq)))
4342                                 continue;
4343
4344                         gaudi2_init_edma_instance(hdev, seq);
4345
4346                         gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4347                 }
4348         }
4349 }
4350
4351 /*
4352  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4353  * @hdev: pointer to habanalabs device structure.
4354  * @sob_id: sync object ID.
4355  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4356  * @interrupt_id: interrupt ID.
4357  *
4358  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4359  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4360  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4361  *
4362  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4363  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
4364  * completion, by decrementing the sync object value and re-arming the monitor.
4365  */
4366 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4367                                                         u32 first_mon_id, u32 interrupt_id)
4368 {
4369         u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4370         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4371         u64 addr;
4372         u8 mask;
4373
4374         /* Reset the SOB value */
4375         sob_offset = sob_id * sizeof(u32);
4376         WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4377
4378         /* Configure 3 monitors:
4379          * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4380          * 2. Decrement SOB value by 1.
4381          * 3. Re-arm the master monitor.
4382          */
4383
4384         first_mon_offset = first_mon_id * sizeof(u32);
4385
4386         /* 2nd monitor: Decrement SOB value by 1 */
4387         mon_offset = first_mon_offset + sizeof(u32);
4388
4389         addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4390         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4391         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4392
4393         payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4394                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4395                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4396         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4397
4398         /* 3rd monitor: Re-arm the master monitor */
4399         mon_offset = first_mon_offset + 2 * sizeof(u32);
4400
4401         addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4402         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4403         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4404
4405         sob_group = sob_id / 8;
4406         mask = ~BIT(sob_id & 0x7);
4407         mode = 0; /* comparison mode is "greater than or equal to" */
4408         arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4409                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4410                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4411                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4412
4413         payload = arm;
4414         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4415
4416         /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4417         mon_offset = first_mon_offset;
4418
4419         config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4420         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4421
4422         addr = gaudi2->virt_msix_db_dma_addr;
4423         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4424         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4425
4426         payload = interrupt_id;
4427         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4428
4429         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4430 }
4431
4432 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4433 {
4434         u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4435         struct asic_fixed_properties *prop = &hdev->asic_prop;
4436
4437         /* Decoder normal/abnormal interrupts */
4438         for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4439                 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4440                         continue;
4441
4442                 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4443                 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4444                 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4445                 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4446
4447                 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4448                 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4449                 interrupt_id += 1;
4450                 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4451         }
4452 }
4453
4454 static void gaudi2_init_sm(struct hl_device *hdev)
4455 {
4456         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4457         u64 cq_address;
4458         u32 reg_val;
4459         int i;
4460
4461         /* Enable HBW/LBW CQ for completion monitors */
4462         reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4463         reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4464
4465         for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4466                 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4467
4468         /* Enable only HBW CQ for KDMA completion monitor */
4469         reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4470         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4471
4472         /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
4473         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4474         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4475         WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4476
4477         for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4478                 cq_address =
4479                         hdev->completion_queue[i].bus_address;
4480
4481                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4482                                                         lower_32_bits(cq_address));
4483                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4484                                                         upper_32_bits(cq_address));
4485                 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4486                                                         ilog2(HL_CQ_SIZE_IN_BYTES));
4487         }
4488
4489         /* Configure kernel ASID and MMU BP */
4490         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4491         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4492
4493         /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4494         gaudi2_prepare_sm_for_virt_msix_db(hdev);
4495 }
4496
4497 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4498 {
4499         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4500         u32 reg_val;
4501         int i;
4502
4503         reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4504         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4505         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4506         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4507         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4508         reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4509
4510         WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4511         WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4512
4513         for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4514                 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4515                 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4516         }
4517 }
4518
4519 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4520                                                         bool config_qman_only)
4521 {
4522         u32 queue_id_base, reg_base;
4523
4524         switch (dcore_id) {
4525         case 0:
4526                 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4527                 break;
4528         case 1:
4529                 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4530                 break;
4531         case 2:
4532                 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4533                 break;
4534         case 3:
4535                 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4536                 break;
4537         default:
4538                 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4539                 return;
4540         }
4541
4542         if (!config_qman_only) {
4543                 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4544                 gaudi2_init_mme_acc(hdev, reg_base);
4545         }
4546
4547         reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4548         gaudi2_init_qman(hdev, reg_base, queue_id_base);
4549 }
4550
4551 static void gaudi2_init_mme(struct hl_device *hdev)
4552 {
4553         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4554         int i;
4555
4556         if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4557                 return;
4558
4559         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4560                 gaudi2_init_dcore_mme(hdev, i, false);
4561
4562                 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4563         }
4564 }
4565
4566 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4567 {
4568         /* Mask arithmetic and QM interrupts in TPC */
4569         WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4570
4571         /* Set 16 cache lines */
4572         WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4573                         2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4574 }
4575
4576 struct gaudi2_tpc_init_cfg_data {
4577         enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4578 };
4579
4580 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4581                                         u32 offset, struct iterate_module_ctx *ctx)
4582 {
4583         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4584         struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4585         u32 queue_id_base;
4586         u8 seq;
4587
4588         queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4589
4590         if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4591                 /* gets last sequence number */
4592                 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4593         else
4594                 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4595
4596         gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4597         gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4598
4599         gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4600 }
4601
4602 static void gaudi2_init_tpc(struct hl_device *hdev)
4603 {
4604         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4605         struct gaudi2_tpc_init_cfg_data init_cfg_data;
4606         struct iterate_module_ctx tpc_iter;
4607
4608         if (!hdev->asic_prop.tpc_enabled_mask)
4609                 return;
4610
4611         if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4612                 return;
4613
4614         init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4615         init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4616         init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4617         init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4618         tpc_iter.fn = &gaudi2_init_tpc_config;
4619         tpc_iter.data = &init_cfg_data;
4620         gaudi2_iterate_tpcs(hdev, &tpc_iter);
4621 }
4622
4623 static void gaudi2_init_rotator(struct hl_device *hdev)
4624 {
4625         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4626         u32 i, reg_base, queue_id;
4627
4628         queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4629
4630         for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4631                 reg_base = gaudi2_qm_blocks_bases[queue_id];
4632                 gaudi2_init_qman(hdev, reg_base, queue_id);
4633
4634                 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4635         }
4636 }
4637
4638 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4639 {
4640         u32 sob_id;
4641
4642         /* VCMD normal interrupt */
4643         sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4644         WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4645                         mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4646         WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4647
4648         /* VCMD abnormal interrupt */
4649         sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4650         WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4651                         mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4652         WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4653 }
4654
4655 static void gaudi2_init_dec(struct hl_device *hdev)
4656 {
4657         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4658         u32 dcore_id, dec_id, dec_bit;
4659         u64 base_addr;
4660
4661         if (!hdev->asic_prop.decoder_enabled_mask)
4662                 return;
4663
4664         if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4665                 return;
4666
4667         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4668                 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4669                         dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4670
4671                         if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4672                                 continue;
4673
4674                         base_addr =  mmDCORE0_DEC0_CMD_BASE +
4675                                         BRDG_CTRL_BLOCK_OFFSET +
4676                                         dcore_id * DCORE_OFFSET +
4677                                         dec_id * DCORE_VDEC_OFFSET;
4678
4679                         gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4680
4681                         gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4682                 }
4683
4684         for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4685                 dec_bit = PCIE_DEC_SHIFT + dec_id;
4686                 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4687                         continue;
4688
4689                 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4690                                 dec_id * DCORE_VDEC_OFFSET;
4691
4692                 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4693
4694                 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4695         }
4696 }
4697
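/* Program the hop0 physical address of the given ASID in the STLB and poll
 * until the busy bit clears, indicating the MMU has accepted the update.
 */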
4698 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4699                                         u32 stlb_base, u32 asid, u64 phys_addr)
4700 {
4701         u32 status, timeout_usec;
4702         int rc;
4703
4704         if (hdev->pldm || !hdev->pdev)
4705                 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4706         else
4707                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4708
4709         WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4710         WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4711         WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4712         WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4713
4714         rc = hl_poll_timeout(
4715                 hdev,
4716                 stlb_base + STLB_BUSY_OFFSET,
4717                 status,
4718                 !(status & 0x80000000),
4719                 1000,
4720                 timeout_usec);
4721
4722         if (rc) {
4723                 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4724                 return rc;
4725         }
4726
4727         return 0;
4728 }
4729
4730 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4731                                         u32 start_offset, u32 inv_start_val,
4732                                         u32 flags)
4733 {
4734         /* clear PMMU mem line cache (only needed in mmu range invalidation) */
4735         if (flags & MMU_OP_CLEAR_MEMCACHE)
4736                 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4737
4738         if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4739                 return;
4740
4741         WREG32(stlb_base + start_offset, inv_start_val);
4742 }
4743
4744 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4745                                                 struct gaudi2_cache_invld_params *inv_params)
4746 {
4747         u32 status, timeout_usec, start_offset;
4748         int rc;
4749
4750         timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4751                                         GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4752
4753         /* poll PMMU mem line cache (only needed in mmu range invalidation) */
4754         if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4755                 rc = hl_poll_timeout(
4756                         hdev,
4757                         mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4758                         status,
4759                         status & 0x1,
4760                         1000,
4761                         timeout_usec);
4762
4763                 if (rc)
4764                         return rc;
4765
4766                 /* Need to manually reset the status to 0 */
4767                 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4768         }
4769
4770         /* Lower cache does not work with cache lines, hence we can skip its
4771          * invalidation upon map and invalidate only upon unmap
4772          */
4773         if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4774                 return 0;
4775
4776         start_offset = inv_params->range_invalidation ?
4777                         STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4778
4779         rc = hl_poll_timeout(
4780                 hdev,
4781                 stlb_base + start_offset,
4782                 status,
4783                 !(status & 0x1),
4784                 1000,
4785                 timeout_usec);
4786
4787         return rc;
4788 }
4789
4790 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4791 {
4792         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4793         u32 hw_cap;
4794
4795         hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4796
4797         if (gaudi2->hw_cap_initialized & hw_cap)
4798                 return true;
4799
4800         return false;
4801 }
4802
4803 /* This function shall be called only for HMMUs whose capability bit is set */
4804 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4805 {
4806         u32 offset;
4807
4808         offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4809         return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4810 }
4811
4812 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4813                                                 struct gaudi2_cache_invld_params *inv_params)
4814 {
4815         u32 start_offset;
4816
4817         if (inv_params->range_invalidation) {
4818                 /* Set the address range.
4819                  * Note that the start address we write to the register is, by design,
4820                  * not included in the invalidation range.
4821                  * That is why we set an address lower than the first address we
4822                  * actually want to be included in the range invalidation.
4823                  */
4824                 u64 start = inv_params->start_va - 1;
4825
4826                 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4827
4828                 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4829                                 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4830
4831                 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4832                                 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4833
4834                 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4835                                 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4836
4837                 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4838                                 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4839         } else {
4840                 start_offset = STLB_INV_ALL_START_OFFSET;
4841         }
4842
4843         gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4844                                                 inv_params->inv_start_val, inv_params->flags);
4845 }
4846
4847 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4848                                                 int dcore_id, int hmmu_id,
4849                                                 struct gaudi2_cache_invld_params *inv_params)
4850 {
4851         u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4852
4853         gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4854 }
4855
4856 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4857                                                 int dcore_id, int hmmu_id,
4858                                                 struct gaudi2_cache_invld_params *inv_params)
4859 {
4860         u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4861
4862         return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4863 }
4864
4865 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4866                                                 struct gaudi2_cache_invld_params *inv_params)
4867 {
4868         int dcore_id, hmmu_id;
4869
4870         /* first send all invalidation commands */
4871         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4872                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4873                         if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4874                                 continue;
4875
4876                         gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4877                 }
4878         }
4879
4880         /* next, poll all invalidations status */
4881         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4882                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4883                         int rc;
4884
4885                         if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4886                                 continue;
4887
4888                         rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4889                                                                                 inv_params);
4890                         if (rc)
4891                                 return rc;
4892                 }
4893         }
4894
4895         return 0;
4896 }
4897
4898 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4899 {
4900         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4901         struct gaudi2_cache_invld_params invld_params;
4902         int rc = 0;
4903
4904         if (hdev->reset_info.hard_reset_pending)
4905                 return rc;
4906
4907         invld_params.range_invalidation = false;
4908         invld_params.inv_start_val = 1;
4909
4910         if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4911                 invld_params.flags = flags;
4912                 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4913                 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4914                                                                                 &invld_params);
4915         } else if (flags & MMU_OP_PHYS_PACK) {
4916                 invld_params.flags = 0;
4917                 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4918         }
4919
4920         return rc;
4921 }
4922
4923 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4924                                 u32 flags, u32 asid, u64 va, u64 size)
4925 {
4926         struct gaudi2_cache_invld_params invld_params = {0};
4927         struct gaudi2_device *gaudi2 = hdev->asic_specific;
4928         u64 start_va, end_va;
4929         u32 inv_start_val;
4930         int rc = 0;
4931
4932         if (hdev->reset_info.hard_reset_pending)
4933                 return 0;
4934
4935         inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4936                         1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4937                         asid << MMU_RANGE_INV_ASID_SHIFT);
4938         start_va = va;
4939         end_va = start_va + size;
4940
4941         if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4942                 /* As range invalidation does not support zero address we will
4943                  * do full invalidation in this case
4944                  */
4945                 if (start_va) {
4946                         invld_params.range_invalidation = true;
4947                         invld_params.start_va = start_va;
4948                         invld_params.end_va = end_va;
4949                         invld_params.inv_start_val = inv_start_val;
4950                         invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
4951                 } else {
4952                         invld_params.range_invalidation = false;
4953                         invld_params.inv_start_val = 1;
4954                         invld_params.flags = flags;
4955                 }
4956
4957
4958                 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4959                 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4960                                                                                 &invld_params);
4961                 if (rc)
4962                         return rc;
4963
4964         } else if (flags & MMU_OP_PHYS_PACK) {
4965                 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
4966                 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
4967                 invld_params.inv_start_val = inv_start_val;
4968                 invld_params.flags = flags;
4969                 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4970         }
4971
4972         return rc;
4973 }
4974
4975 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
4976 {
4977         struct asic_fixed_properties *prop = &hdev->asic_prop;
4978         u64 hop0_addr;
4979         u32 asid, max_asid = prop->max_asid;
4980         int rc;
4981
4982         /* it takes too much time to init all of the ASIDs on palladium */
4983         if (hdev->pldm)
4984                 max_asid = min((u32) 8, max_asid);
4985
4986         for (asid = 0 ; asid < max_asid ; asid++) {
4987                 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
4988                 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
4989                 if (rc) {
4990                         dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
4991                         return rc;
4992                 }
4993         }
4994
4995         return 0;
4996 }
4997
4998 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
4999 {
5000         u32 status, timeout_usec;
5001         int rc;
5002
5003         if (hdev->pldm || !hdev->pdev)
5004                 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5005         else
5006                 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5007
5008         WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5009
5010         rc = hl_poll_timeout(
5011                 hdev,
5012                 stlb_base + STLB_SRAM_INIT_OFFSET,
5013                 status,
5014                 !status,
5015                 1000,
5016                 timeout_usec);
5017
5018         if (rc)
5019                 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5020
5021         rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5022         if (rc)
5023                 return rc;
5024
5025         WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5026
5027         rc = hl_poll_timeout(
5028                 hdev,
5029                 stlb_base + STLB_INV_ALL_START_OFFSET,
5030                 status,
5031                 !status,
5032                 1000,
5033                 timeout_usec);
5034
5035         if (rc)
5036                 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5037
5038         WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5039
5040         return rc;
5041 }
5042
5043 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5044 {
5045         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5046         u32 mmu_base, stlb_base;
5047         int rc;
5048
5049         if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5050                 return 0;
5051
5052         mmu_base = mmPMMU_HBW_MMU_BASE;
5053         stlb_base = mmPMMU_HBW_STLB_BASE;
5054
5055         RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5056                 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5057                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5058                 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5059                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5060                 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5061                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5062                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5063                 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5064                 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5065                 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5066
5067         WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5068
5069         if (PAGE_SIZE == SZ_64K) {
5070                 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5071                 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5072                         FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5073                         FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5074                         FIELD_PREP(
5075                                 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5076                                 1),
5077                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5078                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5079                         DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5080         }
5081
5082         WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5083
5084         rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5085         if (rc)
5086                 return rc;
5087
5088         gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5089
5090         return 0;
5091 }
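
Since this function is where the new RMWREG32_SHIFTED macro is exercised, here is a minimal user-space model of what a read-modify-write with an already-shifted value amounts to. rmw_shifted() and the sample field masks are illustrative stand-ins, not the driver's actual macro or register fields.

/* Illustrative model only: not the driver's RMWREG32_SHIFTED macro. */
#include <stdint.h>
#include <stdio.h>

/* Keep the bits outside 'mask', replace the bits inside it with 'val'
 * (where 'val' is already shifted into its final bit positions).
 */
static uint32_t rmw_shifted(uint32_t old, uint32_t val, uint32_t mask)
{
        return (old & ~mask) | (val & mask);
}

int main(void)
{
        uint32_t reg = 0xffffffffu;              /* pretend current register value */
        uint32_t first_hop_mask = 0x0000000fu;   /* assumed field at bits [3:0]  */
        uint32_t last_hop_mask  = 0x00000f00u;   /* assumed field at bits [11:8] */

        /* Program first_hop = 0 and last_hop = 5 in one shot, leaving other bits alone */
        reg = rmw_shifted(reg, (0u << 0) | (5u << 8), first_hop_mask | last_hop_mask);

        printf("0x%08x\n", reg);                 /* -> 0xfffff5f0 */
        return 0;
}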
5092
5093 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5094                                 int hmmu_id)
5095 {
5096         struct asic_fixed_properties *prop = &hdev->asic_prop;
5097         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5098         u32 offset, mmu_base, stlb_base, hw_cap;
5099         u8 dmmu_seq;
5100         int rc;
5101
5102         dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5103         hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5104
5105         /*
5106          * return if DMMU is already initialized or if it's not out of
5107          * isolation (due to cluster binning)
5108          */
5109         if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5110                 return 0;
5111
5112         offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5113         mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5114         stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5115
5116         RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5117                         MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5118
5119         RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5120                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5121                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5122                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5123                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5124                 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5125                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5126                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5127                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5128                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5129                         DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5130
5131         RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5132                         STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5133
5134         WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5135
5136         rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5137         if (rc)
5138                 return rc;
5139
5140         gaudi2->hw_cap_initialized |= hw_cap;
5141
5142         return 0;
5143 }
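
The FIELD_PREP() calls above shift a field value into position according to its mask. A user-space approximation of the same idea is sketched below; ex_field_prep() is an assumed stand-in for the kernel's FIELD_PREP and derives the shift from the mask's lowest set bit.

/* Illustrative approximation of FIELD_PREP(): shift 'val' to the mask's lowest set bit. */
#include <stdint.h>
#include <stdio.h>

static uint32_t ex_field_prep(uint32_t mask, uint32_t val)
{
        unsigned int shift = (unsigned int)__builtin_ctz(mask);  /* position of lowest set bit */

        return (val << shift) & mask;
}

int main(void)
{
        uint32_t small_p_mask = 0x000000f0u;  /* assumed field at bits [7:4]  */
        uint32_t large_p_mask = 0x00000f00u;  /* assumed field at bits [11:8] */

        /* Pack small_p = 3 and large_p = 3 into one register value */
        uint32_t val = ex_field_prep(small_p_mask, 3) | ex_field_prep(large_p_mask, 3);

        printf("0x%08x\n", val);              /* -> 0x00000330 */
        return 0;
}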
5144
5145 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5146 {
5147         int rc, dcore_id, hmmu_id;
5148
5149         for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5150                 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5151                         rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5152                         if (rc)
5153                                 return rc;
5154                 }
5155
5156         return 0;
5157 }
5158
5159 static int gaudi2_mmu_init(struct hl_device *hdev)
5160 {
5161         int rc;
5162
5163         rc = gaudi2_pci_mmu_init(hdev);
5164         if (rc)
5165                 return rc;
5166
5167         rc = gaudi2_hbm_mmu_init(hdev);
5168         if (rc)
5169                 return rc;
5170
5171         return 0;
5172 }
5173
5174 static int gaudi2_hw_init(struct hl_device *hdev)
5175 {
5176         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5177         int rc;
5178
5179         /* Let's mark in the H/W that we have reached this point. We check
5180          * this value in the reset_before_init function to understand whether
5181          * we need to reset the chip before doing H/W init. This register is
5182          * cleared by the H/W upon H/W reset
5183          */
5184         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5185
5186         /* Perform read from the device to make sure device is up */
5187         RREG32(mmHW_STATE);
5188
5189         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5190          * So we set it here and if anyone tries to move it later to
5191          * a different address, there will be an error
5192          */
5193         if (hdev->asic_prop.iatu_done_by_fw)
5194                 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5195
5196         /*
5197          * Before pushing u-boot/linux to device, need to set the hbm bar to
5198          * base address of dram
5199          */
5200         if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5201                 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5202                 return -EIO;
5203         }
5204
5205         rc = gaudi2_init_cpu(hdev);
5206         if (rc) {
5207                 dev_err(hdev->dev, "failed to initialize CPU\n");
5208                 return rc;
5209         }
5210
5211         gaudi2_init_scrambler_hbm(hdev);
5212         gaudi2_init_kdma(hdev);
5213
5214         rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5215         if (rc) {
5216                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5217                 return rc;
5218         }
5219
5220         rc = gaudi2->cpucp_info_get(hdev);
5221         if (rc) {
5222                 dev_err(hdev->dev, "Failed to get cpucp info\n");
5223                 return rc;
5224         }
5225
5226         rc = gaudi2_mmu_init(hdev);
5227         if (rc)
5228                 return rc;
5229
5230         gaudi2_init_pdma(hdev);
5231         gaudi2_init_edma(hdev);
5232         gaudi2_init_sm(hdev);
5233         gaudi2_init_tpc(hdev);
5234         gaudi2_init_mme(hdev);
5235         gaudi2_init_rotator(hdev);
5236         gaudi2_init_dec(hdev);
5237         gaudi2_enable_timestamp(hdev);
5238
5239         rc = gaudi2_coresight_init(hdev);
5240         if (rc)
5241                 goto disable_queues;
5242
5243         rc = gaudi2_enable_msix(hdev);
5244         if (rc)
5245                 goto disable_queues;
5246
5247         /* Perform read from the device to flush all configuration */
5248         RREG32(mmHW_STATE);
5249
5250         return 0;
5251
5252 disable_queues:
5253         gaudi2_disable_dma_qmans(hdev);
5254         gaudi2_disable_mme_qmans(hdev);
5255         gaudi2_disable_tpc_qmans(hdev);
5256         gaudi2_disable_rot_qmans(hdev);
5257         gaudi2_disable_nic_qmans(hdev);
5258
5259         gaudi2_disable_timestamp(hdev);
5260
5261         return rc;
5262 }
5263
5264 /**
5265  * gaudi2_send_hard_reset_cmd - common function to handle reset
5266  *
5267  * @hdev: pointer to the habanalabs device structure
5268  *
5269  * This function handles the various possible scenarios for reset.
5270  * It considers if reset is handled by driver\FW and what FW components are loaded
5271  */
5272 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5273 {
5274         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5275         bool heartbeat_reset, preboot_only, cpu_initialized = false;
5276         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5277         u32 cpu_boot_status;
5278
5279         preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5280         heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5281
5282         /*
5283          * Handle the corner case where the failure happened while loading the cpu
5284          * management app, and the driver didn't detect any failure during FW load.
5285          * In that scenario the driver will send only HALT_MACHINE, and no one will
5286          * respond to this request since the FW is already back in preboot and
5287          * cannot handle such a cmd.
5288          * In this case, the next time the management app loads it will check the
5289          * events register, which will still have the halt indication, and will reboot the device.
5290          * The solution is to let preboot clear all relevant registers before the next boot,
5291          * once the driver sends COMMS_RST_DEV.
5292          */
5293         cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5294
5295         if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5296                         (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5297                 cpu_initialized = true;
5298
5299         /*
5300          * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
5301          * 1. FW reset: FW initiates the reset sequence
5302          * 2. driver reset: FW will start the HALT sequence (the preparations for the
5303          *                  reset but not the reset itself, as it is not implemented
5304          *                  on their part) and LKD will wait to let FW complete the
5305          *                  sequence before issuing the reset
5306          */
5307         if (!preboot_only && cpu_initialized) {
5308                 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5309                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5310
5311                 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5312         }
5313
5314         /*
5315          * When working with preboot (without Linux/Boot fit) we can
5316          * communicate only using the COMMS commands to issue halt/reset.
5317          *
5318          * For the case in which we are working with Linux/Bootfit this is a hail-mary
5319          * attempt to revive the card in the small chance that the f/w has
5320          * experienced a watchdog event, which caused it to return back to preboot.
5321          * In that case, triggering reset through GIC won't help. We need to
5322          * trigger the reset as if Linux wasn't loaded.
5323          *
5324          * We do it only if the reset cause was HB, because that would be the
5325          * indication of such an event.
5326          *
5327          * In case watchdog hasn't expired but we still got HB, then this won't
5328          * do any damage.
5329          */
5330
5331         if (heartbeat_reset || preboot_only || !cpu_initialized) {
5332                 if (hdev->asic_prop.hard_reset_done_by_fw)
5333                         hl_fw_ask_hard_reset_without_linux(hdev);
5334                 else
5335                         hl_fw_ask_halt_machine_without_linux(hdev);
5336         }
5337 }
5338
5339 /**
5340  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5341  *
5342  * @hdev: pointer to the habanalabs device structure
5343  * @reset_sleep_ms: sleep time in msec after reset
5344  *
5345  * This function executes hard reset based on if driver/FW should do the reset
5346  */
5347 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5348 {
5349         if (hdev->asic_prop.hard_reset_done_by_fw) {
5350                 gaudi2_send_hard_reset_cmd(hdev);
5351                 return;
5352         }
5353
5354         /* Set device to handle FLR by H/W as we will put the device
5355          * CPU to halt mode
5356          */
5357         WREG32(mmPCIE_AUX_FLR_CTRL,
5358                         (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5359
5360         gaudi2_send_hard_reset_cmd(hdev);
5361
5362         WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5363 }
5364
5365 /**
5366  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5367  *
5368  * @hdev: pointer to the habanalabs device structure
5369  * @reset_sleep_ms: sleep time in msec after reset
5370  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5371  *
5372  * This function executes soft reset based on if driver/FW should do the reset
5373  */
5374 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5375                                                 bool driver_performs_reset)
5376 {
5377         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5378
5379         if (!driver_performs_reset) {
5380                 /* set SP to indicate reset request sent to FW */
5381                 if (dyn_regs->cpu_rst_status)
5382                         WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5383                 else
5384                         WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5385
5386                 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5387                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5388                 return;
5389         }
5390
5391         /* Block access to engines, QMANs and SM during reset, these
5392          * RRs will be reconfigured after soft reset.
5393          * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
5394          */
5395         gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5396                                         mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5397
5398         gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5399                                 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5400                                 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5401
5402         WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5403 }
5404
5405 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5406                                                                 u32 poll_timeout_us)
5407 {
5408         int i, rc = 0;
5409         u32 reg_val;
5410
5411         /* Without this sleep, the reset will not work */
5412         msleep(reset_sleep_ms);
5413
5414         /* We poll the BTM done indication multiple times after reset due to
5415          * HW erratum 'GAUDI2_0300'
5416          */
5417         for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5418                 rc = hl_poll_timeout(
5419                         hdev,
5420                         mmPSOC_GLOBAL_CONF_BTM_FSM,
5421                         reg_val,
5422                         reg_val == 0,
5423                         1000,
5424                         poll_timeout_us);
5425
5426         if (rc)
5427                 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5428 }
5429
5430 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5431 {
5432         int i, rc = 0;
5433         u32 reg_val;
5434
5435         for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5436                 rc = hl_poll_timeout(
5437                         hdev,
5438                         mmCPU_RST_STATUS_TO_HOST,
5439                         reg_val,
5440                         reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5441                         1000,
5442                         poll_timeout_us);
5443
5444         if (rc)
5445                 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5446                                 reg_val);
5447 }
5448
5449 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5450 {
5451         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5452         u32 poll_timeout_us, reset_sleep_ms;
5453         bool driver_performs_reset = false;
5454
5455         if (hdev->pldm) {
5456                 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5457                                                 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5458                 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5459         } else {
5460                 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5461                 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5462         }
5463
5464         if (fw_reset)
5465                 goto skip_reset;
5466
5467         gaudi2_reset_arcs(hdev);
5468
5469         if (hard_reset) {
5470                 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5471                 gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5472         } else {
5473                 /*
5474                  * As we also have to support working with preboot only (which does not support
5475                  * soft reset), we have to make sure that security is disabled before letting the
5476                  * driver do the reset. The user shall control the BFE flags to avoid requesting a
5477                  * soft reset on a secured device with preboot only.
5478                  */
5479                 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5480                                                         !hdev->asic_prop.fw_security_enabled);
5481                 gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5482         }
5483
5484 skip_reset:
5485         if (driver_performs_reset || hard_reset)
5486                 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5487         else
5488                 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5489
5490         if (!gaudi2)
5491                 return;
5492
5493         gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5494         gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5495
5496         /*
5497          * Clear NIC capability mask in order for driver to re-configure
5498          * NIC QMANs. NIC ports will not be re-configured during soft
5499          * reset as we call gaudi2_nic_init only during hard reset
5500          */
5501         gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5502
5503         if (hard_reset) {
5504                 gaudi2->hw_cap_initialized &=
5505                         ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5506                         HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5507                         HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5508                         HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5509                         HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5510
5511                 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5512         } else {
5513                 gaudi2->hw_cap_initialized &=
5514                         ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5515                         HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5516                         HW_CAP_ROT_MASK);
5517         }
5518 }
5519
5520 static int gaudi2_suspend(struct hl_device *hdev)
5521 {
5522         int rc;
5523
5524         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5525         if (rc)
5526                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5527
5528         return rc;
5529 }
5530
5531 static int gaudi2_resume(struct hl_device *hdev)
5532 {
5533         return gaudi2_init_iatu(hdev);
5534 }
5535
5536 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5537                 void *cpu_addr, dma_addr_t dma_addr, size_t size)
5538 {
5539         int rc;
5540
5541         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5542                         VM_DONTCOPY | VM_NORESERVE;
5543
5544 #ifdef _HAS_DMA_MMAP_COHERENT
5545
5546         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5547         if (rc)
5548                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5549
5550 #else
5551
5552         rc = remap_pfn_range(vma, vma->vm_start,
5553                                 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5554                                 size, vma->vm_page_prot);
5555         if (rc)
5556                 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5557
5558 #endif
5559
5560         return rc;
5561 }
5562
5563 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5564 {
5565         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5566         u64 hw_cap_mask = 0;
5567         u64 hw_tpc_cap_bit = 0;
5568         u64 hw_nic_cap_bit = 0;
5569         u64 hw_test_cap_bit = 0;
5570
5571         switch (hw_queue_id) {
5572         case GAUDI2_QUEUE_ID_PDMA_0_0:
5573         case GAUDI2_QUEUE_ID_PDMA_0_1:
5574         case GAUDI2_QUEUE_ID_PDMA_1_0:
5575                 hw_cap_mask = HW_CAP_PDMA_MASK;
5576                 break;
5577         case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5578                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5579                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5580                 break;
5581         case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5582                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5583                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5584                 break;
5585         case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5586                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5587                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5588                 break;
5589         case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5590                 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5591                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5592                 break;
5593
5594         case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5595                 hw_test_cap_bit = HW_CAP_MME_SHIFT;
5596                 break;
5597
5598         case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5599                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5600                 break;
5601
5602         case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5603                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5604                 break;
5605
5606         case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5607                 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5608                 break;
5609
5610         case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5611                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5612                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5613
5614                 /* special case where cap bit refers to the first queue id */
5615                 if (!hw_tpc_cap_bit)
5616                         return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5617                 break;
5618
5619         case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5620                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5621                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5622                 break;
5623
5624         case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5625                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5626                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5627                 break;
5628
5629         case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5630                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5631                         ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5632                 break;
5633
5634         case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5635                 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5636                 break;
5637
5638         case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5639                 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5640                 break;
5641
5642         case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5643                 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5644
5645                 /* special case where cap bit refers to the first queue id */
5646                 if (!hw_nic_cap_bit)
5647                         return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5648                 break;
5649
5650         case GAUDI2_QUEUE_ID_CPU_PQ:
5651                 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5652
5653         default:
5654                 return false;
5655         }
5656
5657         if (hw_tpc_cap_bit)
5658                 return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5659
5660         if (hw_nic_cap_bit)
5661                 return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5662
5663         if (hw_test_cap_bit)
5664                 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5665
5666         return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5667 }
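
The mapping above from a queue ID to an engine capability bit relies on each engine owning four consecutive queue IDs, so (hw_queue_id - range_base) >> 2 picks the engine index inside its range. A small hedged sketch of that arithmetic, with made-up base and shift values, follows.

/* Illustrative arithmetic only: made-up queue-ID base and capability-shift values. */
#include <stdint.h>
#include <stdio.h>

#define EX_QUEUE_ID_EDMA_BASE  28u   /* assumed first EDMA queue ID */
#define EX_CAP_EDMA_SHIFT      8u    /* assumed first EDMA capability bit */

/* Each engine exposes 4 queues, so dividing the ID offset by 4 yields the engine index */
static uint64_t ex_edma_cap_bit(uint32_t hw_queue_id)
{
        uint32_t engine = (hw_queue_id - EX_QUEUE_ID_EDMA_BASE) >> 2;

        return 1ULL << (EX_CAP_EDMA_SHIFT + engine);
}

int main(void)
{
        /* Queue IDs 32..35 all belong to EDMA engine 1 and map to the same cap bit */
        printf("0x%016llx\n", (unsigned long long)ex_edma_cap_bit(33));
        return 0;
}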
5668
5669 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5670 {
5671         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5672
5673         switch (arc_id) {
5674         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5675         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5676                 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5677
5678         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5679                 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5680
5681         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5682                 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5683
5684         default:
5685                 return false;
5686         }
5687 }
5688
5689 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5690 {
5691         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5692
5693         switch (arc_id) {
5694         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5695         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5696                 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5697                 break;
5698
5699         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5700                 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5701                 break;
5702
5703         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5704                 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5705                 break;
5706
5707         default:
5708                 return;
5709         }
5710 }
5711
5712 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5713 {
5714         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5715
5716         switch (arc_id) {
5717         case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5718         case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5719                 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5720                 break;
5721
5722         case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5723                 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5724                 break;
5725
5726         case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5727                 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5728                 break;
5729
5730         default:
5731                 return;
5732         }
5733 }
5734
5735 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5736 {
5737         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5738         u32 pq_offset, reg_base, db_reg_offset, db_value;
5739
5740         if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5741                 /*
5742                  * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5743                  * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5744                  * number.
5745                  */
5746                 pq_offset = (hw_queue_id & 0x3) * 4;
5747                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5748                 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5749         } else {
5750                 db_reg_offset = mmCPU_IF_PF_PQ_PI;
5751         }
5752
5753         db_value = pi;
5754
5755         /* ring the doorbell */
5756         WREG32(db_reg_offset, db_value);
5757
5758         if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5759                 /* make sure device CPU will read latest data from host */
5760                 mb();
5761                 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5762                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5763         }
5764 }
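
To make the PQ_PI addressing above concrete, here is a small sketch of deriving the doorbell register offset from the queue ID; the QM base and PQ_PI offset values are placeholders, not the real register map.

/* Illustrative sketch only: placeholder register addresses, not the real Gaudi2 map. */
#include <stdint.h>
#include <stdio.h>

#define EX_QM_BASE          0x41000000u  /* assumed QMAN block base for this queue */
#define EX_QM_PQ_PI_0_OFF   0x000000f0u  /* assumed offset of PQ_PI_0 inside the block */

static uint32_t ex_db_reg(uint32_t hw_queue_id)
{
        /* 4 PQ_PI registers per QMAN; the low 2 bits of the queue ID select one of them */
        uint32_t pq_offset = (hw_queue_id & 0x3) * 4;

        return EX_QM_BASE + EX_QM_PQ_PI_0_OFF + pq_offset;
}

int main(void)
{
        /* Queue IDs 0..3 map to PQ_PI_0..PQ_PI_3 of the same QMAN block */
        printf("0x%08x\n", ex_db_reg(2));   /* -> base + 0xf0 + 8 */
        return 0;
}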
5765
5766 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5767 {
5768         __le64 *pbd = (__le64 *) bd;
5769
5770         /* The QMANs are in host memory so a simple copy suffices */
5771         pqe[0] = pbd[0];
5772         pqe[1] = pbd[1];
5773 }
5774
5775 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5776                                 dma_addr_t *dma_handle, gfp_t flags)
5777 {
5778         return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5779 }
5780
5781 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5782                                 void *cpu_addr, dma_addr_t dma_handle)
5783 {
5784         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5785 }
5786
5787 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5788                                 u32 timeout, u64 *result)
5789 {
5790         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5791
5792         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5793                 if (result)
5794                         *result = 0;
5795                 return 0;
5796         }
5797
5798         if (!timeout)
5799                 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5800
5801         return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5802 }
5803
5804 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5805                                 gfp_t mem_flags, dma_addr_t *dma_handle)
5806 {
5807         if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5808                 return NULL;
5809
5810         return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5811 }
5812
5813 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5814 {
5815         dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5816 }
5817
5818 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5819                                                 dma_addr_t *dma_handle)
5820 {
5821         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5822 }
5823
5824 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5825 {
5826         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5827 }
5828
5829 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5830                                         enum dma_data_direction dir)
5831 {
5832         dma_addr_t dma_addr;
5833
5834         dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5835         if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5836                 return 0;
5837
5838         return dma_addr;
5839 }
5840
5841 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5842                                         enum dma_data_direction dir)
5843 {
5844         dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5845 }
5846
5847 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5848 {
5849         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5850         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5851
5852         if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5853                 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5854                 return -EINVAL;
5855         }
5856
5857         /* Just check if CB address is valid */
5858
5859         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5860                                         parser->user_cb_size,
5861                                         asic_prop->sram_user_base_address,
5862                                         asic_prop->sram_end_address))
5863                 return 0;
5864
5865         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5866                                         parser->user_cb_size,
5867                                         asic_prop->dram_user_base_address,
5868                                         asic_prop->dram_end_address))
5869                 return 0;
5870
5871         if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5872                 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5873                                                 parser->user_cb_size,
5874                                                 asic_prop->dmmu.start_addr,
5875                                                 asic_prop->dmmu.end_addr))
5876                 return 0;
5877
5878         if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5879                 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5880                                         parser->user_cb_size,
5881                                         asic_prop->pmmu.start_addr,
5882                                         asic_prop->pmmu.end_addr) ||
5883                         hl_mem_area_inside_range(
5884                                         (u64) (uintptr_t) parser->user_cb,
5885                                         parser->user_cb_size,
5886                                         asic_prop->pmmu_huge.start_addr,
5887                                         asic_prop->pmmu_huge.end_addr))
5888                         return 0;
5889
5890         } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
5891                 if (!hdev->pdev)
5892                         return 0;
5893
5894                 if (!device_iommu_mapped(&hdev->pdev->dev))
5895                         return 0;
5896         }
5897
5898         dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5899                 parser->user_cb, parser->user_cb_size);
5900
5901         return -EFAULT;
5902 }
5903
5904 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5905 {
5906         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5907
5908         if (!parser->is_kernel_allocated_cb)
5909                 return gaudi2_validate_cb_address(hdev, parser);
5910
5911         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5912                 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5913                 return -EINVAL;
5914         }
5915
5916         return 0;
5917 }
5918
5919 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5920 {
5921         struct gaudi2_device *gaudi2 = hdev->asic_specific;
5922
5923         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5924                 return 0;
5925
5926         return hl_fw_send_heartbeat(hdev);
5927 }
5928
5929 /* This is an internal helper function, used to update the KDMA mmu props.
5930  * Should be called while holding the kdma lock.
5931  */
5932 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5933                                            bool mmu_bypass, u32 asid)
5934 {
5935         u32 rw_asid, rw_mmu_bp;
5936
5937         rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5938                       (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
5939
5940         rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
5941                         (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
5942
5943         WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
5944         WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
5945 }
5946
5947 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
5948                                                 u32 mon_payload, u32 sync_value)
5949 {
5950         u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
5951         u8 mask;
5952
5953         sob_offset = sob_id * 4;
5954         mon_offset = mon_id * 4;
5955
5956         /* Reset the SOB value */
5957         WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5958
5959         /* Configure this address with CQ_ID 0 because CQ_EN is set */
5960         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
5961
5962         /* Configure this address with CS index because CQ_EN is set */
5963         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
5964
5965         sync_group_id = sob_id / 8;
5966         mask = ~(1 << (sob_id & 0x7));
5967         mode = 1; /* comparison mode is "equal to" */
5968
5969         mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
5970         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
5971         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
5972         mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
5973         WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
5974 }
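
The MON_ARM word above packs the target value, comparison mode, group mask and sync-group ID into one register. A hedged user-space sketch of the same packing follows; the field widths and positions are assumptions chosen for the example, not the documented layout.

/* Illustrative packing only: assumed field positions, not the documented MON_ARM layout. */
#include <stdint.h>
#include <stdio.h>

static uint32_t ex_mon_arm(uint32_t sob_id, uint32_t sync_value)
{
        uint32_t sync_group_id = sob_id / 8;                 /* 8 SOBs per sync group */
        uint32_t mask = (~(1u << (sob_id & 0x7))) & 0xff;    /* watch only this SOB in the group */
        uint32_t mode = 1;                                    /* comparison mode "equal to" */

        /* Assumed layout: SOD [15:0], SOP [16], MASK [24:17], SID [31:25] */
        return (sync_value & 0xffff) |
               (mode << 16) |
               (mask << 17) |
               (sync_group_id << 25);
}

int main(void)
{
        printf("0x%08x\n", ex_mon_arm(10, 1));
        return 0;
}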
5975
5976 /* Submit a single copy/memset job to the KDMA engine and busy-wait for its completion */
5977 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
5978                                         u64 src_addr, u64 dst_addr,
5979                                         u32 size, bool is_memset)
5980 {
5981         u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
5982         struct hl_cq_entry *cq_base;
5983         struct hl_cq *cq;
5984         u64 comp_addr;
5985         int rc;
5986
5987         gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
5988                                 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
5989                                 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
5990
5991         comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5992                         (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
5993
5994         comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
5995                         FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
5996
5997         WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
5998         WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
5999         WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6000         WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6001         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6002         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6003         WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6004         WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6005
6006         commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6007                                 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6008
6009         if (is_memset)
6010                 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6011
6012         WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6013
6014         /* Wait for completion */
6015         cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6016         cq_base = cq->kernel_address;
6017         polling_addr = (u32 *)&cq_base[cq->ci];
6018
6019         if (hdev->pldm)
6020                 /* 20 seconds of timeout for each 1MB */
6021                 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6022         else
6023                 timeout = KDMA_TIMEOUT_USEC;
6024
6025         /* Polling */
6026         rc = hl_poll_timeout_memory(
6027                         hdev,
6028                         polling_addr,
6029                         status,
6030                         (status == 1),
6031                         1000,
6032                         timeout,
6033                         true);
6034
6035         *polling_addr = 0;
6036
6037         if (rc) {
6038                 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6039                 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6040                 return rc;
6041         }
6042
6043         cq->ci = hl_cq_inc_ptr(cq->ci);
6044
6045         return 0;
6046 }
6047
6048 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6049 {
6050         u32 i;
6051
6052         for (i = 0 ; i < size ; i += sizeof(u32))
6053                 WREG32(addr + i, val);
6054 }
6055
6056 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6057 {
6058         u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6059
6060         if (enable) {
6061                 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6062                 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6063         } else {
6064                 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6065                 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6066         }
6067 }
6068
6069 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6070 {
6071         u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6072         u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6073         u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6074         struct packet_msg_short *msg_short_pkt;
6075         dma_addr_t pkt_dma_addr;
6076         size_t pkt_size;
6077         int rc;
6078
6079         if (hdev->pldm)
6080                 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6081         else
6082                 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6083
6084         pkt_size = sizeof(*msg_short_pkt);
6085         msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6086         if (!msg_short_pkt) {
6087                 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6088                         hw_queue_id);
6089                 return -ENOMEM;
6090         }
6091
6092         tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6093                 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6094                 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6095                 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6096                 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6097
6098         msg_short_pkt->value = cpu_to_le32(sob_val);
6099         msg_short_pkt->ctl = cpu_to_le32(tmp);
6100
6101         /* Reset the SOB value */
6102         WREG32(sob_addr, 0);
6103
6104         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6105         if (rc) {
6106                 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6107                         hw_queue_id);
6108                 goto free_pkt;
6109         }
6110
6111         rc = hl_poll_timeout(
6112                         hdev,
6113                         sob_addr,
6114                         tmp,
6115                         (tmp == sob_val),
6116                         1000,
6117                         timeout_usec);
6118
6119         if (rc == -ETIMEDOUT) {
6120                 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6121                         hw_queue_id, tmp);
6122                 rc = -EIO;
6123         }
6124
6125         /* Reset the SOB value */
6126         WREG32(sob_addr, 0);
6127
6128 free_pkt:
6129         hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6130         return rc;
6131 }
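
The ctl word for the MSG_SHORT test packet above is built from opcode, barrier and SOB-addressing fields. Below is a minimal hedged sketch of that kind of packing; the shift values and opcode are stand-ins, not the real GAUDI2_PKT_* definitions.

/* Illustrative packing only: assumed shift values, not the real GAUDI2_PKT_* layout. */
#include <stdint.h>
#include <stdio.h>

#define EX_CTL_OPCODE_SHIFT  24u  /* assumed opcode position */
#define EX_CTL_EB_SHIFT      31u  /* assumed engine-barrier bit */
#define EX_CTL_MB_SHIFT      30u  /* assumed message-barrier bit */
#define EX_CTL_BASE_SHIFT    22u  /* assumed SOB base select */
#define EX_CTL_ADDR_SHIFT    0u   /* assumed SOB address/offset field */

#define EX_PACKET_MSG_SHORT  2u   /* assumed opcode value */

int main(void)
{
        uint32_t sob_base = 1, sob_offset = 0x20;

        /* OR the fields into one control word, mirroring the style used above */
        uint32_t ctl = (EX_PACKET_MSG_SHORT << EX_CTL_OPCODE_SHIFT) |
                       (1u << EX_CTL_EB_SHIFT) |
                       (1u << EX_CTL_MB_SHIFT) |
                       (sob_base << EX_CTL_BASE_SHIFT) |
                       (sob_offset << EX_CTL_ADDR_SHIFT);

        printf("ctl = 0x%08x\n", ctl);
        return 0;
}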
6132
6133 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6134 {
6135         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6136
6137         /*
6138          * check capability here as send_cpu_message() won't update the result
6139          * value if no capability
6140          */
6141         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6142                 return 0;
6143
6144         return hl_fw_test_cpu_queue(hdev);
6145 }
6146
6147 static int gaudi2_test_queues(struct hl_device *hdev)
6148 {
6149         int i, rc, ret_val = 0;
6150
6151         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6152                 if (!gaudi2_is_queue_enabled(hdev, i))
6153                         continue;
6154
6155                 gaudi2_qman_set_test_mode(hdev, i, true);
6156                 rc = gaudi2_test_queue(hdev, i);
6157                 gaudi2_qman_set_test_mode(hdev, i, false);
6158
6159                 if (rc) {
6160                         ret_val = -EINVAL;
6161                         goto done;
6162                 }
6163         }
6164
6165         rc = gaudi2_test_cpu_queue(hdev);
6166         if (rc) {
6167                 ret_val = -EINVAL;
6168                 goto done;
6169         }
6170
6171 done:
6172         return ret_val;
6173 }
6174
6175 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6176 {
6177         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6178         size_t irq_arr_size;
6179
6180         /* TODO: missing gaudi2_nic_resume.
6181          * Until it is implemented, nic_hw_cap_initialized will remain zeroed
6182          */
6183         gaudi2_init_arcs(hdev);
6184         gaudi2_scrub_arcs_dccm(hdev);
6185         gaudi2_init_security(hdev);
6186
6187         /* Unmask all IRQs since some could have been received during the soft reset */
6188         irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6189         return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6190 }
6191
6192 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6193                                         struct iterate_module_ctx *ctx)
6194 {
6195         struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6196         u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6197         bool is_eng_idle;
6198         int engine_idx;
6199
6200         if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6201                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6202         else
6203                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6204                                 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6205
6206         tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6207         qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6208         qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6209         qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6210
6211         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6212                                                 IS_TPC_IDLE(tpc_cfg_sts);
6213         *(idle_data->is_idle) &= is_eng_idle;
6214
6215         if (idle_data->mask && !is_eng_idle)
6216                 set_bit(engine_idx, idle_data->mask);
6217
6218         if (idle_data->e)
6219                 hl_engine_data_sprintf(idle_data->e,
6220                                         idle_data->tpc_fmt, dcore, inst,
6221                                         is_eng_idle ? "Y" : "N",
6222                                         qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6223 }
6224
6225 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6226                                         struct engines_data *e)
6227 {
6228         u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6229                 mme_arch_sts, dec_swreg15, dec_enabled_bit;
6230         struct asic_fixed_properties *prop = &hdev->asic_prop;
6231         const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6232         unsigned long *mask = (unsigned long *) mask_arr;
6233         const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6234         const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6235         const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6236         const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6237         const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6238         const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6239         bool is_idle = true, is_eng_idle;
6240         u64 offset;
6241
6242         struct gaudi2_tpc_idle_data tpc_idle_data = {
6243                 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6244                 .e = e,
6245                 .mask = mask,
6246                 .is_idle = &is_idle,
6247         };
6248         struct iterate_module_ctx tpc_iter = {
6249                 .fn = &gaudi2_is_tpc_engine_idle,
6250                 .data = &tpc_idle_data,
6251         };
6252
6253         int engine_idx, i, j;
6254
6255         /* EDMA, Two engines per Dcore */
6256         if (e)
6257                 hl_engine_data_sprintf(e,
6258                         "\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6259                         "----  ----  -------  ------------  ----------------------\n");
6260
6261         for (i = 0; i < NUM_OF_DCORES; i++) {
6262                 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6263                         int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6264
6265                         if (!(prop->edma_enabled_mask & BIT(seq)))
6266                                 continue;
6267
6268                         engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6269                                         i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6270                         offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6271
6272                         dma_core_idle_ind_mask =
6273                         RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6274
6275                         qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6276                         qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6277                         qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6278
6279                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6280                                         IS_DMA_IDLE(dma_core_idle_ind_mask);
6281                         is_idle &= is_eng_idle;
6282
6283                         if (mask && !is_eng_idle)
6284                                 set_bit(engine_idx, mask);
6285
6286                         if (e)
6287                                 hl_engine_data_sprintf(e, edma_fmt, i, j,
6288                                                         is_eng_idle ? "Y" : "N",
6289                                                         qm_glbl_sts0,
6290                                                         dma_core_idle_ind_mask);
6291                 }
6292         }
6293
6294         /* PDMA, Two engines in Full chip */
6295         if (e)
6296                 hl_engine_data_sprintf(e,
6297                                         "\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6298                                         "----  -------  ------------  ----------------------\n");
6299
6300         for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6301                 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6302                 offset = i * PDMA_OFFSET;
6303                 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6304
6305                 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6306                 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6307                 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6308
6309                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6310                                 IS_DMA_IDLE(dma_core_idle_ind_mask);
6311                 is_idle &= is_eng_idle;
6312
6313                 if (mask && !is_eng_idle)
6314                         set_bit(engine_idx, mask);
6315
6316                 if (e)
6317                         hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6318                                                 qm_glbl_sts0, dma_core_idle_ind_mask);
6319         }
6320
6321         /* NIC, twelve macros in Full chip */
6322         if (e && hdev->nic_ports_mask)
6323                 hl_engine_data_sprintf(e,
6324                                         "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6325                                         "---  -------  ------------  ----------\n");
6326
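        /*
         * Descriptive note (added for clarity): NIC engines come in pairs per
         * macro - an even index starts a new NIC_OFFSET block, an odd index
         * stays in the same macro and only adds NIC_QM_OFFSET on top of it.
         */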
6327         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6328                 if (!(i & 1))
6329                         offset = i / 2 * NIC_OFFSET;
6330                 else
6331                         offset += NIC_QM_OFFSET;
6332
6333                 if (!(hdev->nic_ports_mask & BIT(i)))
6334                         continue;
6335
6336                 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6337
6338
6339                 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6340                 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6341                 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6342
6343                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6344                 is_idle &= is_eng_idle;
6345
6346                 if (mask && !is_eng_idle)
6347                         set_bit(engine_idx, mask);
6348
6349                 if (e)
6350                         hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6351                                                 qm_glbl_sts0, qm_cgm_sts);
6352         }
6353
6354         if (e)
6355                 hl_engine_data_sprintf(e,
6356                                         "\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6357                                         "---  ----  -------  ------------  ---------------\n");
6358         /* MME, one per Dcore */
6359         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6360                 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6361                 offset = i * DCORE_OFFSET;
6362
6363                 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6364                 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6365                 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6366
6367                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6368                 is_idle &= is_eng_idle;
6369
6370                 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6371                 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6372                 is_idle &= is_eng_idle;
6373
6374                 if (e)
6375                         hl_engine_data_sprintf(e, mme_fmt, i, "N",
6376                                 is_eng_idle ? "Y" : "N",
6377                                 qm_glbl_sts0,
6378                                 mme_arch_sts);
6379
6380                 if (mask && !is_eng_idle)
6381                         set_bit(engine_idx, mask);
6382         }
6383
6384         /*
6385          * TPC
6386          */
6387         if (e && prop->tpc_enabled_mask)
6388                 hl_engine_data_sprintf(e,
6389                         "\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  TPC_CFG_STATUS\n"
6390                         "----  ---  --------  ------------  ----------  --------------\n");
6391
6392         gaudi2_iterate_tpcs(hdev, &tpc_iter);
6393
6394         /* Decoders, two per Dcore and two shared PCIe decoders */
6395         if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6396                 hl_engine_data_sprintf(e,
6397                         "\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6398                         "----  ---  -------  ---------------\n");
6399
6400         for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6401                 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6402                         dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6403                         if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6404                                 continue;
6405
6406                         engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6407                                         i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6408                         offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6409
6410                         dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6411                         is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6412                         is_idle &= is_eng_idle;
6413
6414                         if (mask && !is_eng_idle)
6415                                 set_bit(engine_idx, mask);
6416
6417                         if (e)
6418                                 hl_engine_data_sprintf(e, dec_fmt, i, j,
6419                                                         is_eng_idle ? "Y" : "N", dec_swreg15);
6420                 }
6421         }
6422
6423         if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6424                 hl_engine_data_sprintf(e,
6425                         "\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6426                         "--------  -------  ---------------\n");
6427
6428         /* Check shared (PCIe) decoders */
6429         for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6430                 dec_enabled_bit = PCIE_DEC_SHIFT + i;
6431                 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6432                         continue;
6433
6434                 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6435                 offset = i * DCORE_DEC_OFFSET;
6436                 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6437                 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6438                 is_idle &= is_eng_idle;
6439
6440                 if (mask && !is_eng_idle)
6441                         set_bit(engine_idx, mask);
6442
6443                 if (e)
6444                         hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6445                                                 is_eng_idle ? "Y" : "N", dec_swreg15);
6446         }
6447
6448         if (e)
6449                 hl_engine_data_sprintf(e,
6450                         "\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6451                         "----  ----  -------  ------------  ----------  -------------\n");
6452
6453         for (i = 0 ; i < NUM_OF_ROT ; i++) {
6454                 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6455
6456                 offset = i * ROT_OFFSET;
6457
6458                 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6459                 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6460                 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6461
6462                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6463                 is_idle &= is_eng_idle;
6464
6465                 if (mask && !is_eng_idle)
6466                         set_bit(engine_idx, mask);
6467
6468                 if (e)
6469                         hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6470                                         qm_glbl_sts0, qm_cgm_sts, "-");
6471         }
6472
6473         return is_idle;
6474 }
6475
6476 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6477         __acquires(&gaudi2->hw_queues_lock)
6478 {
6479         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6480
6481         spin_lock(&gaudi2->hw_queues_lock);
6482 }
6483
6484 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6485         __releases(&gaudi2->hw_queues_lock)
6486 {
6487         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6488
6489         spin_unlock(&gaudi2->hw_queues_lock);
6490 }
6491
6492 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6493 {
6494         return hdev->pdev->device;
6495 }
6496
6497 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6498 {
6499         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6500
6501         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6502                 return 0;
6503
6504         return hl_fw_get_eeprom_data(hdev, data, max_size);
6505 }
6506
6507 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6508 {
6509         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6510 }
6511
6512 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6513 {
6514         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6515
6516         if (aggregate) {
6517                 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
6518                 return gaudi2->events_stat_aggregate;
6519         }
6520
6521         *size = (u32) sizeof(gaudi2->events_stat);
6522         return gaudi2->events_stat;
6523 }
6524
6525 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6526                                 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6527 {
6528         u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6529                         dcore_vdec_id + DCORE_OFFSET * dcore_id;
6530
6531         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6532         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6533
6534         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6535         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6536
6537         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6538         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6539
6540         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6541         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6542
6543         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6544         WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6545 }
6546
6547 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6548 {
6549         u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6550                         (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6551         struct asic_fixed_properties *prop = &hdev->asic_prop;
6552         u32 dcore_offset = dcore_id * DCORE_OFFSET;
6553         u32 vdec_id, i, ports_offset, reg_val;
6554         u8 edma_seq_base;
6555
6556         /* EDMA */
6557         edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6558         if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6559                 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6560                 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6561                 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6562                 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6563         }
6564
6565         if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6566                 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6567                 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6568                 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6569                 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6570         }
6571
6572         /* Sync Mngr */
6573         WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6574         /*
6575          * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
6576          * for any access type
6577          */
6578         if (dcore_id > 0) {
6579                 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6580                           (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6581                 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6582                 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6583         }
6584
6585         WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6586         WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6587
6588         for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6589                 ports_offset = i * DCORE_MME_SBTE_OFFSET;
6590                 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6591                                 dcore_offset + ports_offset, 0);
6592                 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6593                                 dcore_offset + ports_offset, rw_asid);
6594         }
6595
6596         for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6597                 ports_offset = i * DCORE_MME_WB_OFFSET;
6598                 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6599                                 dcore_offset + ports_offset, 0);
6600                 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6601                                 dcore_offset + ports_offset, rw_asid);
6602         }
6603
6604         WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6605         WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6606
6607         /*
6608          * Decoders
6609          */
6610         for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6611                 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6612                         gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6613         }
6614 }
6615
6616 static void gaudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6617                                 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6618 {
6619         u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6620
6621         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6622         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6623
6624         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6625         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6626
6627         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6628         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6629
6630         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6631         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6632
6633         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6634         WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6635 }
6636
6637 static void gaudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6638                                                         u32 rw_asid, u32 rw_mmu_bp)
6639 {
6640         u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6641
6642         WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6643         WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6644 }
6645
6646 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6647 {
6648         u32 reg_base, reg_offset, reg_val = 0;
6649
6650         reg_base = gaudi2_arc_blocks_bases[cpu_id];
6651
6652         /* Enable MMU and configure asid for all relevant ARC regions */
6653         reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6654         reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6655
6656         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6657         WREG32(reg_base + reg_offset, reg_val);
6658
6659         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6660         WREG32(reg_base + reg_offset, reg_val);
6661
6662         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6663         WREG32(reg_base + reg_offset, reg_val);
6664
6665         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6666         WREG32(reg_base + reg_offset, reg_val);
6667
6668         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6669         WREG32(reg_base + reg_offset, reg_val);
6670
6671         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6672         WREG32(reg_base + reg_offset, reg_val);
6673
6674         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6675         WREG32(reg_base + reg_offset, reg_val);
6676
6677         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6678         WREG32(reg_base + reg_offset, reg_val);
6679
6680         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6681         WREG32(reg_base + reg_offset, reg_val);
6682
6683         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6684         WREG32(reg_base + reg_offset, reg_val);
6685
6686         reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6687         WREG32(reg_base + reg_offset, reg_val);
6688 }
6689
6690 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6691 {
6692         int i;
6693
6694         if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6695                 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6696
6697         for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6698                 gaudi2_arc_mmu_prepare(hdev, i, asid);
6699
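        /*
         * Descriptive note (added for clarity): queue IDs advance in groups of
         * 4 streams per QMAN, so stepping by 4 visits each QMAN once and
         * prepares its ARC MMU mapping exactly once.
         */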
6700         for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6701                 if (!gaudi2_is_queue_enabled(hdev, i))
6702                         continue;
6703
6704                 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6705         }
6706
6707         return 0;
6708 }
6709
6710 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6711 {
6712         struct asic_fixed_properties *prop = &hdev->asic_prop;
6713         u32 rw_asid, offset;
6714         int rc, i;
6715
6716         rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6717                         FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6718
6719         WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6720         WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6721         WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6722         WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6723
6724         WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6725         WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6726         WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6727         WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6728
6729         /* ROT */
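        /*
         * Descriptive note (added for clarity): RMWREG32() below is a
         * read-modify-write - only the ASID field selected by MMUBP_ASID_MASK
         * is updated, the remaining bits of the register keep their values.
         */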
6730         for (i = 0 ; i < NUM_OF_ROT ; i++) {
6731                 offset = i * ROT_OFFSET;
6732                 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6733                 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6734                 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6735                 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6736                 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6737         }
6738
6739         /* Shared Decoders are the last bits in the decoders mask */
6740         if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6741                 gaudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6742
6743         if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6744                 gaudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6745
6746         /* arc farm arc dup eng */
6747         for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6748                 gaudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6749
6750         rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6751         if (rc)
6752                 return rc;
6753
6754         return 0;
6755 }
6756
6757 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
6758                                         struct iterate_module_ctx *ctx)
6759 {
6760         struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6761
6762         WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6763         WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6764         WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6765         WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6766 }
6767
6768 /* zero the MMUBP and set the ASID */
6769 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6770 {
6771         struct gaudi2_device *gaudi2 = hdev->asic_specific;
6772         struct gaudi2_tpc_mmu_data tpc_mmu_data;
6773         struct iterate_module_ctx tpc_iter = {
6774                 .fn = &gaudi2_tpc_mmu_prepare,
6775                 .data = &tpc_mmu_data,
6776         };
6777         int rc, i;
6778
6779         if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6780                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6781                 return -EINVAL;
6782         }
6783
6784         if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6785                 return 0;
6786
6787         rc = gaudi2_mmu_shared_prepare(hdev, asid);
6788         if (rc)
6789                 return rc;
6790
6791         /* configure DCORE MMUs */
6792         tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6793                                 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6794         gaudi2_iterate_tpcs(hdev, &tpc_iter);
6795         for (i = 0 ; i < NUM_OF_DCORES ; i++)
6796                 gaudi2_mmu_dcore_prepare(hdev, i, asid);
6797
6798         return 0;
6799 }
6800
6801 static inline bool is_info_event(u32 event)
6802 {
6803         switch (event) {
6804         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6805         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
6806                 return true;
6807         default:
6808                 return false;
6809         }
6810 }
6811
6812 static void gaudi2_print_irq_info(struct hl_device *hdev, u16 event_type)
6813 {
6814         char desc[64] = "";
6815         bool event_valid = false;
6816
6817         /* return in case of NIC status event - these events are received periodically and not as
6818          * an indication of an error, thus they are not printed.
6819          */
6820         if (event_type >= GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 &&
6821                 event_type <= GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1)
6822                 return;
6823
6824         if (gaudi2_irq_map_table[event_type].valid) {
6825                 snprintf(desc, sizeof(desc), "%s", gaudi2_irq_map_table[event_type].name);
6826                 event_valid = true;
6827         }
6828
6829         if (!event_valid)
6830                 snprintf(desc, sizeof(desc), "N/A");
6831
6832         if (is_info_event(event_type))
6833                 dev_info_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6834                                                                 event_type, desc);
6835         else
6836                 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6837                                                                 event_type, desc);
6838 }
6839
6840 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6841                 struct hl_eq_ecc_data *ecc_data)
6842 {
6843         u64 ecc_address = 0, ecc_syndrom = 0;
6844         u8 memory_wrapper_idx = 0;
6845
6846         ecc_address = le64_to_cpu(ecc_data->ecc_address);
6847         ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6848         memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6849
6850         dev_err(hdev->dev,
6851                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. critical %u.\n",
6852                 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6853
6854         return !!ecc_data->is_critical;
6855 }
6856
6857 /*
6858  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6859  *
6860  * @idx: the current pi/ci value
6861  * @q_len: the queue length (power of 2)
6862  *
6863  * @return the cyclically decremented index
6864  */
6865 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6866 {
6867         u32 mask = q_len - 1;
6868
6869         /*
6870          * modular decrement is equivalent to adding (q_len - 1);
6871          * later we take the LSBs to make sure the value is in the
6872          * range [0, q_len - 1]
6873          */
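        /*
         * Illustrative example (added, not from real traffic): with q_len = 8
         * the mask is 0x7, so idx 0 wraps to (0 + 7) & 0x7 = 7 and idx 3
         * decrements to (3 + 7) & 0x7 = 2.
         */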
6874         return (idx + q_len - 1) & mask;
6875 }
6876
6877 /**
6878  * gaudi2_print_sw_config_stream_data - print SW config stream data
6879  *
6880  * @hdev: pointer to the habanalabs device structure
6881  * @stream: the QMAN's stream
6882  * @qman_base: base address of QMAN registers block
6883  */
6884 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6885                                                 u32 stream, u64 qman_base)
6886 {
6887         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6888         u32 cq_ptr_lo_off, size;
6889
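        /*
         * Descriptive note (added for clarity): register addresses are
         * computed relative to this QMAN's base; the DCORE0 TPC0 QMAN
         * register map is used only as a template for the offsets of
         * CQ_PTR_LO/HI and CQ_TSIZE within a QMAN block and for the
         * stride between streams.
         */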
6890         cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6891
6892         cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6893                                                                         stream * cq_ptr_lo_off;
6894
6895         cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6896
6897         cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6898
6899         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6900         size = RREG32(cq_tsize);
6901         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6902                 stream, cq_ptr, size);
6903 }
6904
6905 /**
6906  * gaudi2_print_last_pqes_on_err - print last PQEs on error
6907  *
6908  * @hdev: pointer to the habanalabs device structure
6909  * @qid_base: first QID of the QMAN (out of 4 streams)
6910  * @stream: the QMAN's stream
6911  * @qman_base: base address of QMAN registers block
6912  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6913  */
6914 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6915                                                 u64 qman_base, bool pr_sw_conf)
6916 {
6917         u32 ci, qm_ci_stream_off;
6918         struct hl_hw_queue *q;
6919         u64 pq_ci;
6920         int i;
6921
6922         q = &hdev->kernel_queues[qid_base + stream];
6923
6924         qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6925         pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6926                                                 stream * qm_ci_stream_off;
6927
6928         hdev->asic_funcs->hw_queues_lock(hdev);
6929
6930         if (pr_sw_conf)
6931                 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6932
6933         ci = RREG32(pq_ci);
6934
6935         /* we should start printing from ci - 1 */
6936         ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6937
6938         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6939                 struct hl_bd *bd;
6940                 u64 addr;
6941                 u32 len;
6942
6943                 bd = q->kernel_address;
6944                 bd += ci;
6945
6946                 len = le32_to_cpu(bd->len);
6947                 /* len 0 means an uninitialized entry - break */
6948                 if (!len)
6949                         break;
6950
6951                 addr = le64_to_cpu(bd->ptr);
6952
6953                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
6954                         stream, ci, addr, len);
6955
6956                 /* get previous ci, wrap if needed */
6957                 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6958         }
6959
6960         hdev->asic_funcs->hw_queues_unlock(hdev);
6961 }
6962
6963 /**
6964  * print_qman_data_on_err - extract QMAN data on error
6965  *
6966  * @hdev: pointer to the habanalabs device structure
6967  * @qid_base: first QID of the QMAN (out of 4 streams)
6968  * @stream: the QMAN's stream
6969  * @qman_base: base address of QMAN registers block
6970  *
6971  * This function attempts to extract as much data as possible on a QMAN error.
6972  * On upper CP print the SW config stream data and last 8 PQEs.
6973  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6974  */
6975 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
6976 {
6977         u32 i;
6978
6979         if (stream != QMAN_STREAMS) {
6980                 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
6981                 return;
6982         }
6983
6984         gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6985
6986         for (i = 0 ; i < QMAN_STREAMS ; i++)
6987                 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
6988 }
6989
6990 static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *qm_name,
6991                                                 u64 qman_base, u32 qid_base)
6992 {
6993         u32 i, j, glbl_sts_val, arb_err_val, num_error_causes;
6994         u64 glbl_sts_addr, arb_err_addr;
6995         char reg_desc[32];
6996
6997         glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
6998         arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
6999
7000         /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7001         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7002                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7003
7004                 if (!glbl_sts_val)
7005                         continue;
7006
7007                 if (i == QMAN_STREAMS) {
7008                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7009                         num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7010                 } else {
7011                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7012                         num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7013                 }
7014
7015                 for (j = 0 ; j < num_error_causes ; j++)
7016                         if (glbl_sts_val & BIT(j))
7017                                 dev_err_ratelimited(hdev->dev, "%s %s. err cause: %s\n",
7018                                                 qm_name, reg_desc,
7019                                                 i == QMAN_STREAMS ?
7020                                                 gaudi2_qman_lower_cp_error_cause[j] :
7021                                                 gaudi2_qman_error_cause[j]);
7022
7023                 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7024         }
7025
7026         arb_err_val = RREG32(arb_err_addr);
7027
7028         if (!arb_err_val)
7029                 return;
7030
7031         for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7032                 if (arb_err_val & BIT(j))
7033                         dev_err_ratelimited(hdev->dev, "%s ARB_ERR. err cause: %s\n",
7034                                                 qm_name, gaudi2_qman_arb_error_cause[j]);
7035         }
7036 }
7037
7038 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7039                         u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7040                         bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7041                         enum gaudi2_engine_id id, u64 *event_mask)
7042 {
7043         u32 razwi_hi, razwi_lo, razwi_xy;
7044         u16 eng_id = id;
7045         u8 rd_wr_flag;
7046
7047         if (is_write) {
7048                 if (read_razwi_regs) {
7049                         razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7050                         razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7051                         razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7052                 } else {
7053                         razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7054                         razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7055                         razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7056                 }
7057                 rd_wr_flag = HL_RAZWI_WRITE;
7058         } else {
7059                 if (read_razwi_regs) {
7060                         razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7061                         razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7062                         razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7063                 } else {
7064                         razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7065                         razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7066                         razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7067                 }
7068                 rd_wr_flag = HL_RAZWI_READ;
7069         }
7070
7071         hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7072                                 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7073
7074         dev_err_ratelimited(hdev->dev,
7075                 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7076                 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7077 }
7078
7079 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7080                         u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7081                         bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7082                         enum gaudi2_engine_id id, u64 *event_mask)
7083 {
7084         u32 razwi_addr, razwi_xy;
7085         u16 eng_id = id;
7086         u8 rd_wr_flag;
7087
7088         if (is_write) {
7089                 if (read_razwi_regs) {
7090                         razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7091                         razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7092                 } else {
7093                         razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7094                         razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7095                 }
7096
7097                 rd_wr_flag = HL_RAZWI_WRITE;
7098         } else {
7099                 if (read_razwi_regs) {
7100                         razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7101                         razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7102                 } else {
7103                         razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7104                         razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7105                 }
7106
7107                 rd_wr_flag = HL_RAZWI_READ;
7108         }
7109
7110         hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7111         dev_err_ratelimited(hdev->dev,
7112                                 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7113                                 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7114                                                 razwi_xy);
7115 }
7116
7117 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7118                                                 enum razwi_event_sources module, u8 module_idx)
7119 {
7120         switch (module) {
7121         case RAZWI_TPC:
7122                 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7123                         return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7124                 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7125                                 (module_idx % NUM_OF_TPC_PER_DCORE) +
7126                                 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7127
7128         case RAZWI_MME:
7129                 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7130                         (module_idx * ENGINE_ID_DCORE_OFFSET));
7131
7132         case RAZWI_EDMA:
7133                 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7134                         (module_idx % NUM_OF_EDMA_PER_DCORE));
7135
7136         case RAZWI_PDMA:
7137                 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7138
7139         case RAZWI_NIC:
7140                 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7141
7142         case RAZWI_DEC:
7143                 if (module_idx == 8)
7144                         return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7145
7146                 if (module_idx == 9)
7147                         return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7148
7149                 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7150                                 (module_idx % NUM_OF_DEC_PER_DCORE) +
7151                                 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7152
7153         case RAZWI_ROT:
7154                 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7155
7156         default:
7157                 return GAUDI2_ENGINE_ID_SIZE;
7158         }
7159 }
7160
7161 /*
7162  * This function handles RR (Range Register) hit events
7163  * raised by initiators, not PSOC RAZWI.
7164  */
7165 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7166                                 enum razwi_event_sources module, u8 module_idx,
7167                                 u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info,
7168                                 u64 *event_mask)
7169 {
7170         bool via_sft = false, read_razwi_regs = false;
7171         u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
7172         u64 rtr_mstr_if_base_addr;
7173         u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7174         u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7175         char initiator_name[64];
7176
7177         if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7178                 read_razwi_regs = true;
7179
7180         switch (module) {
7181         case RAZWI_TPC:
7182                 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7183                 sprintf(initiator_name, "TPC_%u", module_idx);
7184                 break;
7185         case RAZWI_MME:
7186                 sprintf(initiator_name, "MME_%u", module_idx);
7187                 switch (module_sub_idx) {
7188                 case MME_WAP0:
7189                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7190                         break;
7191                 case MME_WAP1:
7192                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7193                         break;
7194                 case MME_WRITE:
7195                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7196                         break;
7197                 case MME_READ:
7198                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7199                         break;
7200                 case MME_SBTE0:
7201                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7202                         break;
7203                 case MME_SBTE1:
7204                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7205                         break;
7206                 case MME_SBTE2:
7207                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7208                         break;
7209                 case MME_SBTE3:
7210                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7211                         break;
7212                 case MME_SBTE4:
7213                         rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7214                         break;
7215                 default:
7216                         return;
7217                 }
7218                 break;
7219         case RAZWI_EDMA:
7220                 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7221                 dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7222                 via_sft = true;
7223                 sprintf(initiator_name, "EDMA_%u", module_idx);
7224                 break;
7225         case RAZWI_PDMA:
7226                 rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7227                 sprintf(initiator_name, "PDMA_%u", module_idx);
7228                 break;
7229         case RAZWI_NIC:
7230                 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7231                 sprintf(initiator_name, "NIC_%u", module_idx);
7232                 break;
7233         case RAZWI_DEC:
7234                 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7235                 sprintf(initiator_name, "DEC_%u", module_idx);
7236                 break;
7237         case RAZWI_ROT:
7238                 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7239                 sprintf(initiator_name, "ROT_%u", module_idx);
7240                 break;
7241         default:
7242                 return;
7243         }
7244
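        /*
         * Descriptive note (added for clarity): when the event data comes from
         * FW (razwi_info is valid and we are not forced to read the
         * registers), razwi_happened_mask already encodes whether the access
         * was HBW or LBW and whether it was a write (AW) or a read (AR), so no
         * RR registers need to be read here.
         */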
7245         if (!read_razwi_regs) {
7246                 if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7247                         hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7248                                                                 RAZWI_HAPPENED_AW;
7249                         hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7250                                                                 RAZWI_HAPPENED_AR;
7251                 } else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7252                         lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7253                                                                 RAZWI_HAPPENED_AW;
7254                         lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7255                                                                 RAZWI_HAPPENED_AR;
7256                 }
7257                 rtr_mstr_if_base_addr = 0;
7258
7259                 goto dump_info;
7260         }
7261
7262         /* Find router mstr_if register base */
7263         if (via_sft) {
7264                 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7265                                 dcore_id * SFT_DCORE_OFFSET +
7266                                 sft_id * SFT_IF_OFFSET +
7267                                 RTR_MSTR_IF_OFFSET;
7268         } else {
7269                 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7270                 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7271                 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7272                                 dcore_id * DCORE_OFFSET +
7273                                 dcore_rtr_id * DCORE_RTR_OFFSET +
7274                                 RTR_MSTR_IF_OFFSET;
7275         }
7276
7277         /* Find out event cause by reading "RAZWI_HAPPENED" registers */
7278         hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7279
7280         hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7281
7282         if (via_sft) {
7283                 /* SFT has a separate MSTR_IF for LBW; only there can we
7284                  * read the LBW RAZWI related registers
7285                  */
7286                 u64 base;
7287
7288                 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7289                                 RTR_LBW_MSTR_IF_OFFSET;
7290
7291                 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7292
7293                 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7294         } else {
7295                 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7296
7297                 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7298         }
7299
7300 dump_info:
7301         /* check if there is no RR razwi indication at all */
7302         if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7303                 return;
7304
7305         eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7306         if (hbw_shrd_aw) {
7307                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7308                                                 initiator_name, read_razwi_regs, razwi_info,
7309                                                 eng_id, event_mask);
7310
7311                 /* Clear event indication */
7312                 if (read_razwi_regs)
7313                         WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7314         }
7315
7316         if (hbw_shrd_ar) {
7317                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7318                                                 initiator_name, read_razwi_regs, razwi_info,
7319                                                 eng_id, event_mask);
7320
7321                 /* Clear event indication */
7322                 if (read_razwi_regs)
7323                         WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7324         }
7325
7326         if (lbw_shrd_aw) {
7327                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7328                                                 initiator_name, read_razwi_regs, razwi_info,
7329                                                 eng_id, event_mask);
7330
7331                 /* Clear event indication */
7332                 if (read_razwi_regs)
7333                         WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7334         }
7335
7336         if (lbw_shrd_ar) {
7337                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7338                                                 initiator_name, read_razwi_regs, razwi_info,
7339                                                 eng_id, event_mask);
7340
7341                 /* Clear event indication */
7342                 if (read_razwi_regs)
7343                         WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7344         }
7345 }
7346
7347 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7348 {
7349         struct asic_fixed_properties *prop = &hdev->asic_prop;
7350         u8 mod_idx, sub_mod;
7351
7352         /* check all TPCs */
7353         for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7354                 if (prop->tpc_enabled_mask & BIT(mod_idx))
7355                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL,
7356                                                                 NULL);
7357         }
7358
7359         /* check all MMEs */
7360         for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7361                 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7362                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7363                                                                         sub_mod, NULL, NULL);
7364
7365         /* check all EDMAs */
7366         for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7367                 if (prop->edma_enabled_mask & BIT(mod_idx))
7368                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL,
7369                                                                 NULL);
7370
7371         /* check all PDMAs */
7372         for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7373                 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL,
7374                                                         NULL);
7375
7376         /* check all NICs */
7377         for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7378                 if (hdev->nic_ports_mask & BIT(mod_idx))
7379                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7380                                                                 NULL, NULL);
7381
7382         /* check all DECs */
7383         for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7384                 if (prop->decoder_enabled_mask & BIT(mod_idx))
7385                         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL,
7386                                                                 NULL);
7387
7388         /* check all ROTs */
7389         for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7390                 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL);
7391 }
7392
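     /* Map a router (RTR) id to a human-readable list of the initiators that go
      * through it, for the unmapped-address RAZWI prints below.
      */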
7393 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7394 {
7395         switch (rtr_id) {
7396         case DCORE0_RTR0:
7397                 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7398         case DCORE0_RTR1:
7399                 return "TPC0/1";
7400         case DCORE0_RTR2:
7401                 return "TPC2/3";
7402         case DCORE0_RTR3:
7403                 return "TPC4/5";
7404         case DCORE0_RTR4:
7405                 return "MME0_SBTE0/1";
7406         case DCORE0_RTR5:
7407                 return "MME0_WAP0/SBTE2";
7408         case DCORE0_RTR6:
7409                 return "MME0_CTRL_WR/SBTE3";
7410         case DCORE0_RTR7:
7411                 return "MME0_WAP1/CTRL_RD/SBTE4";
7412         case DCORE1_RTR0:
7413                 return "MME1_WAP1/CTRL_RD/SBTE4";
7414         case DCORE1_RTR1:
7415                 return "MME1_CTRL_WR/SBTE3";
7416         case DCORE1_RTR2:
7417                 return "MME1_WAP0/SBTE2";
7418         case DCORE1_RTR3:
7419                 return "MME1_SBTE0/1";
7420         case DCORE1_RTR4:
7421                 return "TPC10/11";
7422         case DCORE1_RTR5:
7423                 return "TPC8/9";
7424         case DCORE1_RTR6:
7425                 return "TPC6/7";
7426         case DCORE1_RTR7:
7427                 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7428         case DCORE2_RTR0:
7429                 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7430         case DCORE2_RTR1:
7431                 return "TPC16/17";
7432         case DCORE2_RTR2:
7433                 return "TPC14/15";
7434         case DCORE2_RTR3:
7435                 return "TPC12/13";
7436         case DCORE2_RTR4:
7437                 return "MME2_SBTE0/1";
7438         case DCORE2_RTR5:
7439                 return "MME2_WAP0/SBTE2";
7440         case DCORE2_RTR6:
7441                 return "MME2_CTRL_WR/SBTE3";
7442         case DCORE2_RTR7:
7443                 return "MME2_WAP1/CTRL_RD/SBTE4";
7444         case DCORE3_RTR0:
7445                 return "MME3_WAP1/CTRL_RD/SBTE4";
7446         case DCORE3_RTR1:
7447                 return "MME3_CTRL_WR/SBTE3";
7448         case DCORE3_RTR2:
7449                 return "MME3_WAP0/SBTE2";
7450         case DCORE3_RTR3:
7451                 return "MME3_SBTE0/1";
7452         case DCORE3_RTR4:
7453                 return "TPC18/19";
7454         case DCORE3_RTR5:
7455                 return "TPC20/21";
7456         case DCORE3_RTR6:
7457                 return "TPC22/23";
7458         case DCORE3_RTR7:
7459                 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7460         default:
7461                 return "N/A";
7462         }
7463 }
7464
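     /* Fill 'engines' with the ids of the engines that can initiate traffic
      * through the given router and return their number (0 for an unknown id).
      */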
7465 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7466 {
7467         switch (rtr_id) {
7468         case DCORE0_RTR0:
7469                 engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7470                 engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7471                 engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7472                 engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7473                 engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7474                 engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7475                 engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7476                 engines[7] = GAUDI2_ENGINE_ID_PCIE;
7477                 engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7478                 engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7479                 engines[10] = GAUDI2_ENGINE_ID_PSOC;
7480                 return 11;
7481
7482         case DCORE0_RTR1:
7483                 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7484                 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7485                 return 2;
7486
7487         case DCORE0_RTR2:
7488                 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7489                 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7490                 return 2;
7491
7492         case DCORE0_RTR3:
7493                 engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7494                 engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7495                 return 2;
7496
7497         case DCORE0_RTR4:
7498         case DCORE0_RTR5:
7499         case DCORE0_RTR6:
7500         case DCORE0_RTR7:
7501                 engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7502                 return 1;
7503
7504         case DCORE1_RTR0:
7505         case DCORE1_RTR1:
7506         case DCORE1_RTR2:
7507         case DCORE1_RTR3:
7508                 engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7509                 return 1;
7510
7511         case DCORE1_RTR4:
7512                 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7513                 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7514                 return 2;
7515
7516         case DCORE1_RTR5:
7517                 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7518                 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7519                 return 2;
7520
7521         case DCORE1_RTR6:
7522                 engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7523                 engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7524                 return 2;
7525
7526         case DCORE1_RTR7:
7527                 engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7528                 engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7529                 engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7530                 engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7531                 engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7532                 engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7533                 engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7534                 engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7535                 engines[8] = GAUDI2_ENGINE_ID_KDMA;
7536                 engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7537                 engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7538                 return 11;
7539
7540         case DCORE2_RTR0:
7541                 engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7542                 engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7543                 engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7544                 engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7545                 engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7546                 engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7547                 engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7548                 engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7549                 engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7550                 return 9;
7551
7552         case DCORE2_RTR1:
7553                 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7554                 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7555                 return 2;
7556
7557         case DCORE2_RTR2:
7558                 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7559                 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7560                 return 2;
7561
7562         case DCORE2_RTR3:
7563                 engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7564                 engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7565                 return 2;
7566
7567         case DCORE2_RTR4:
7568         case DCORE2_RTR5:
7569         case DCORE2_RTR6:
7570         case DCORE2_RTR7:
7571                 engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7572                 return 1;
7573         case DCORE3_RTR0:
7574         case DCORE3_RTR1:
7575         case DCORE3_RTR2:
7576         case DCORE3_RTR3:
7577                 engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7578                 return 1;
7579         case DCORE3_RTR4:
7580                 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7581                 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7582                 return 2;
7583         case DCORE3_RTR5:
7584                 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7585                 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7586                 return 2;
7587         case DCORE3_RTR6:
7588                 engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7589                 engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7590                 return 2;
7591         case DCORE3_RTR7:
7592                 engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7593                 engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7594                 engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7595                 engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7596                 engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7597                 engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7598                 engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7599                 engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7600                 engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7601                 return 9;
7602         default:
7603                 return 0;
7604         }
7605 }
7606
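     /* Report an HBW RAZWI on an unmapped address: read the captured address
      * (hi/lo) from the router control block, clear the 'set' indication and
      * print the address together with the possible initiators.
      */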
7607 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7608                                                         u64 rtr_ctrl_base_addr, bool is_write,
7609                                                         u64 *event_mask)
7610 {
7611         u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7612         u32 razwi_hi, razwi_lo;
7613         u8 rd_wr_flag;
7614
7615         num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7616
7617         if (is_write) {
7618                 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7619                 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7620                 rd_wr_flag = HL_RAZWI_WRITE;
7621
7622                 /* Clear set indication */
7623                 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7624         } else {
7625                 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7626                 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7627                 rd_wr_flag = HL_RAZWI_READ;
7628
7629                 /* Clear set indication */
7630                 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7631         }
7632
7633         hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7634                                 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7635         dev_err_ratelimited(hdev->dev,
7636                 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7637                 is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7638
7639         dev_err_ratelimited(hdev->dev,
7640                 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7641 }
7642
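     /* LBW variant of the handler above - the captured address is a single
      * 32-bit register.
      */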
7643 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7644                                                         u64 rtr_ctrl_base_addr, bool is_write,
7645                                                         u64 *event_mask)
7646 {
7647         u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7648         u32 razwi_addr;
7649         u8 rd_wr_flag;
7650
7651         num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7652
7653         if (is_write) {
7654                 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7655                 rd_wr_flag = HL_RAZWI_WRITE;
7656
7657                 /* Clear set indication */
7658                 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7659         } else {
7660                 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7661                 rd_wr_flag = HL_RAZWI_READ;
7662
7663                 /* Clear set indication */
7664                 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7665         }
7666
7667         hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7668                         event_mask);
7669         dev_err_ratelimited(hdev->dev,
7670                 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7671                 is_write ? "WR" : "RD", rtr_id, razwi_addr);
7672
7673         dev_err_ratelimited(hdev->dev,
7674                 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7675 }
7676
7677 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7678 static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7679 {
7680         u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7681                                                                 razwi_mask_info, razwi_intr = 0;
7682         int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7683         u64 rtr_ctrl_base_addr;
7684
7685         if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7686                 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7687                 if (!razwi_intr)
7688                         return;
7689         }
7690
7691         razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7692         xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7693
7694         dev_err_ratelimited(hdev->dev,
7695                 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7696                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7697                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7698                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7699                 xy,
7700                 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7701
7702         if (xy == 0) {
7703                 dev_err_ratelimited(hdev->dev,
7704                                 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7705                 goto clear;
7706         }
7707
7708         /* Find router id by router coordinates */
7709         for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7710                 if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7711                         break;
7712
7713         if (rtr_id == rtr_map_arr_len) {
7714                 dev_err_ratelimited(hdev->dev,
7715                                 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7716                 goto clear;
7717         }
7718
7719         /* Find router mstr_if register base */
7720         dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7721         dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7722         rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7723                                 dcore_rtr_id * DCORE_RTR_OFFSET;
7724
7725         hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7726         hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7727         lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7728         lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7729
7730         if (hbw_aw_set)
7731                 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7732                                                 rtr_ctrl_base_addr, true, event_mask);
7733
7734         if (hbw_ar_set)
7735                 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7736                                                 rtr_ctrl_base_addr, false, event_mask);
7737
7738         if (lbw_aw_set)
7739                 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7740                                                 rtr_ctrl_base_addr, true, event_mask);
7741
7742         if (lbw_ar_set)
7743                 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7744                                                 rtr_ctrl_base_addr, false, event_mask);
7745
7746 clear:
7747         /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7748         if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7749                 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7750 }
7751
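     /* Decode the SEI status bits of a single QMAN block and clear them. */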
7752 static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
7753 {
7754         u32 i, sts_val, sts_clr_val = 0;
7755
7756         sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7757
7758         for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7759                 if (sts_val & BIT(i)) {
7760                         dev_err_ratelimited(hdev->dev, "QM SEI. err cause: %s\n",
7761                                                 gaudi2_qm_sei_error_cause[i]);
7762                         sts_clr_val |= BIT(i);
7763                 }
7764         }
7765
7766         WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7767 }
7768
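     /* AXI error response (QM SEI) event handler: derive the QMAN base address
      * from the event type, decode its SEI status and check whether a RAZWI was
      * reported together with the event.
      */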
7769 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7770                                         struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
7771 {
7772         enum razwi_event_sources module;
7773         u64 qman_base;
7774         u8 index;
7775
7776         switch (event_type) {
7777         case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7778                 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7779                 qman_base = mmDCORE0_TPC0_QM_BASE +
7780                                 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7781                                 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7782                 module = RAZWI_TPC;
7783                 break;
7784         case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7785                 qman_base = mmDCORE0_TPC6_QM_BASE;
7786                 module = RAZWI_TPC;
7787                 break;
7788         case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7789         case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7790         case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7791         case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7792                 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7793                                 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7794                                                 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7795                 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7796                 module = RAZWI_MME;
7797                 break;
7798         case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7799         case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7800                 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7801                 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7802                 module = RAZWI_PDMA;
7803                 break;
7804         case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7805         case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7806                 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7807                 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7808                 module = RAZWI_ROT;
7809                 break;
7810         default:
7811                 return;
7812         }
7813
7814         _gaudi2_handle_qm_sei_err(hdev, qman_base);
7815
7816         /* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7817         if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7818                         event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7819                 _gaudi2_handle_qm_sei_err(hdev, qman_base + NIC_QM_OFFSET);
7820
7821         /* check if RAZWI happened */
7822         if (razwi_info)
7823                 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask);
7824 }
7825
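     /* Resolve the QMAN base address and the queue id base from the event type
      * and run the generic QMAN error handler on that block.
      */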
7826 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7827 {
7828         u32 qid_base;
7829         u64 qman_base;
7830         char desc[32];
7831         u8 index;
7832
7833         switch (event_type) {
7834         case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7835                 index = event_type - GAUDI2_EVENT_TPC0_QM;
7836                 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7837                 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7838                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC%d_QM", index);
7839                 break;
7840         case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7841                 index = event_type - GAUDI2_EVENT_TPC6_QM;
7842                 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7843                 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7844                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_TPC%d_QM", index);
7845                 break;
7846         case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7847                 index = event_type - GAUDI2_EVENT_TPC12_QM;
7848                 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7849                 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7850                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_TPC%d_QM", index);
7851                 break;
7852         case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7853                 index = event_type - GAUDI2_EVENT_TPC18_QM;
7854                 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7855                 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7856                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_TPC%d_QM", index);
7857                 break;
7858         case GAUDI2_EVENT_TPC24_QM:
7859                 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7860                 qman_base = mmDCORE0_TPC6_QM_BASE;
7861                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC6_QM");
7862                 break;
7863         case GAUDI2_EVENT_MME0_QM:
7864                 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7865                 qman_base = mmDCORE0_MME_QM_BASE;
7866                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_MME_QM");
7867                 break;
7868         case GAUDI2_EVENT_MME1_QM:
7869                 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7870                 qman_base = mmDCORE1_MME_QM_BASE;
7871                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_MME_QM");
7872                 break;
7873         case GAUDI2_EVENT_MME2_QM:
7874                 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7875                 qman_base = mmDCORE2_MME_QM_BASE;
7876                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_MME_QM");
7877                 break;
7878         case GAUDI2_EVENT_MME3_QM:
7879                 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7880                 qman_base = mmDCORE3_MME_QM_BASE;
7881                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_MME_QM");
7882                 break;
7883         case GAUDI2_EVENT_HDMA0_QM:
7884                 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7885                 qman_base = mmDCORE0_EDMA0_QM_BASE;
7886                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA0_QM");
7887                 break;
7888         case GAUDI2_EVENT_HDMA1_QM:
7889                 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7890                 qman_base = mmDCORE0_EDMA1_QM_BASE;
7891                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA1_QM");
7892                 break;
7893         case GAUDI2_EVENT_HDMA2_QM:
7894                 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7895                 qman_base = mmDCORE1_EDMA0_QM_BASE;
7896                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA0_QM");
7897                 break;
7898         case GAUDI2_EVENT_HDMA3_QM:
7899                 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7900                 qman_base = mmDCORE1_EDMA1_QM_BASE;
7901                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA1_QM");
7902                 break;
7903         case GAUDI2_EVENT_HDMA4_QM:
7904                 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7905                 qman_base = mmDCORE2_EDMA0_QM_BASE;
7906                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA0_QM");
7907                 break;
7908         case GAUDI2_EVENT_HDMA5_QM:
7909                 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7910                 qman_base = mmDCORE2_EDMA1_QM_BASE;
7911                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA1_QM");
7912                 break;
7913         case GAUDI2_EVENT_HDMA6_QM:
7914                 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7915                 qman_base = mmDCORE3_EDMA0_QM_BASE;
7916                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA0_QM");
7917                 break;
7918         case GAUDI2_EVENT_HDMA7_QM:
7919                 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7920                 qman_base = mmDCORE3_EDMA1_QM_BASE;
7921                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA1_QM");
7922                 break;
7923         case GAUDI2_EVENT_PDMA0_QM:
7924                 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7925                 qman_base = mmPDMA0_QM_BASE;
7926                 snprintf(desc, ARRAY_SIZE(desc), "PDMA0_QM");
7927                 break;
7928         case GAUDI2_EVENT_PDMA1_QM:
7929                 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7930                 qman_base = mmPDMA1_QM_BASE;
7931                 snprintf(desc, ARRAY_SIZE(desc), "PDMA1_QM");
7932                 break;
7933         case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7934                 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7935                 qman_base = mmROT0_QM_BASE;
7936                 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR0_QM");
7937                 break;
7938         case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7939                 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7940                 qman_base = mmROT1_QM_BASE;
7941                 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR1_QM");
7942                 break;
7943         default:
7944                 return;
7945         }
7946
7947         gaudi2_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7948
7949         /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7950         if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7951                 _gaudi2_handle_qm_sei_err(hdev, qman_base);
7952 }
7953
7954 static void gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev)
7955 {
7956         u32 i, sts_val, sts_clr_val = 0;
7957
7958         sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7959
7960         for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7961                 if (sts_val & BIT(i)) {
7962                         dev_err_ratelimited(hdev->dev, "ARC SEI. err cause: %s\n",
7963                                                 gaudi2_arc_sei_error_cause[i]);
7964                         sts_clr_val |= BIT(i);
7965                 }
7966         }
7967
7968         WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7969 }
7970
7971 static void gaudi2_handle_cpu_sei_err(struct hl_device *hdev)
7972 {
7973         u32 i, sts_val, sts_clr_val = 0;
7974
7975         sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
7976
7977         for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
7978                 if (sts_val & BIT(i)) {
7979                         dev_err_ratelimited(hdev->dev, "CPU SEI. err cause: %s\n",
7980                                                 gaudi2_cpu_sei_error_cause[i]);
7981                         sts_clr_val |= BIT(i);
7982                 }
7983         }
7984
7985         WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
7986 }
7987
7988 static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
7989                                         struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
7990                                         u64 *event_mask)
7991 {
7992         u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7993         int i;
7994
7995         for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
7996                 if (intr_cause_data & BIT(i))
7997                         dev_err_ratelimited(hdev->dev, "ROT%u. err cause: %s\n",
7998                                                 rot_index, guadi2_rot_error_cause[i]);
7999
8000         /* check if RAZWI happened */
8001         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
8002                                                 &razwi_with_intr_cause->razwi_info, event_mask);
8003 }
8004
8005 static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name,
8006                                         struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8007                                         u64 *event_mask)
8008 {
8009         u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8010         int i;
8011
8012         for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8013                 if (intr_cause_data & BIT(i))
8014                         dev_err_ratelimited(hdev->dev, "TPC%d_%s interrupt cause: %s\n",
8015                                         tpc_index, interrupt_name, gaudi2_tpc_interrupts_cause[i]);
8016
8017         /* check if RAZWI happened */
8018         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
8019                                                 &razwi_with_intr_cause->razwi_info, event_mask);
8020 }
8021
8022 static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name,
8023                                 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8024 {
8025         u32 sts_addr, sts_val, sts_clr_val = 0;
8026         int i;
8027
8028         if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8029                 /* DCORE DEC */
8030                 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8031                                 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8032                                 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8033         else
8034                 /* PCIE DEC */
8035                 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8036                                 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8037
8038         sts_val = RREG32(sts_addr);
8039
8040         for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8041                 if (sts_val & BIT(i)) {
8042                         dev_err_ratelimited(hdev->dev, "DEC%u_%s err cause: %s\n",
8043                                         dec_index, interrupt_name, gaudi2_dec_error_cause[i]);
8044                         sts_clr_val |= BIT(i);
8045                 }
8046         }
8047
8048         /* check if RAZWI happened */
8049         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info,
8050                                                 event_mask);
8051
8052         /* Write 1 to clear the errors */
8053         WREG32(sts_addr, sts_clr_val);
8054 }
8055
8056 static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name,
8057                                 struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8058 {
8059         u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
8060         int i;
8061
8062         sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8063         sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8064
8065         sts_val = RREG32(sts_addr);
8066
8067         for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8068                 if (sts_val & BIT(i)) {
8069                         dev_err_ratelimited(hdev->dev, "MME%u_%s err cause: %s\n",
8070                                         mme_index, interrupt_name, guadi2_mme_error_cause[i]);
8071                         sts_clr_val |= BIT(i);
8072                 }
8073         }
8074
8075         /* check if RAZWI happened */
8076         for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8077                 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info,
8078                                                         event_mask);
8079
8080         WREG32(sts_clr_addr, sts_clr_val);
8081 }
8082
8083 static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8 sbte_index,
8084                                         u64 intr_cause_data)
8085 {
8086         int i;
8087
8088         for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8089                 if (intr_cause_data & BIT(i))
8090                         dev_err_ratelimited(hdev->dev, "MME%uSBTE%u_AXI_ERR_RSP err cause: %s\n",
8091                                         mme_index, sbte_index, guadi2_mme_sbte_error_cause[i]);
8092 }
8093
8094 static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
8095                                         struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8096 {
8097         u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
8098         int i;
8099
8100         sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8101         sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8102
8103         sts_val = RREG32(sts_addr);
8104
8105         for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8106                 if (sts_val & BIT(i)) {
8107                         dev_err_ratelimited(hdev->dev,
8108                                         "MME%u_WAP_SOURCE_RESULT_INVALID err cause: %s\n",
8109                                         mme_index, guadi2_mme_wap_error_cause[i]);
8110                         sts_clr_val |= BIT(i);
8111                 }
8112         }
8113
8114         /* check if RAZWI happened on WAP0/1 */
8115         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info,
8116                                                 event_mask);
8117         gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info,
8118                                                 event_mask);
8119
8120         WREG32(sts_clr_addr, sts_clr_val);
8121 }
8122
8123 static void gaudi2_handle_kdma_core_event(struct hl_device *hdev, u64 intr_cause_data)
8124 {
8125         int i;
8126
8127         /* If an AXI read or write error is received, an error is reported and an
8128          * interrupt message is sent. Due to a HW erratum, when reading the cause
8129          * register of the KDMA engine, the reported error is always HBW, even if
8130          * the actual error was caused by an LBW KDMA transaction.
8131          */
8132         for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8133                 if (intr_cause_data & BIT(i))
8134                         dev_err_ratelimited(hdev->dev, "kdma core err cause: %s\n",
8135                                                 gaudi2_kdma_core_interrupts_cause[i]);
8136 }
8137
8138 static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_data)
8139 {
8140         int i;
8141
8142         for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8143                 if (intr_cause_data & BIT(i))
8144                         dev_err_ratelimited(hdev->dev, "dma core err cause: %s\n",
8145                                                 gaudi2_dma_core_interrupts_cause[i]);
8146 }
8147
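     /* Check the four PCIE master-interface RAZWI_HAPPENED indications
      * (HBW/LBW x AW/AR), print info for any that are set and clear them.
      */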
8148 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8149 {
8150         u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8151
8152         razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8153         if (RREG32(razwi_happened_addr)) {
8154                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8155                                                         NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8156                 WREG32(razwi_happened_addr, 0x1);
8157         }
8158
8159         razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8160         if (RREG32(razwi_happened_addr)) {
8161                 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8162                                                         NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8163                 WREG32(razwi_happened_addr, 0x1);
8164         }
8165
8166         razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8167         if (RREG32(razwi_happened_addr)) {
8168                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8169                                                         NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8170                 WREG32(razwi_happened_addr, 0x1);
8171         }
8172
8173         razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8174         if (RREG32(razwi_happened_addr)) {
8175                 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8176                                                         NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8177                 WREG32(razwi_happened_addr, 0x1);
8178         }
8179 }
8180
8181 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data,
8182                                                 u64 *event_mask)
8183 {
8184         int i;
8185
8186         for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8187                 if (!(intr_cause_data & BIT_ULL(i)))
8188                         continue;
8189
8190                 dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
8191                                         gaudi2_pcie_addr_dec_error_cause[i]);
8192
8193                 switch (intr_cause_data & BIT_ULL(i)) {
8194                 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8195                         break;
8196                 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8197                         gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8198                         break;
8199                 }
8200         }
8201 }
8202
8203 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
8205 {
8206         int i;
8207
8208         for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8209                 if (intr_cause_data & BIT_ULL(i))
8210                         dev_err_ratelimited(hdev->dev, "PMMU PIF err cause: %s\n",
8211                                         gaudi2_pmmu_fatal_interrupts_cause[i]);
8212         }
8213 }
8214
8215 static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8216 {
8217         u32 dcore_id, hif_id;
8218         int i;
8219
8220         dcore_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) / 4;
8221         hif_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) % 4;
8222
8223         for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8224                 if (intr_cause_data & BIT_ULL(i))
8225                         dev_err_ratelimited(hdev->dev, "DCORE%u_HIF%u: %s\n", dcore_id, hif_id,
8226                                         gaudi2_hif_fatal_interrupts_cause[i]);
8227         }
8228 }
8229
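     /* Report an MMU page fault: the upper and lower halves of the faulting VA
      * are captured in two registers and are combined before being logged.
      */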
8230 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8231                                         u64 *event_mask)
8232 {
8233         u32 valid, val;
8234         u64 addr;
8235
8236         valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8237
8238         if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8239                 return;
8240
8241         val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8242         addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8243         addr <<= 32;
8244         addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8245
8246         dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8247                                 is_pmmu ? "PMMU" : "HMMU", addr);
8248         hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8249
8250         WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8251 }
8252
8253 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8254 {
8255         u32 valid, val;
8256         u64 addr;
8257
8258         valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8259
8260         if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8261                 return;
8262
8263         val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8264         addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8265         addr <<= 32;
8266         addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8267
8268         dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8269                                 is_pmmu ? "PMMU" : "HMMU", addr);
8270         WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8271 }
8272
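     /* Decode the MMU SPI/SEI cause register - cause bit 0 is a page fault and
      * bit 1 is an access error - then clear the cause and the interrupt.
      */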
8273 static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name,
8274                                                 u64 mmu_base, bool is_pmmu, u64 *event_mask)
8275 {
8276         u32 spi_sei_cause, interrupt_clr = 0x0;
8277         int i;
8278
8279         spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8280
8281         for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8282                 if (spi_sei_cause & BIT(i)) {
8283                         dev_err_ratelimited(hdev->dev, "%s SPI_SEI ERR. err cause: %s\n",
8284                                                 mmu_name, gaudi2_mmu_spi_sei[i].cause);
8285
8286                         if (i == 0)
8287                                 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8288                         else if (i == 1)
8289                                 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8290
8291                         if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8292                                 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8293                 }
8294         }
8295
8296         /* Clear cause */
8297         WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8298
8299         /* Clear interrupt */
8300         WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8301 }
8302
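     /* Handle sync manager errors: decode the SM SEI cause and a pending CQ
      * interrupt, then clear both indications.
      */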
8303 static void gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
8304 {
8305         u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log;
8306         u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index;
8307         int i;
8308
8309         sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8310         cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8311
8312         sei_cause_val = RREG32(sei_cause_addr);
8313         sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8314         cq_intr_val = RREG32(cq_intr_addr);
8315
8316         /* SEI interrupt */
8317         if (sei_cause_cause) {
8318                 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8319                 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8320                                         sei_cause_val);
8321
8322                 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8323                         if (!(sei_cause_cause & BIT(i)))
8324                                 continue;
8325
8326                         dev_err_ratelimited(hdev->dev, "SM%u SEI ERR. err cause: %s. %s: 0x%X\n",
8327                                         sm_index,
8328                                         gaudi2_sm_sei_cause[i].cause_name,
8329                                         gaudi2_sm_sei_cause[i].log_name,
8330                                         sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
8331                         break;
8332                 }
8333
8334                 /* Clear SM_SEI_CAUSE */
8335                 WREG32(sei_cause_addr, 0);
8336         }
8337
8338         /* CQ interrupt */
8339         if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8340                 cq_intr_queue_index =
8341                                 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8342                                         cq_intr_val);
8343
8344                 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8345                                 sm_index, cq_intr_queue_index);
8346
8347                 /* Clear CQ_INTR */
8348                 WREG32(cq_intr_addr, 0);
8349         }
8350 }
8351
8352 static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8353 {
8354         bool is_pmmu = false;
8355         char desc[32];
8356         u64 mmu_base;
8357         u8 index;
8358
8359         switch (event_type) {
8360         case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8361                 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8362                 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8363                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8364                 break;
8365         case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8366                 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8367                 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8368                 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8369                 break;
8370         case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8371                 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8372                 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8373                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8374                 break;
8375         case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8376                 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8377                 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8378                 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8379                 break;
8380         case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8381                 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8382                 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8383                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8384                 break;
8385         case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8386                 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8387                 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8388                 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8389                 break;
8390         case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8391                 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8392                 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8393                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8394                 break;
8395         case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8396                 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8397                 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8398                 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8399                 break;
8400         case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8401         case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8402                 is_pmmu = true;
8403                 mmu_base = mmPMMU_HBW_MMU_BASE;
8404                 snprintf(desc, ARRAY_SIZE(desc), "PMMU");
8405                 break;
8406         default:
8407                 return;
8408         }
8409
8410         gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu, event_mask);
8411 }
8412
8413
8414 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8415 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8416                         struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8417 {
8418         u32 addr, beat, beat_shift;
8419         bool rc = false;
8420
8421         dev_err_ratelimited(hdev->dev,
8422                         "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8423                         FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8424                         FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8425                         FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8426
8427         addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8428         dev_err_ratelimited(hdev->dev,
8429                         "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8430                         FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8431                         FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8432                         FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8433                         FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8434                         FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8435
8436         /* For each beat (RDQS edge), look for possible errors and print relevant info */
8437         for (beat = 0 ; beat < 4 ; beat++) {
8438                 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8439                         (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8440                         dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8441                                                 beat,
8442                                                 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8443                                                 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8444
8445                 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8446                         (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8447                         dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8448                                                 beat,
8449                                                 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8450                                                 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8451                         rc |= true;
8452                 }
8453
8454                 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8455                 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8456                         (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8457                         dev_err_ratelimited(hdev->dev,
8458                                         "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8459                                         beat,
8460                                         le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8461                                         (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8462                                                 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8463                                                 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8464                         rc |= true;
8465                 }
8466
8467                 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8468                 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8469                                         le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8470                 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8471                                         le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8472         }
8473
8474         return rc;
8475 }
8476
8477 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8478                         struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8479 {
8480         struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8481         u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8482
8483         dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8484
8485         dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8486                                 derr & 0x3, derr & 0xc);
8487
8488         /* JIRA H6-3286 - the following prints may not be valid */
8489         dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8490         for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8491                 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8492                 dev_err_ratelimited(hdev->dev,
8493                                 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8494                                 i,
8495                                 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8496                                 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8497                                 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8498                                 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8499         }
8500 }
8501
8502 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8503                 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8504 {
8505         __le32 *col_cmd = ca_par_err_data->dbg_col;
8506         __le16 *row_cmd = ca_par_err_data->dbg_row;
8507         u32 i;
8508
8509         dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8510
8511         dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8512         for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8513                 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8514                         le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8515                         le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8516 }
8517
8518 /* Returns true if hard reset is needed or false otherwise */
8519 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8520                                         struct hl_eq_hbm_sei_data *sei_data)
8521 {
8522         bool require_hard_reset = false;
8523         u32 hbm_id, mc_id, cause_idx;
8524
8525         hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8526         mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8527
8528         cause_idx = sei_data->hdr.sei_cause;
8529         if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8530                 dev_err_ratelimited(hdev->dev, "Invalid HBM SEI event cause (%d) provided by FW\n",
8531                                         cause_idx);
8532                 return true;
8533         }
8534
8535         if (sei_data->hdr.is_critical)
8536                 dev_err(hdev->dev,
8537                         "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8538                         hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8539                         hbm_mc_sei_cause[cause_idx]);
8540
8541         else
8542                 dev_err_ratelimited(hdev->dev,
8543                         "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8544                         hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8545                         hbm_mc_sei_cause[cause_idx]);
8546
8547         /* Print error-specific info */
8548         switch (cause_idx) {
8549         case HBM_SEI_CATTRIP:
8550                 require_hard_reset = true;
8551                 break;
8552
8553         case HBM_SEI_CMD_PARITY_EVEN:
8554                 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8555                                                 le32_to_cpu(sei_data->hdr.cnt));
8556                 require_hard_reset = true;
8557                 break;
8558
8559         case HBM_SEI_CMD_PARITY_ODD:
8560                 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8561                                                 le32_to_cpu(sei_data->hdr.cnt));
8562                 require_hard_reset = true;
8563                 break;
8564
8565         case HBM_SEI_WRITE_DATA_PARITY_ERR:
8566                 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8567                                                 le32_to_cpu(sei_data->hdr.cnt));
8568                 require_hard_reset = true;
8569                 break;
8570
8571         case HBM_SEI_READ_ERR:
8572                 /* Unlike other SEI events, read error requires further processing of the
8573                  * raw data in order to determine the root cause.
8574                  */
8575                 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8576                                                                 &sei_data->read_err_info,
8577                                                                 le32_to_cpu(sei_data->hdr.cnt));
8578                 break;
8579
8580         default:
8581                 break;
8582         }
8583
8584         require_hard_reset |= !!sei_data->hdr.is_critical;
8585
8586         return require_hard_reset;
8587 }
8588
8589 static void gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u64 intr_cause_data)
8590 {
8591         dev_err(hdev->dev,
8592                 "HBM catastrophic temperature error (CATTRIP) cause %#llx\n",
8593                 intr_cause_data);
8594 }
8595
8596 static void gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8597 {
8598         u32 i;
8599
8600         for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8601                 if (intr_cause_data & hbm_mc_spi[i].mask)
8602                         dev_dbg(hdev->dev, "HBM SPI event: notification cause (%s)\n",
8603                                 hbm_mc_spi[i].cause);
8604 }
8605
8606 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
8607 {
8608         ktime_t zero_time = ktime_set(0, 0);
8609
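        /*
         * Bookkeeping scheme: a *_ENV_S event sets the reason bit and latches a
         * start timestamp (the end timestamp is zeroed until the matching
         * *_ENV_E event arrives), while aggregated_reason is sticky and is
         * never cleared by this handler.
         */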
8610         mutex_lock(&hdev->clk_throttling.lock);
8611
8612         switch (event_type) {
8613         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8614                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8615                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8616                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8617                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8618                 dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8619                 break;
8620
8621         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8622                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8623                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8624                 dev_info_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
8625                 break;
8626
8627         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8628                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8629                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8630                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8631                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8632                 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8633                 break;
8634
8635         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8636                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8637                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8638                 dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
8639                 break;
8640
8641         default:
8642                 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8643                 break;
8644         }
8645
8646         mutex_unlock(&hdev->clk_throttling.lock);
8647 }
8648
8649 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev,
8650                                         struct cpucp_pkt_sync_err *sync_err)
8651 {
8652         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8653
8654         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8655                 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8656 }
8657
8658 static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev)
8659 {
8660         u32 p2p_intr, msix_gw_intr;
8661
8662         p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8663         msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8664
8665         if (p2p_intr) {
8666                 dev_err_ratelimited(hdev->dev,
8667                         "pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8668                         RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8669
8670                 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8671         }
8672
8673         if (msix_gw_intr) {
8674                 dev_err_ratelimited(hdev->dev,
8675                         "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8676                         RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8677
8678                 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8679         }
8680 }
8681
8682 static void gaudi2_handle_pcie_drain(struct hl_device *hdev,
8683                         struct hl_eq_pcie_drain_ind_data *drain_data)
8684 {
8685         u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause;
8686
8687         cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8688         lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8689         lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8690         hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8691         hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8692
8693         if (cause & BIT_ULL(0))
8694                 dev_err_ratelimited(hdev->dev,
8695                         "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8696                         !!lbw_rd, !!lbw_wr);
8697
8698         if (cause & BIT_ULL(1))
8699                 dev_err_ratelimited(hdev->dev,
8700                         "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8701                         hbw_rd, hbw_wr);
8702 }
8703
8704 static void gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8705 {
8706         int i;
8707
8708         for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8709                 if (intr_cause_data & BIT_ULL(i))
8710                         dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8711                                 gaudi2_psoc_axi_drain_interrupts_cause[i]);
8712         }
8713 }
8714
8715 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev,
8716                                         struct cpucp_pkt_sync_err *sync_err)
8717 {
8718         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8719
8720         dev_warn(hdev->dev,
8721                 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8722                 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8723 }
8724
8725 static void hl_arc_event_handle(struct hl_device *hdev,
8726                                         struct hl_eq_engine_arc_intr_data *data)
8727 {
8728         struct hl_engine_arc_dccm_queue_full_irq *q;
8729         u32 intr_type, engine_id;
8730         u64 payload;
8731
8732         intr_type = le32_to_cpu(data->intr_type);
8733         engine_id = le32_to_cpu(data->engine_id);
8734         payload = le64_to_cpu(data->payload);
8735
8736         switch (intr_type) {
8737         case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8738                 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8739
8740                 dev_err_ratelimited(hdev->dev,
8741                                 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8742                                 engine_id, intr_type, q->queue_index);
8743                 break;
8744         default:
8745                 dev_err_ratelimited(hdev->dev, "Unknown ARC event type %u\n", intr_type);
8746         }
8747 }
8748
8749 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8750 {
8751         struct gaudi2_device *gaudi2 = hdev->asic_specific;
8752         bool reset_required = false, is_critical = false;
8753         u32 ctl, reset_flags = HL_DRV_RESET_HARD;
8754         int index, sbte_index;
8755         u64 event_mask = 0;
8756         u16 event_type;
8757
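        /*
         * The event type is packed into the EQ entry control word; the
         * open-coded mask/shift below is effectively equivalent to
         * FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl).
         */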
8758         ctl = le32_to_cpu(eq_entry->hdr.ctl);
8759         event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8760
8761         if (event_type >= GAUDI2_EVENT_SIZE) {
8762                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
8763                                 event_type, GAUDI2_EVENT_SIZE - 1);
8764                 return;
8765         }
8766
8767         gaudi2->events_stat[event_type]++;
8768         gaudi2->events_stat_aggregate[event_type]++;
8769
8770         gaudi2_print_irq_info(hdev, event_type);
8771
8772         switch (event_type) {
8773         case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8774                 fallthrough;
8775         case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8776                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8777                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8778                 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8779                 is_critical = eq_entry->ecc_data.is_critical;
8780                 break;
8781
8782         case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8783                 fallthrough;
8784         case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8785                 fallthrough;
8786         case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8787                 gaudi2_handle_qman_err(hdev, event_type);
8788                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8789                 break;
8790
8791         case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8792                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8793                 gaudi2_handle_arc_farm_sei_err(hdev);
8794                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8795                 break;
8796
8797         case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8798                 gaudi2_handle_cpu_sei_err(hdev);
8799                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8800                 break;
8801
8802         case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8803         case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8804                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8805                 gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info, &event_mask);
8806                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8807                 break;
8808
8809         case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8810         case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8811                 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8812                 gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause, &event_mask);
8813                 gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8814                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8815                 break;
8816
8817         case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8818                 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8819                 gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
8820                                                 &eq_entry->razwi_with_intr_cause, &event_mask);
8821                 gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8822                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8823                 break;
8824
8825         case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8826                 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8827                 gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info,
8828                                         &event_mask);
8829                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8830                 break;
8831
8832         case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8833         case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8834         case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8835         case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8836         case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8837         case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8838         case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8839         case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8840         case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8841         case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8842         case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8843         case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8844         case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8845         case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8846         case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8847         case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8848         case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8849         case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8850         case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8851         case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8852         case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8853         case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8854         case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8855         case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8856         case GAUDI2_EVENT_TPC24_KERNEL_ERR:
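                /*
                 * Normalize by the per-TPC event stride so the division below
                 * yields the TPC index, assuming the KERNEL_ERR events are
                 * equally spaced: e.g. GAUDI2_EVENT_TPC3_KERNEL_ERR gives
                 * (TPC3 - TPC0) / stride = 3.
                 */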
8857                 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8858                         (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8859                 gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause,
8860                                                 &event_mask);
8861                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8862                 break;
8863
8864         case GAUDI2_EVENT_DEC0_SPI:
8865         case GAUDI2_EVENT_DEC1_SPI:
8866         case GAUDI2_EVENT_DEC2_SPI:
8867         case GAUDI2_EVENT_DEC3_SPI:
8868         case GAUDI2_EVENT_DEC4_SPI:
8869         case GAUDI2_EVENT_DEC5_SPI:
8870         case GAUDI2_EVENT_DEC6_SPI:
8871         case GAUDI2_EVENT_DEC7_SPI:
8872         case GAUDI2_EVENT_DEC8_SPI:
8873         case GAUDI2_EVENT_DEC9_SPI:
8874                 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8875                                 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8876                 gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info, &event_mask);
8877                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8878                 break;
8879
8880         case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8881         case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8882         case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8883         case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8884                 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8885                                 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8886                                                 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8887                 gaudi2_handle_mme_err(hdev, index,
8888                                 "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info, &event_mask);
8889                 gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8890                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8891                 break;
8892
8893         case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
8894         case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
8895         case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
8896         case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
8897                 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
8898                                 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
8899                                         GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
8900                 gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info,
8901                                         &event_mask);
8902                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8903                 break;
8904
8905         case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
8906         case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
8907         case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
8908         case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
8909                 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
8910                                 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
8911                                         GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
8912                 gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info, &event_mask);
8913                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8914                 break;
8915
8916         case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
8917         case GAUDI2_EVENT_KDMA0_CORE:
8918                 gaudi2_handle_kdma_core_event(hdev,
8919                                         le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8920                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8921                 break;
8922
8923         case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
8924                 gaudi2_handle_dma_core_event(hdev,
8925                                         le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8926                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8927                 break;
8928
8929         case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
8930                 gaudi2_print_pcie_addr_dec_info(hdev,
8931                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
8932                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8933                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8934                 break;
8935
8936         case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8937         case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8938         case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8939         case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8940                 gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
8941                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8942                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8943                 break;
8944
8945         case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
8946                 gaudi2_handle_hif_fatal(hdev, event_type,
8947                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8948                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8949                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8950                 break;
8951
8952         case GAUDI2_EVENT_PMMU_FATAL_0:
8953                 gaudi2_handle_pif_fatal(hdev,
8954                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8955                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8956                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8957                 break;
8958
8959         case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
8960                 gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
8961                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8962                 break;
8963
8964         case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
8965                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8966                 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
8967                         reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8968                         reset_required = true;
8969                 }
8970                 break;
8971
8972         case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
8973                 gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8974                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8975                 break;
8976
8977         case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
8978                 gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8979                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8980                 break;
8981
8982         case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
8983                 gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
8984                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8985                 break;
8986
8987         case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
8988                 gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8989                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8990                 break;
8991
8992         case GAUDI2_EVENT_CPU_AXI_ECC:
8993                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8994                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8995                 break;
8996         case GAUDI2_EVENT_CPU_L2_RAM_ECC:
8997                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8998                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8999                 break;
9000         case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9001         case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9002         case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9003         case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
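                /*
                 * The SBTE events appear to be contiguous, five per MME, so the
                 * stride below is 5: e.g. MME2_SBTE3 sits at offset
                 * 2 * 5 + 3 = 13, giving index = 13 / 5 = 2 and
                 * sbte_index = 13 % 5 = 3.
                 */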
9004                 index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) /
9005                                 (GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
9006                                         GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
9007                 sbte_index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) %
9008                                 (GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
9009                                         GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
9010                 gaudi2_handle_mme_sbte_err(hdev, index, sbte_index,
9011                                                 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9012                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9013                 break;
9014         case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9015                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9016                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9017                 break;
9018         case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9019                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9020                 break;
9021         case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9022                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9023                 break;
9024         case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9025                 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9026                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9027                 break;
9028         case GAUDI2_EVENT_PCIE_FATAL_ERR:
9029                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9030                 break;
9031         case GAUDI2_EVENT_TPC0_BMON_SPMU:
9032         case GAUDI2_EVENT_TPC1_BMON_SPMU:
9033         case GAUDI2_EVENT_TPC2_BMON_SPMU:
9034         case GAUDI2_EVENT_TPC3_BMON_SPMU:
9035         case GAUDI2_EVENT_TPC4_BMON_SPMU:
9036         case GAUDI2_EVENT_TPC5_BMON_SPMU:
9037         case GAUDI2_EVENT_TPC6_BMON_SPMU:
9038         case GAUDI2_EVENT_TPC7_BMON_SPMU:
9039         case GAUDI2_EVENT_TPC8_BMON_SPMU:
9040         case GAUDI2_EVENT_TPC9_BMON_SPMU:
9041         case GAUDI2_EVENT_TPC10_BMON_SPMU:
9042         case GAUDI2_EVENT_TPC11_BMON_SPMU:
9043         case GAUDI2_EVENT_TPC12_BMON_SPMU:
9044         case GAUDI2_EVENT_TPC13_BMON_SPMU:
9045         case GAUDI2_EVENT_TPC14_BMON_SPMU:
9046         case GAUDI2_EVENT_TPC15_BMON_SPMU:
9047         case GAUDI2_EVENT_TPC16_BMON_SPMU:
9048         case GAUDI2_EVENT_TPC17_BMON_SPMU:
9049         case GAUDI2_EVENT_TPC18_BMON_SPMU:
9050         case GAUDI2_EVENT_TPC19_BMON_SPMU:
9051         case GAUDI2_EVENT_TPC20_BMON_SPMU:
9052         case GAUDI2_EVENT_TPC21_BMON_SPMU:
9053         case GAUDI2_EVENT_TPC22_BMON_SPMU:
9054         case GAUDI2_EVENT_TPC23_BMON_SPMU:
9055         case GAUDI2_EVENT_TPC24_BMON_SPMU:
9056         case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9057         case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9058         case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9059         case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9060         case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9061         case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9062         case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9063         case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9064         case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9065         case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9066         case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9067         case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9068         case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9069                 fallthrough;
9070         case GAUDI2_EVENT_DEC0_BMON_SPMU:
9071         case GAUDI2_EVENT_DEC1_BMON_SPMU:
9072         case GAUDI2_EVENT_DEC2_BMON_SPMU:
9073         case GAUDI2_EVENT_DEC3_BMON_SPMU:
9074         case GAUDI2_EVENT_DEC4_BMON_SPMU:
9075         case GAUDI2_EVENT_DEC5_BMON_SPMU:
9076         case GAUDI2_EVENT_DEC6_BMON_SPMU:
9077         case GAUDI2_EVENT_DEC7_BMON_SPMU:
9078         case GAUDI2_EVENT_DEC8_BMON_SPMU:
9079         case GAUDI2_EVENT_DEC9_BMON_SPMU:
9080         case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9081                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9082                 break;
9083
9084         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9085         case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9086         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9087         case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9088                 gaudi2_print_clk_change_info(hdev, event_type);
9089                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9090                 break;
9091
9092         case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9093                 gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
9094                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9095                 break;
9096
9097         case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9098                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9099                 /* Do nothing - FW will handle it */
9100                 break;
9101
9102         case GAUDI2_EVENT_PCIE_P2P_MSIX:
9103                 gaudi2_handle_pcie_p2p_msix(hdev);
9104                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9105                 break;
9106
9107         case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9108                 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9109                 gaudi2_handle_sm_err(hdev, index);
9110                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9111                 break;
9112
9113         case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9114                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9115                 break;
9116
9117         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9118                 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9119                                                 le64_to_cpu(eq_entry->data[0]));
9120                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9121                 break;
9122         case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9123                 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9124                                                 le64_to_cpu(eq_entry->data[0]));
9125                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9126                 break;
9127
9128         case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9129                 gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err);
9130                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9131                 break;
9132
9133         case GAUDI2_EVENT_ARC_DCCM_FULL:
9134                 hl_arc_event_handle(hdev, &eq_entry->arc_data);
9135                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9136                 break;
9137
9138         case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9139                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9140                 is_critical = true;
9141                 break;
9142
9143         default:
9144                 if (gaudi2_irq_map_table[event_type].valid)
9145                         dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9146                                                 event_type);
9147         }
9148
9149         if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
9150                                 (hdev->hard_reset_on_fw_events ||
9151                                 (hdev->asic_prop.fw_security_enabled && is_critical)))
9152                 goto reset_device;
9153
9154         /* Send unmask irq only for interrupts not classified as MSG */
9155         if (!gaudi2_irq_map_table[event_type].msg)
9156                 hl_fw_unmask_irq(hdev, event_type);
9157
9158         if (event_mask)
9159                 hl_notifier_event_send_all(hdev, event_mask);
9160
9161         return;
9162
9163 reset_device:
9164         if (hdev->asic_prop.fw_security_enabled && is_critical) {
9165                 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9166                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9167         } else {
9168                 reset_flags |= HL_DRV_RESET_DELAY;
9169         }
9170         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9171         hl_device_cond_reset(hdev, reset_flags, event_mask);
9172 }
9173
9174 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9175                         struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9176                         u32 hw_queue_id, u32 size, u64 addr, u32 val)
9177 {
9178         u32 ctl, pkt_size;
9179         int rc = 0;
9180
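        /*
         * Build a memset-flavored LIN_DMA packet: the MEMSET flag makes the
         * engine replicate src_addr as the fill pattern instead of reading
         * from it, WRCOMP requests a write-completion message (used by the
         * caller to bump a completion SOB), and EB is presumably the engine
         * barrier bit that orders this packet behind preceding traffic.
         */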
9181         ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9182         ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9183         ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9184         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9185
9186         lin_dma_pkt->ctl = cpu_to_le32(ctl);
9187         lin_dma_pkt->src_addr = cpu_to_le64(val);
9188         lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9189         lin_dma_pkt->tsize = cpu_to_le32(size);
9190
9191         pkt_size = sizeof(struct packet_lin_dma);
9192
9193         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9194         if (rc)
9195                 dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9196                                 hw_queue_id);
9197
9198         return rc;
9199 }
9200
9201 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9202 {
9203         u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9204                                         GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9205                                         GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9206                                         GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9207         u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9208                 old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9209         u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9210         struct asic_fixed_properties *prop = &hdev->asic_prop;
9211         void *lin_dma_pkts_arr;
9212         dma_addr_t pkt_dma_addr;
9213         int rc = 0, dma_num = 0;
9214
9215         if (prop->edma_enabled_mask == 0) {
9216                 dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
9217                 return -EIO;
9218         }
9219
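        /*
         * Completion scheme: every enabled EDMA is pointed at the same sync
         * object (comp_addr/comp_val below), and each finished LIN_DMA packet
         * increments that SOB by one via its write-completion. The scrub is
         * therefore done once the SOB value reaches the number of submitted
         * packets (dma_num), which is what the poll at the end waits for.
         * MMU bypass (mmubp) is required because the packets target physical
         * DRAM addresses.
         */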
9220         sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9221         sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9222         comp_addr = CFG_BASE + sob_addr;
9223         comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9224                 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9225         mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9226                 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9227
9228         /* Calculate how many lin dma pkts we'll need */
9229         num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9230         pkt_size = sizeof(struct packet_lin_dma);
9231
9232         lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9233                                         &pkt_dma_addr, GFP_KERNEL);
9234         if (!lin_dma_pkts_arr)
9235                 return -ENOMEM;
9236
9237         /*
9238          * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so
9239          * save only the first one's value in order to restore it later.
9240          * Also set the SOB address on all EDMA cores for completion, and set the QM
9241          * as trusted so it can access physical addresses with MMU bypass enabled.
9242          */
9243         old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9244         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9245                 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9246                         u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9247                         u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9248
9249                         if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9250                                 continue;
9251
9252                         WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9253                                         edma_offset, mmubp);
9254                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9255                                         lower_32_bits(comp_addr));
9256                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9257                                         upper_32_bits(comp_addr));
9258                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9259                                         comp_val);
9260                         gaudi2_qman_set_test_mode(hdev,
9261                                         edma_queues_id[dcore] + 4 * edma_idx, true);
9262                 }
9263         }
9264
9265         WREG32(sob_addr, 0);
9266
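        /*
         * Distribute the range across the enabled EDMAs in a round-robin
         * fashion, one chunk of at most SZ_2G per packet: e.g. a 5GB scrub is
         * split into 2GB + 2GB + 1GB packets handed to the first three enabled
         * engines.
         */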
9267         while (cur_addr < end_addr) {
9268                 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9269                         for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9270                                 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9271
9272                                 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9273                                         continue;
9274
9275                                 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9276
9277                                 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9278                                         (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9279                                         pkt_dma_addr + dma_num * pkt_size,
9280                                         edma_queues_id[dcore] + edma_idx * 4,
9281                                         chunk_size, cur_addr, val);
9282                                 if (rc)
9283                                         goto end;
9284
9285                                 dma_num++;
9286                                 cur_addr += chunk_size;
9287                                 if (cur_addr == end_addr)
9288                                         break;
9289                         }
9290                 }
9291         }
9292
9293         rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9294         if (rc) {
9295                 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9296                 goto end;
9297         }
9298 end:
9299         for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9300                 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9301                         u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9302                         u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9303
9304                         if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9305                                 continue;
9306
9307                         WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9308                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9309                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9310                         WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9311                         gaudi2_qman_set_test_mode(hdev,
9312                                         edma_queues_id[dcore] + 4 * edma_idx, false);
9313                 }
9314         }
9315
9316         WREG32(sob_addr, 0);
9317         hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
9318
9319         return rc;
9320 }
9321
9322 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9323 {
9324         int rc;
9325         struct asic_fixed_properties *prop = &hdev->asic_prop;
9326         u64 size = prop->dram_end_address - prop->dram_user_base_address;
9327
9328         rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9329
9330         if (rc)
9331                 dev_err(hdev->dev, "Failed to scrub DRAM, address: 0x%llx size: %llu\n",
9332                                 prop->dram_user_base_address, size);
9333         return rc;
9334 }
9335
9336 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9337 {
9338         int rc;
9339         struct asic_fixed_properties *prop = &hdev->asic_prop;
9340         u64 val = hdev->memory_scrub_val;
9341         u64 addr, size;
9342
9343         if (!hdev->memory_scrub)
9344                 return 0;
9345
9346         /* scrub SRAM */
9347         addr = prop->sram_user_base_address;
9348         size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9349         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9350                         addr, addr + size, val);
9351         rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9352         if (rc) {
9353                 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9354                 return rc;
9355         }
9356
9357         /* scrub DRAM */
9358         rc = gaudi2_scrub_device_dram(hdev, val);
9359         if (rc) {
9360                 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9361                 return rc;
9362         }
9363         return 0;
9364 }
9365
9366 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9367 {
9368         u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9369                 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9370         u32 val, size, offset;
9371         int dcore_id;
9372
9373         offset = hdev->asic_prop.first_available_cq[0] * 4;
9374         cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9375         cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9376         cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9377         cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9378         cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9379         cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9380         size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9381                         (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9382
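        /*
         * For dcore0, only the CQ registers from the first user-available index
         * onward are cleared (note the offset added to the base above), leaving
         * the driver-reserved low CQs untouched; dcores 1-3 in the loop further
         * down are cleared over the full per-dcore range.
         */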
9383         /* memset dcore0 CQ registers */
9384         gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9385         gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9386         gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9387         gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9388         gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9389         gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9390
9391         cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9392         cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9393         cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9394         cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9395         cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9396         cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9397         size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9398
9399         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9400                 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9401                 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9402                 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9403                 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9404                 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9405                 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9406
9407                 cq_lbw_l_addr += DCORE_OFFSET;
9408                 cq_lbw_h_addr += DCORE_OFFSET;
9409                 cq_lbw_data_addr += DCORE_OFFSET;
9410                 cq_base_l_addr += DCORE_OFFSET;
9411                 cq_base_h_addr += DCORE_OFFSET;
9412                 cq_size_addr += DCORE_OFFSET;
9413         }
9414
9415         offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9416         addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9417         val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9418         size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9419
9420         /* memset dcore0 monitors */
9421         gaudi2_memset_device_lbw(hdev, addr, size, val);
9422
9423         addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9424         gaudi2_memset_device_lbw(hdev, addr, size, 0);
9425
9426         mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9427         mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9428         size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9429
9430         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9431                 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9432                 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9433                 mon_sts_addr += DCORE_OFFSET;
9434                 mon_cfg_addr += DCORE_OFFSET;
9435         }
9436
9437         offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9438         addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9439         val = 0;
9440         size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9441                         (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9442
9443         /* memset dcore0 sobs */
9444         gaudi2_memset_device_lbw(hdev, addr, size, val);
9445
9446         addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9447         size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9448
9449         for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9450                 gaudi2_memset_device_lbw(hdev, addr, size, val);
9451                 addr += DCORE_OFFSET;
9452         }
9453
9454         /* Flush all WREG to prevent race */
9455         val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9456 }
9457
9458 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9459 {
9460         u32 reg_base, hw_queue_id;
9461
9462         for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9463                                                         hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9464                 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9465                         continue;
9466
9467                 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9468
9469                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9470                 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9471         }
9472
9473         /* Flush all WREG to prevent race */
9474         RREG32(mmPDMA0_QM_ARB_CFG_0);
9475 }
9476
9477 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9478 {
9479         u32 reg_base, hw_queue_id;
9480
9481         for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9482                                                         hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9483                 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9484                         continue;
9485
9486                 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9487
9488                 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9489                 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9490         }
9491
9492         /* Flush all WREG to prevent race */
9493         RREG32(mmPDMA0_QM_ARB_CFG_0);
9494 }
9495
9496 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9497 {
9498         return 0;
9499 }
9500
9501 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9502 {
9503 }
9504
9505 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9506                                                 struct dup_block_ctx *cfg_ctx)
9507 {
9508         u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9509         u8 seq;
9510         int i;
9511
9512         for (i = 0 ; i < cfg_ctx->instances ; i++) {
9513                 seq = block_idx * cfg_ctx->instances + i;
9514
9515                 /* skip disabled instance */
9516                 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9517                         continue;
9518
9519                 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9520                                         cfg_ctx->data);
9521         }
9522 }
9523
9524 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9525                                                 u64 mask)
9526 {
9527         int i;
9528
9529         cfg_ctx->enabled_mask = mask;
9530
9531         for (i = 0 ; i < cfg_ctx->blocks ; i++)
9532                 gaudi2_init_block_instances(hdev, i, cfg_ctx);
9533 }
9534
9535 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9536 {
9537         gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9538 }
9539
9540 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9541 {
9542         void *host_mem_virtual_addr;
9543         dma_addr_t host_mem_dma_addr;
9544         u64 reserved_va_base;
9545         u32 pos, size_left, size_to_dma;
9546         struct hl_ctx *ctx;
9547         int rc = 0;
9548
9549         /* Fetch the ctx */
9550         ctx = hl_get_compute_ctx(hdev);
9551         if (!ctx) {
9552                 dev_err(hdev->dev, "No ctx available\n");
9553                 return -EINVAL;
9554         }
9555
9556         /* Allocate buffers for read and for poll */
9557         host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9558                                                                 GFP_KERNEL | __GFP_ZERO);
9559         if (host_mem_virtual_addr == NULL) {
9560                 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9561                 rc = -ENOMEM;
9562                 goto put_ctx;
9563         }
9564
9565         /* Reserve VM region on asic side */
9566         reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9567                                                 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9568         if (!reserved_va_base) {
9569                 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9570                 rc = -ENOMEM;
9571                 goto free_data_buffer;
9572         }
9573
9574         /* Create mapping on asic side */
9575         mutex_lock(&hdev->mmu_lock);
9576         rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9577         hl_mmu_invalidate_cache_range(hdev, false,
9578                                       MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9579                                       ctx->asid, reserved_va_base, SZ_2M);
9580         mutex_unlock(&hdev->mmu_lock);
9581         if (rc) {
9582                 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9583                 goto unreserve_va;
9584         }
9585
9586         /* Enable MMU on KDMA */
9587         gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9588
9589         pos = 0;
9590         size_left = size;
9591         size_to_dma = SZ_2M;
9592
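        /*
         * Copy the requested device range in chunks of up to SZ_2M: KDMA moves
         * each chunk from the device address into the host bounce buffer
         * (mapped at reserved_va_base), which is then memcpy'd into the debugfs
         * blob; the final chunk may be shorter than SZ_2M.
         */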
9593         while (size_left > 0) {
9594                 if (size_left < SZ_2M)
9595                         size_to_dma = size_left;
9596
9597                 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9598                 if (rc)
9599                         break;
9600
9601                 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9602
9603                 if (size_left <= SZ_2M)
9604                         break;
9605
9606                 pos += SZ_2M;
9607                 addr += SZ_2M;
9608                 size_left -= SZ_2M;
9609         }
9610
9611         gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9612
9613         mutex_lock(&hdev->mmu_lock);
9614         hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9615         hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9616                                       ctx->asid, reserved_va_base, SZ_2M);
9617         mutex_unlock(&hdev->mmu_lock);
9618 unreserve_va:
9619         hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9620 free_data_buffer:
9621         hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9622 put_ctx:
9623         hl_ctx_put(ctx);
9624
9625         return rc;
9626 }
9627
9628 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9629 {
9630         struct gaudi2_device *gaudi2 = hdev->asic_specific;
9631         int min_alloc_order, rc;
9632
9633         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9634                 return 0;
9635
9636         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9637                                                                 HOST_SPACE_INTERNAL_CB_SZ,
9638                                                                 &hdev->internal_cb_pool_dma_addr,
9639                                                                 GFP_KERNEL | __GFP_ZERO);
9640
9641         if (!hdev->internal_cb_pool_virt_addr)
9642                 return -ENOMEM;
9643
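        /*
         * The pool granularity is set to the smaller of the signal/wait CB
         * sizes, so either CB type can be carved out of the pool without
         * wasting space on a larger minimum allocation order.
         */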
9644         min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9645                                         gaudi2_get_wait_cb_size(hdev)));
9646
9647         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9648         if (!hdev->internal_cb_pool) {
9649                 dev_err(hdev->dev, "Failed to create internal CB pool\n");
9650                 rc = -ENOMEM;
9651                 goto free_internal_cb_pool;
9652         }
9653
9654         rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9655                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
9656         if (rc) {
9657                 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9658                 rc = -EFAULT;
9659                 goto destroy_internal_cb_pool;
9660         }
9661
9662         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9663                                         HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9664
9665         if (!hdev->internal_cb_va_base) {
9666                 rc = -ENOMEM;
9667                 goto destroy_internal_cb_pool;
9668         }
9669
9670         mutex_lock(&hdev->mmu_lock);
9671         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9672                                         HOST_SPACE_INTERNAL_CB_SZ);
9673         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9674         mutex_unlock(&hdev->mmu_lock);
9675
9676         if (rc)
9677                 goto unreserve_internal_cb_pool;
9678
9679         return 0;
9680
9681 unreserve_internal_cb_pool:
9682         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9683 destroy_internal_cb_pool:
9684         gen_pool_destroy(hdev->internal_cb_pool);
9685 free_internal_cb_pool:
9686         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9687                                         hdev->internal_cb_pool_dma_addr);
9688
9689         return rc;
9690 }
9691
9692 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9693 {
9694         struct gaudi2_device *gaudi2 = hdev->asic_specific;
9695
9696         if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9697                 return;
9698
9699         mutex_lock(&hdev->mmu_lock);
9700         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9701         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9702         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9703         mutex_unlock(&hdev->mmu_lock);
9704
9705         gen_pool_destroy(hdev->internal_cb_pool);
9706
9707         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9708                                         hdev->internal_cb_pool_dma_addr);
9709 }
9710
9711 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9712 {
9713         gaudi2_restore_user_sm_registers(hdev);
9714         gaudi2_restore_user_qm_registers(hdev);
9715 }
9716
9717 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9718 {
9719         struct hl_device *hdev = ctx->hdev;
9720         struct asic_fixed_properties *prop = &hdev->asic_prop;
9721         struct gaudi2_device *gaudi2 = hdev->asic_specific;
9722         int rc;
9723
9724         rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9725                                 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9726         if (rc)
9727                 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9728                         RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9729
9730         return rc;
9731 }
9732
9733 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9734 {
9735         struct hl_device *hdev = ctx->hdev;
9736         struct asic_fixed_properties *prop = &hdev->asic_prop;
9737         int rc;
9738
9739         rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9740                                 prop->pmmu.page_size, true);
9741         if (rc)
9742                 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9743                         RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9744 }
9745
9746 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9747 {
9748         int rc;
9749
9750         rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9751         if (rc)
9752                 return rc;
9753
9754         /* No need to clear the user registers if the device has just
9755          * performed a reset; in that case we restore only the NIC QM registers
9756          */
9757         if (ctx->hdev->reset_upon_device_release)
9758                 gaudi2_restore_nic_qm_registers(ctx->hdev);
9759         else
9760                 gaudi2_restore_user_registers(ctx->hdev);
9761
9762         rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9763         if (rc)
9764                 return rc;
9765
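        /* Map the virtual MSI-X doorbell page for this context; on failure, roll back the internal CB pool */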
9766         rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9767         if (rc)
9768                 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9769
9770         return rc;
9771 }
9772
9773 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9774 {
9775         if (ctx->asid == HL_KERNEL_ASID_ID)
9776                 return;
9777
9778         gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9779
9780         gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9781 }
9782
9783 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9784 {
9785         struct hl_device *hdev = cs->ctx->hdev;
9786         int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
9787         u32 mon_payload, sob_id, mon_id;
9788
9789         if (!cs_needs_completion(cs))
9790                 return 0;
9791
9792         /*
9793          * The first 64 SOB/MON pairs are reserved for the driver's QMAN auto-completion
9794          * mechanism. Each SOB/MON pair is used for a pending CS with the same
9795          * cyclic index. The SOB value is increased as each of the CS jobs
9796          * completes. When the SOB reaches the number of CS jobs, the monitor
9797          * generates an MSI-X interrupt.
9798          */
9799
9800         sob_id = mon_id = index;
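        /* The monitor payload marks the CQ shadow entry as valid and ready, and carries the cyclic CS index */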
9801         mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9802                                 (1 << CQ_ENTRY_READY_SHIFT) | index;
9803
9804         gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9805                                 cs->jobs_cnt);
9806
9807         return 0;
9808 }
9809
9810 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9811 {
9812         return HL_INVALID_QUEUE;
9813 }
9814
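/*
 * gaudi2_gen_signal_cb - append a MSG_SHORT packet at the given offset in the CB that
 * increments the given SOB by 1 (ADD mode). Returns the updated CB size.
 */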
9815 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9816 {
9817         struct hl_cb *cb = data;
9818         struct packet_msg_short *pkt;
9819         u32 value, ctl, pkt_size = sizeof(*pkt);
9820
9821         pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9822         memset(pkt, 0, pkt_size);
9823
9824         /* Inc by 1, Mode ADD */
9825         value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9826         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9827
9828         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9829         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9830         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9831         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9832         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9833
9834         pkt->value = cpu_to_le32(value);
9835         pkt->ctl = cpu_to_le32(ctl);
9836
9837         return size + pkt_size;
9838 }
9839
9840 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9841 {
9842         u32 ctl, pkt_size = sizeof(*pkt);
9843
9844         memset(pkt, 0, pkt_size);
9845
9846         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9847         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
9848         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9849         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9850         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9851
9852         pkt->value = cpu_to_le32(value);
9853         pkt->ctl = cpu_to_le32(ctl);
9854
9855         return pkt_size;
9856 }
9857
9858 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9859                                         u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
9860 {
9861         u32 ctl, value, pkt_size = sizeof(*pkt);
9862         u8 mask;
9863
9864         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
9865                 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
9866                 return 0;
9867         }
9868
9869         memset(pkt, 0, pkt_size);
9870
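        /* Monitor arm value: sync group id (8 SOBs per group), target sync value, comparison mode and SOB mask */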
9871         value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
9872         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
9873         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
9874         value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
9875
9876         ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9877         ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
9878         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9879         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9880         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9881
9882         pkt->value = cpu_to_le32(value);
9883         pkt->ctl = cpu_to_le32(ctl);
9884
9885         return pkt_size;
9886 }
9887
9888 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
9889 {
9890         u32 ctl, cfg, pkt_size = sizeof(*pkt);
9891
9892         memset(pkt, 0, pkt_size);
9893
9894         cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
9895         cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
9896         cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
9897
9898         ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
9899         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9900         ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9901
9902         pkt->cfg = cpu_to_le32(cfg);
9903         pkt->ctl = cpu_to_le32(ctl);
9904
9905         return pkt_size;
9906 }
9907
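/*
 * gaudi2_gen_wait_cb - build a wait CB: four MSG_SHORT packets that configure a monitor
 * (payload address low/high, payload data and arm), followed by a FENCE packet on the
 * stream's QM fence register.
 */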
9908 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
9909 {
9910         struct hl_cb *cb = prop->data;
9911         void *buf = (void *) (uintptr_t) (cb->kernel_address);
9912
9913         u64 monitor_base, fence_addr = 0;
9914         u32 stream_index, size = prop->size;
9915         u16 msg_addr_offset;
9916
9917         stream_index = prop->q_idx % 4;
9918         fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
9919                         QM_FENCE2_OFFSET + stream_index * 4;
9920
9921         /*
9922          * monitor_base should be the content of the base0 address registers,
9923          * so it will be added to the msg short offsets
9924          */
9925         monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9926
9927         /* First monitor config packet: low address of the sync */
9928         msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
9929                                 monitor_base;
9930
9931         size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
9932
9933         /* Second monitor config packet: high address of the sync */
9934         msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
9935                                 monitor_base;
9936
9937         size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
9938
9939         /*
9940          * Third monitor config packet: the payload, i.e. what to write when the
9941          * sync triggers
9942          */
9943         msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
9944                                 monitor_base;
9945
9946         size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9947
9948         /* Fourth monitor config packet: bind the monitor to a sync object */
9949         msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
9950
9951         size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
9952                                                 prop->sob_val, msg_addr_offset);
9953
9954         /* Fence packet */
9955         size += gaudi2_add_fence_pkt(buf + size);
9956
9957         return size;
9958 }
9959
9960 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
9961 {
9962         struct hl_hw_sob *hw_sob = data;
9963
9964         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
9965
9966         WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
9967
9968         kref_init(&hw_sob->kref);
9969 }
9970
9971 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
9972 {
9973 }
9974
9975 static u64 gaudi2_get_device_time(struct hl_device *hdev)
9976 {
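        /* Compose the 64-bit device time from the upper and lower 32-bit timestamp counter registers */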
9977         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9978
9979         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9980 }
9981
9982 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
9983 {
9984         return 0;
9985 }
9986
9987 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
9988                                         struct hl_cs *cs, u32 wait_queue_id,
9989                                         u32 collective_engine_id, u32 encaps_signal_offset)
9990 {
9991         return -EINVAL;
9992 }
9993
9994 /*
9995  * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned address
9996  *                            to a DMMU page-size (64MB) address before mapping it in
9997  *                            the MMU.
9998  * The operation is performed on both the virtual and physical addresses.
9999  * For a device with 6 HBMs the scramble is:
10000  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10001  *
10002  * Example:
10003  * =============================================================================
10004  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10005  * Phys address                                                     in MMU last
10006  *                                                                    HOP
10007  * =============================================================================
10008  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10009  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10010  * =============================================================================
10011  */
10012 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10013 {
10014         struct asic_fixed_properties *prop = &hdev->asic_prop;
10015         u32 divisor, mod_va;
10016         u64 div_va;
10017
10018         /* accept any address in the DRAM address space */
10019         if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10020                                                                         VA_HBM_SPACE_END)) {
10021
10022                 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10023                 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10024                 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10025                         (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10026                         (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10027         }
10028
10029         return raw_addr;
10030 }
10031
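/*
 * gaudi2_mmu_descramble_addr - the inverse of gaudi2_mmu_scramble_addr: split the scrambled
 * DRAM offset into 64MB DMMU pages and reconstruct the original address based on the
 * functional-HBM scramble memory size.
 */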
10032 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10033 {
10034         struct asic_fixed_properties *prop = &hdev->asic_prop;
10035         u32 divisor, mod_va;
10036         u64 div_va;
10037
10038         /* accept any address in the DRAM address space */
10039         if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10040                                                                         VA_HBM_SPACE_END)) {
10041
10042                 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10043                 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10044                                         PAGE_SIZE_64MB, &mod_va);
10045
10046                 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10047                                         (div_va * divisor + mod_va));
10048         }
10049
10050         return scrambled_addr;
10051 }
10052
10053 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10054 {
10055         u32 base = 0, dcore_id, dec_id;
10056
10057         if (core_id >= NUMBER_OF_DEC) {
10058                 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10059                 goto out;
10060         }
10061
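        /* Decoder IDs 0-7 are the dcore decoders; the remaining IDs map to the PCIe shared decoders */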
10062         if (core_id < 8) {
10063                 dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10064                 dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10065
10066                 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10067                                 dec_id * DCORE_VDEC_OFFSET;
10068         } else {
10069                 /* PCIe Shared Decoder */
10070                 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10071         }
10072 out:
10073         return base;
10074 }
10075
10076 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10077                                 u32 *block_size, u32 *block_id)
10078 {
10079         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10080         int i;
10081
10082         for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10083                 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10084                         *block_id = i;
10085                         if (block_size)
10086                                 *block_size = gaudi2->mapped_blocks[i].size;
10087                         return 0;
10088                 }
10089         }
10090
10091         dev_err(hdev->dev, "Invalid block address %#llx\n", block_addr);
10092
10093         return -EINVAL;
10094 }
10095
10096 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10097                         u32 block_id, u32 block_size)
10098 {
10099         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10100         u64 offset_in_bar;
10101         u64 address;
10102         int rc;
10103
10104         if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10105                 dev_err(hdev->dev, "Invalid block id %u\n", block_id);
10106                 return -EINVAL;
10107         }
10108
10109         /* we allow mapping only an entire block */
10110         if (block_size != gaudi2->mapped_blocks[block_id].size) {
10111                 dev_err(hdev->dev, "Invalid block size %u\n", block_size);
10112                 return -EINVAL;
10113         }
10114
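        /* Translate the block's CFG address into an offset within the CFG/SRAM BAR before mapping it to the user's VMA */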
10115         offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10116
10117         address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10118
10119         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10120                         VM_DONTCOPY | VM_NORESERVE;
10121
10122         rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10123                         block_size, vma->vm_page_prot);
10124         if (rc)
10125                 dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
10126
10127         return rc;
10128 }
10129
10130 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10131 {
10132         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10133
10134         struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10135         u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10136
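        /* Enable events from the FW by writing the CPU-interrupts event ID to the dynamic GIC host interrupts register; relevant only when the CPU queue is up */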
10137         if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10138                 WREG32(irq_handler_offset,
10139                         gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10140 }
10141
10142 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10143 {
10144         switch (mmu_id) {
10145         case HW_CAP_DCORE0_DMMU0:
10146                 *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10147                 break;
10148         case HW_CAP_DCORE0_DMMU1:
10149                 *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10150                 break;
10151         case HW_CAP_DCORE0_DMMU2:
10152                 *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10153                 break;
10154         case HW_CAP_DCORE0_DMMU3:
10155                 *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10156                 break;
10157         case HW_CAP_DCORE1_DMMU0:
10158                 *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10159                 break;
10160         case HW_CAP_DCORE1_DMMU1:
10161                 *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10162                 break;
10163         case HW_CAP_DCORE1_DMMU2:
10164                 *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10165                 break;
10166         case HW_CAP_DCORE1_DMMU3:
10167                 *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10168                 break;
10169         case HW_CAP_DCORE2_DMMU0:
10170                 *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10171                 break;
10172         case HW_CAP_DCORE2_DMMU1:
10173                 *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10174                 break;
10175         case HW_CAP_DCORE2_DMMU2:
10176                 *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10177                 break;
10178         case HW_CAP_DCORE2_DMMU3:
10179                 *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10180                 break;
10181         case HW_CAP_DCORE3_DMMU0:
10182                 *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10183                 break;
10184         case HW_CAP_DCORE3_DMMU1:
10185                 *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10186                 break;
10187         case HW_CAP_DCORE3_DMMU2:
10188                 *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10189                 break;
10190         case HW_CAP_DCORE3_DMMU3:
10191                 *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10192                 break;
10193         case HW_CAP_PMMU:
10194                 *mmu_base = mmPMMU_HBW_MMU_BASE;
10195                 break;
10196         default:
10197                 return -EINVAL;
10198         }
10199
10200         return 0;
10201 }
10202
10203 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10204 {
10205         bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10206         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10207         u32 mmu_base;
10208
10209         if (!(gaudi2->hw_cap_initialized & mmu_id))
10210                 return;
10211
10212         if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10213                 return;
10214
10215         gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10216         gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10217 }
10218
10219 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10220 {
10221         u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10222
10223         /* check all HMMUs */
10224         for (i = 0 ; i < num_of_hmmus ; i++) {
10225                 mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10226
10227                 if (mmu_cap_mask & mmu_id)
10228                         gaudi2_ack_mmu_error(hdev, mmu_id);
10229         }
10230
10231         /* check PMMU */
10232         if (mmu_cap_mask & HW_CAP_PMMU)
10233                 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10234
10235         return 0;
10236 }
10237
10238 static void gaudi2_get_msi_info(__le32 *table)
10239 {
10240         table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10241 }
10242
10243 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10244 {
10245         switch (pll_idx) {
10246         case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10247         case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10248         case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10249         case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10250         case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10251         case HL_GAUDI2_MME_PLL: return MME_PLL;
10252         case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10253         case HL_GAUDI2_IF_PLL: return IF_PLL;
10254         case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10255         case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10256         case HL_GAUDI2_VID_PLL: return VID_PLL;
10257         case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10258         default: return -EINVAL;
10259         }
10260 }
10261
10262 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10263 {
10264         /* Not implemented */
10265         return 0;
10266 }
10267
10268 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10269 {
10270         /* Not implemented */
10271         return 0;
10272 }
10273
10274 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10275                                 struct hl_device *hdev, struct hl_mon_state_dump *mon)
10276 {
10277         /* Not implemented */
10278         return 0;
10279 }
10280
10281
10282 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10283                                 u64 status_base_offset, enum hl_sync_engine_type engine_type,
10284                                 u32 engine_id, char **buf, size_t *size, size_t *offset)
10285 {
10286         /* Not implemented */
10287         return 0;
10288 }
10289
10290
10291 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10292         .monitor_valid = gaudi2_monitor_valid,
10293         .print_single_monitor = gaudi2_print_single_monitor,
10294         .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10295         .print_fences_single_engine = gaudi2_print_fences_single_engine,
10296 };
10297
10298 static void gaudi2_state_dump_init(struct hl_device *hdev)
10299 {
10300         /* Not implemented */
10301         hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10302         hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10303 }
10304
10305 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10306 {
10307         return 0;
10308 }
10309
10310 static u32 *gaudi2_get_stream_master_qid_arr(void)
10311 {
10312         return NULL;
10313 }
10314
10315 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10316                                 struct attribute_group *dev_vrm_attr_grp)
10317 {
10318         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10319         hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10320 }
10321
10322 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10323                                         u32 page_size, u32 *real_page_size, bool is_dram_addr)
10324 {
10325         struct asic_fixed_properties *prop = &hdev->asic_prop;
10326
10327         /* for host pages the page size must be a multiple of the MMU page size */
10328         if (!is_dram_addr) {
10329                 if (page_size % mmu_prop->page_size)
10330                         goto page_size_err;
10331
10332                 *real_page_size = mmu_prop->page_size;
10333                 return 0;
10334         }
10335
10336         if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10337                 goto page_size_err;
10338
10339         /*
10340          * The MMU page size differs from the DRAM page size (more precisely, the DMMU page size
10341          * is greater than the DRAM page size).
10342          * For this reason, work with the DRAM page size and let the MMU scrambling routine handle
10343          * this mismatch when calculating the address to place in the MMU page table
10344          * (in that case, also make sure that dram_page_size is not greater than the
10345          * MMU page size).
10346          */
10347         *real_page_size = prop->dram_page_size;
10348
10349         return 0;
10350
10351 page_size_err:
10352         dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10353                                                         page_size, mmu_prop->page_size >> 10);
10354         return -EFAULT;
10355 }
10356
10357 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10358 {
10359         return -EOPNOTSUPP;
10360 }
10361
10362 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10363 {
10364         struct gaudi2_device *gaudi2 = hdev->asic_specific;
10365
10366         if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
10367                 return 0;
10368
10369         return hl_fw_send_device_activity(hdev, open);
10370 }
10371
10372 static const struct hl_asic_funcs gaudi2_funcs = {
10373         .early_init = gaudi2_early_init,
10374         .early_fini = gaudi2_early_fini,
10375         .late_init = gaudi2_late_init,
10376         .late_fini = gaudi2_late_fini,
10377         .sw_init = gaudi2_sw_init,
10378         .sw_fini = gaudi2_sw_fini,
10379         .hw_init = gaudi2_hw_init,
10380         .hw_fini = gaudi2_hw_fini,
10381         .halt_engines = gaudi2_halt_engines,
10382         .suspend = gaudi2_suspend,
10383         .resume = gaudi2_resume,
10384         .mmap = gaudi2_mmap,
10385         .ring_doorbell = gaudi2_ring_doorbell,
10386         .pqe_write = gaudi2_pqe_write,
10387         .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10388         .asic_dma_free_coherent = gaudi2_dma_free_coherent,
10389         .scrub_device_mem = gaudi2_scrub_device_mem,
10390         .scrub_device_dram = gaudi2_scrub_device_dram,
10391         .get_int_queue_base = NULL,
10392         .test_queues = gaudi2_test_queues,
10393         .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10394         .asic_dma_pool_free = gaudi2_dma_pool_free,
10395         .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10396         .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10397         .asic_dma_unmap_single = gaudi2_dma_unmap_single,
10398         .asic_dma_map_single = gaudi2_dma_map_single,
10399         .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10400         .cs_parser = gaudi2_cs_parser,
10401         .asic_dma_map_sgtable = hl_dma_map_sgtable,
10402         .add_end_of_cb_packets = NULL,
10403         .update_eq_ci = gaudi2_update_eq_ci,
10404         .context_switch = gaudi2_context_switch,
10405         .restore_phase_topology = gaudi2_restore_phase_topology,
10406         .debugfs_read_dma = gaudi2_debugfs_read_dma,
10407         .add_device_attr = gaudi2_add_device_attr,
10408         .handle_eqe = gaudi2_handle_eqe,
10409         .get_events_stat = gaudi2_get_events_stat,
10410         .read_pte = NULL,
10411         .write_pte = NULL,
10412         .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10413         .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10414         .mmu_prefetch_cache_range = NULL,
10415         .send_heartbeat = gaudi2_send_heartbeat,
10416         .debug_coresight = gaudi2_debug_coresight,
10417         .is_device_idle = gaudi2_is_device_idle,
10418         .compute_reset_late_init = gaudi2_compute_reset_late_init,
10419         .hw_queues_lock = gaudi2_hw_queues_lock,
10420         .hw_queues_unlock = gaudi2_hw_queues_unlock,
10421         .get_pci_id = gaudi2_get_pci_id,
10422         .get_eeprom_data = gaudi2_get_eeprom_data,
10423         .get_monitor_dump = gaudi2_get_monitor_dump,
10424         .send_cpu_message = gaudi2_send_cpu_message,
10425         .pci_bars_map = gaudi2_pci_bars_map,
10426         .init_iatu = gaudi2_init_iatu,
10427         .rreg = hl_rreg,
10428         .wreg = hl_wreg,
10429         .halt_coresight = gaudi2_halt_coresight,
10430         .ctx_init = gaudi2_ctx_init,
10431         .ctx_fini = gaudi2_ctx_fini,
10432         .pre_schedule_cs = gaudi2_pre_schedule_cs,
10433         .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10434         .load_firmware_to_device = NULL,
10435         .load_boot_fit_to_device = NULL,
10436         .get_signal_cb_size = gaudi2_get_signal_cb_size,
10437         .get_wait_cb_size = gaudi2_get_wait_cb_size,
10438         .gen_signal_cb = gaudi2_gen_signal_cb,
10439         .gen_wait_cb = gaudi2_gen_wait_cb,
10440         .reset_sob = gaudi2_reset_sob,
10441         .reset_sob_group = gaudi2_reset_sob_group,
10442         .get_device_time = gaudi2_get_device_time,
10443         .pb_print_security_errors = gaudi2_pb_print_security_errors,
10444         .collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10445         .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10446         .get_dec_base_addr = gaudi2_get_dec_base_addr,
10447         .scramble_addr = gaudi2_mmu_scramble_addr,
10448         .descramble_addr = gaudi2_mmu_descramble_addr,
10449         .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10450         .get_hw_block_id = gaudi2_get_hw_block_id,
10451         .hw_block_mmap = gaudi2_block_mmap,
10452         .enable_events_from_fw = gaudi2_enable_events_from_fw,
10453         .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10454         .get_msi_info = gaudi2_get_msi_info,
10455         .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10456         .init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10457         .init_firmware_loader = gaudi2_init_firmware_loader,
10458         .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10459         .state_dump_init = gaudi2_state_dump_init,
10460         .get_sob_addr = gaudi2_get_sob_addr,
10461         .set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10462         .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10463         .check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10464         .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10465         .access_dev_mem = hl_access_dev_mem,
10466         .set_dram_bar_base = gaudi2_set_hbm_bar_base,
10467         .set_engine_cores = gaudi2_set_engine_cores,
10468         .send_device_activity = gaudi2_send_device_activity,
10469 };
10470
10471 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10472 {
10473         hdev->asic_funcs = &gaudi2_funcs;
10474 }