drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
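/* Engine bits covered by the debugfs clock gating mask: MME 0, MME 2 and
 * all eight TPC engines (TPC_0 through TPC_7).
 */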
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
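/* Queues that serve as stream masters: all four streams of the two PCI DMA
 * channels (DMA 0 and DMA 1).
 */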
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
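/* Map logical DMA channels to physical DMA engines: channels 0-1 handle PCI
 * (host <-> device) transfers, channels 2-7 handle HBM transfers.
 */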
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
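/* Return true only for packet opcodes the driver knows how to parse; any
 * other opcode is rejected when a command buffer is validated.
 */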
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
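/* Queue type per queue ID: the PCI DMA queues (DMA 0/1) are external and
 * accept jobs from the host, the CPU PQ is driver-only, and all remaining
 * DMA, MME, TPC and NIC queues are internal.
 */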
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
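
/* Collective role of a queue: every external queue acts as a collective
 * master, DMA5, TPC7 and the NIC queues act as collective slaves, and all
 * other queues do not take part in collective operations.
 */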
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
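/* Default max/DC power values depend on the card type (PMC vs. PCI); on PMC
 * cards the DC default also depends on whether FW security is enabled.
 */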
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = HOP0_SHIFT;
602         prop->pmmu.hop1_shift = HOP1_SHIFT;
603         prop->pmmu.hop2_shift = HOP2_SHIFT;
604         prop->pmmu.hop3_shift = HOP3_SHIFT;
605         prop->pmmu.hop4_shift = HOP4_SHIFT;
606         prop->pmmu.hop0_mask = HOP0_MASK;
607         prop->pmmu.hop1_mask = HOP1_MASK;
608         prop->pmmu.hop2_mask = HOP2_MASK;
609         prop->pmmu.hop3_mask = HOP3_MASK;
610         prop->pmmu.hop4_mask = HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616
617         /* PMMU and HPMMU are the same except for the page size */
618         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621         /* shifts and masks are the same in PMMU and DMMU */
622         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624         prop->dmmu.end_addr = VA_HOST_SPACE_END;
625         prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627         prop->cfg_size = CFG_SIZE;
628         prop->max_asid = MAX_ASID;
629         prop->num_of_events = GAUDI_EVENT_SIZE;
630         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632         set_default_power_values(hdev);
633
634         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641                                         CARD_NAME_MAX_LEN);
642
643         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646                         prop->sync_stream_first_sob +
647                         (num_sync_stream_queues * HL_RSVD_SOBS);
648         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_mon +
650                         (num_sync_stream_queues * HL_RSVD_MONS);
651
652         prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654         for (i = 0 ; i < HL_MAX_DCORES ; i++)
655                 prop->first_available_cq[i] = USHRT_MAX;
656
657         prop->fw_cpu_boot_dev_sts0_valid = false;
658         prop->fw_cpu_boot_dev_sts1_valid = false;
659         prop->hard_reset_done_by_fw = false;
660         prop->gic_interrupts_enable = true;
661
662         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664         return 0;
665 }
666
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669         static const char * const name[] = {"SRAM", "CFG", "HBM"};
670         bool is_wc[3] = {false, false, true};
671         int rc;
672
673         rc = hl_pci_bars_map(hdev, name, is_wc);
674         if (rc)
675                 return rc;
676
677         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
679
680         return 0;
681 }
682
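/* Move the HBM BAR window (inbound iATU region 2) to point at @addr and
 * return the previous window base, or U64_MAX if the region cannot be
 * re-programmed (e.g. when the iATU is owned by the firmware).
 */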
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685         struct gaudi_device *gaudi = hdev->asic_specific;
686         struct hl_inbound_pci_region pci_region;
687         u64 old_addr = addr;
688         int rc;
689
690         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691                 return old_addr;
692
693         if (hdev->asic_prop.iatu_done_by_fw)
694                 return U64_MAX;
695
696         /* Inbound Region 2 - Bar 4 - Point to HBM */
697         pci_region.mode = PCI_BAR_MATCH_MODE;
698         pci_region.bar = HBM_BAR_ID;
699         pci_region.addr = addr;
700         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701         if (rc)
702                 return U64_MAX;
703
704         if (gaudi) {
705                 old_addr = gaudi->hbm_bar_cur_addr;
706                 gaudi->hbm_bar_cur_addr = addr;
707         }
708
709         return old_addr;
710 }
711
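/* Program the PCIe iATU: three BAR-match inbound regions (SRAM + CFG, SPI
 * flash, HBM) and one outbound region exposing the host physical address
 * space. Skipped entirely when the firmware has already configured the iATU.
 */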
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714         struct hl_inbound_pci_region inbound_region;
715         struct hl_outbound_pci_region outbound_region;
716         int rc;
717
718         if (hdev->asic_prop.iatu_done_by_fw)
719                 return 0;
720
721         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722         inbound_region.mode = PCI_BAR_MATCH_MODE;
723         inbound_region.bar = SRAM_BAR_ID;
724         inbound_region.addr = SRAM_BASE_ADDR;
725         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726         if (rc)
727                 goto done;
728
729         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730         inbound_region.mode = PCI_BAR_MATCH_MODE;
731         inbound_region.bar = CFG_BAR_ID;
732         inbound_region.addr = SPI_FLASH_BASE_ADDR;
733         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734         if (rc)
735                 goto done;
736
737         /* Inbound Region 2 - Bar 4 - Point to HBM */
738         inbound_region.mode = PCI_BAR_MATCH_MODE;
739         inbound_region.bar = HBM_BAR_ID;
740         inbound_region.addr = DRAM_PHYS_BASE;
741         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742         if (rc)
743                 goto done;
744
745         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746
747         /* Outbound Region 0 - Point to Host */
748         outbound_region.addr = HOST_PHYS_BASE;
749         outbound_region.size = HOST_PHYS_SIZE;
750         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751
752 done:
753         return rc;
754 }
755
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758         return RREG32(mmHW_STATE);
759 }
760
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763         struct asic_fixed_properties *prop = &hdev->asic_prop;
764         struct pci_dev *pdev = hdev->pdev;
765         u32 fw_boot_status;
766         int rc;
767
768         rc = gaudi_set_fixed_properties(hdev);
769         if (rc) {
770                 dev_err(hdev->dev, "Failed setting fixed properties\n");
771                 return rc;
772         }
773
774         /* Check BAR sizes */
775         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776                 dev_err(hdev->dev,
777                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778                         SRAM_BAR_ID,
779                         (unsigned long long) pci_resource_len(pdev,
780                                                         SRAM_BAR_ID),
781                         SRAM_BAR_SIZE);
782                 rc = -ENODEV;
783                 goto free_queue_props;
784         }
785
786         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787                 dev_err(hdev->dev,
788                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789                         CFG_BAR_ID,
790                         (unsigned long long) pci_resource_len(pdev,
791                                                                 CFG_BAR_ID),
792                         CFG_BAR_SIZE);
793                 rc = -ENODEV;
794                 goto free_queue_props;
795         }
796
797         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798
799         /* If FW security is enabled at this point it means no access to ELBI */
800         if (hdev->asic_prop.fw_security_enabled) {
801                 hdev->asic_prop.iatu_done_by_fw = true;
802
803                 /*
804                  * GIC-security-bit can ONLY be set by CPUCP, so at this stage
805                  * the decision can only be made based on PCI ID security.
806                  */
807                 hdev->asic_prop.gic_interrupts_enable = false;
808                 goto pci_init;
809         }
810
811         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812                                 &fw_boot_status);
813         if (rc)
814                 goto free_queue_props;
815
816         /* Check whether FW is configuring iATU */
817         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819                 hdev->asic_prop.iatu_done_by_fw = true;
820
821 pci_init:
822         rc = hl_pci_init(hdev);
823         if (rc)
824                 goto free_queue_props;
825
826         /* Before continuing with the initialization, we need to read the preboot
827          * version to determine whether we are running security-enabled firmware
828          */
829         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830                                         mmCPU_BOOT_DEV_STS0,
831                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832                                         mmCPU_BOOT_ERR1,
833                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834         if (rc) {
835                 if (hdev->reset_on_preboot_fail)
836                         hdev->asic_funcs->hw_fini(hdev, true, false);
837                 goto pci_fini;
838         }
839
840         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841                 dev_info(hdev->dev,
842                         "H/W state is dirty, must reset before initializing\n");
843                 hdev->asic_funcs->hw_fini(hdev, true, false);
844         }
845
846         return 0;
847
848 pci_fini:
849         hl_pci_fini(hdev);
850 free_queue_props:
851         kfree(hdev->asic_prop.hw_queues_props);
852         return rc;
853 }
854
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857         kfree(hdev->asic_prop.hw_queues_props);
858         hl_pci_fini(hdev);
859
860         return 0;
861 }
862
863 /**
864  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865  *
866  * @hdev: pointer to hl_device structure
867  *
868  */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871         struct asic_fixed_properties *prop = &hdev->asic_prop;
872         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874         int rc;
875
876         if (hdev->asic_prop.fw_security_enabled) {
877                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878
879                 if (rc)
880                         return rc;
881
882                 freq = pll_freq_arr[2];
883         } else {
884                 /* Backward compatibility */
885                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887                 nr = RREG32(mmPSOC_CPU_PLL_NR);
888                 nf = RREG32(mmPSOC_CPU_PLL_NF);
889                 od = RREG32(mmPSOC_CPU_PLL_OD);
890
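                /* The PLL output is ref_clk * (nf + 1) / ((nr + 1) * (od + 1)),
                 * optionally divided further by (div_fctr + 1). For example,
                 * with hypothetical values of a 50 MHz reference, nr = 0,
                 * nf = 35 and od = 0, the PLL runs at 50 * 36 / (1 * 1) =
                 * 1800 MHz, and div_fctr = 1 would halve that to 900 MHz.
                 */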
891                 if (div_sel == DIV_SEL_REF_CLK ||
892                                 div_sel == DIV_SEL_DIVIDED_REF) {
893                         if (div_sel == DIV_SEL_REF_CLK)
894                                 freq = PLL_REF_CLK;
895                         else
896                                 freq = PLL_REF_CLK / (div_fctr + 1);
897                 } else if (div_sel == DIV_SEL_PLL_CLK ||
898                         div_sel == DIV_SEL_DIVIDED_PLL) {
899                         pll_clk = PLL_REF_CLK * (nf + 1) /
900                                         ((nr + 1) * (od + 1));
901                         if (div_sel == DIV_SEL_PLL_CLK)
902                                 freq = pll_clk;
903                         else
904                                 freq = pll_clk / (div_fctr + 1);
905                 } else {
906                         dev_warn(hdev->dev,
907                                 "Received invalid div select value: %d",
908                                 div_sel);
909                         freq = 0;
910                 }
911         }
912
913         prop->psoc_timestamp_frequency = freq;
914         prop->psoc_pci_pll_nr = nr;
915         prop->psoc_pci_pll_nf = nf;
916         prop->psoc_pci_pll_od = od;
917         prop->psoc_pci_pll_div_factor = div_fctr;
918
919         return 0;
920 }
921
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925         struct asic_fixed_properties *prop = &hdev->asic_prop;
926         struct packet_lin_dma *init_tpc_mem_pkt;
927         struct hl_cs_job *job;
928         struct hl_cb *cb;
929         u64 dst_addr;
930         u32 cb_size, ctl;
931         u8 tpc_id;
932         int rc;
933
934         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935         if (!cb)
936                 return -EFAULT;
937
938         init_tpc_mem_pkt = cb->kernel_address;
939         cb_size = sizeof(*init_tpc_mem_pkt);
940         memset(init_tpc_mem_pkt, 0, cb_size);
941
942         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943
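        /* Build a LIN_DMA packet that copies the TPC kernel from host memory
         * to the SRAM user region, with the register and message barrier bits
         * set so the copy is ordered against the packets that follow.
         */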
944         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948
949         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950
951         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952         dst_addr = (prop->sram_user_base_address &
953                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956
957         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958         if (!job) {
959                 dev_err(hdev->dev, "Failed to allocate a new job\n");
960                 rc = -ENOMEM;
961                 goto release_cb;
962         }
963
964         job->id = 0;
965         job->user_cb = cb;
966         atomic_inc(&job->user_cb->cs_cnt);
967         job->user_cb_size = cb_size;
968         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969         job->patched_cb = job->user_cb;
970         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971
972         hl_debugfs_add_job(hdev, job);
973
974         rc = gaudi_send_job_on_qman0(hdev, job);
975
976         if (rc)
977                 goto free_job;
978
979         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981                 if (rc)
982                         break;
983         }
984
985 free_job:
986         hl_userptr_delete_list(hdev, &job->userptr_list);
987         hl_debugfs_remove_job(hdev, job);
988         kfree(job);
989         atomic_dec(&cb->cs_cnt);
990
991 release_cb:
992         hl_cb_put(cb);
993         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994
995         return rc;
996 }
997
998 /*
999  * gaudi_init_tpc_mem() - Initialize TPC memories.
1000  * @hdev: Pointer to hl_device structure.
1001  *
1002  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003  *
1004  * Return: 0 for success, negative value for error.
1005  */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008         const struct firmware *fw;
1009         size_t fw_size;
1010         void *cpu_addr;
1011         dma_addr_t dma_handle;
1012         int rc, count = 5;
1013
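        /* request_firmware() may return -EINTR if the call is interrupted by
         * a signal; retry a few times before giving up.
         */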
1014 again:
1015         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016         if (rc == -EINTR && count-- > 0) {
1017                 msleep(50);
1018                 goto again;
1019         }
1020
1021         if (rc) {
1022                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023                                 GAUDI_TPC_FW_FILE);
1024                 goto out;
1025         }
1026
1027         fw_size = fw->size;
1028         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1030         if (!cpu_addr) {
1031                 dev_err(hdev->dev,
1032                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1033                         fw_size);
1034                 rc = -ENOMEM;
1035                 goto out;
1036         }
1037
1038         memcpy(cpu_addr, fw->data, fw_size);
1039
1040         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041
1042         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043                         dma_handle);
1044
1045 out:
1046         release_firmware(fw);
1047         return rc;
1048 }
1049
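/* Map the stream's currently active SOB group onto the collective slave
 * queues: one SOB per NIC engine, plus one more that is shared by the DMA5
 * and TPC7 queues, since only one of them participates in the reduction.
 */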
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052         struct gaudi_device *gaudi = hdev->asic_specific;
1053         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054         struct hl_hw_queue *q;
1055         u32 i, sob_id, sob_group_id, queue_id;
1056
1057         /* Iterate through SOB groups and assign a SOB for each slave queue */
1058         sob_group_id =
1059                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061
1062         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1063         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1065                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1066         }
1067
1068         /* Both DMA5 and TPC7 use the same resources since only a single
1069          * engine needs to participate in the reduction process
1070          */
1071         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072         q = &hdev->kernel_queues[queue_id];
1073         q->sync_stream_prop.collective_sob_id =
1074                         sob_id + NIC_NUMBER_OF_ENGINES;
1075
1076         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077         q = &hdev->kernel_queues[queue_id];
1078         q->sync_stream_prop.collective_sob_id =
1079                         sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
1081
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084         struct gaudi_hw_sob_group *hw_sob_group =
1085                 container_of(ref, struct gaudi_hw_sob_group, kref);
1086         struct hl_device *hdev = hw_sob_group->hdev;
1087         int i;
1088
1089         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092
1093         kref_init(&hw_sob_group->kref);
1094 }
1095
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098         struct gaudi_hw_sob_group *hw_sob_group =
1099                 container_of(ref, struct gaudi_hw_sob_group, kref);
1100         struct hl_device *hdev = hw_sob_group->hdev;
1101
1102         dev_crit(hdev->dev,
1103                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1104                 hw_sob_group->base_sob_id);
1105 }
1106
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109         struct gaudi_collective_properties *prop;
1110         int i;
1111
1112         prop = &gaudi->collective_props;
1113
1114         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115
1116         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120         /* Set collective engine bit */
1121         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127         u32 i, sob_id, reserved_sobs_per_group;
1128         struct gaudi_collective_properties *prop;
1129         struct gaudi_device *gaudi;
1130
1131         gaudi = hdev->asic_specific;
1132         prop = &gaudi->collective_props;
1133         sob_id = hdev->asic_prop.collective_first_sob;
1134
1135         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136         reserved_sobs_per_group =
1137                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138
1139         /* Init SOB groups */
1140         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141                 prop->hw_sob_group[i].hdev = hdev;
1142                 prop->hw_sob_group[i].base_sob_id = sob_id;
1143                 sob_id += reserved_sobs_per_group;
1144                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145         }
1146
1147         for (i = 0 ; i < QMAN_STREAMS; i++) {
1148                 prop->next_sob_group_val[i] = 1;
1149                 prop->curr_sob_group_idx[i] = 0;
1150                 gaudi_collective_map_sobs(hdev, i);
1151         }
1152
1153         gaudi_collective_mstr_sob_mask_set(gaudi);
1154
1155         return 0;
1156 }
1157
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160         struct gaudi_device *gaudi = hdev->asic_specific;
1161         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162
1163         kref_put(&cprop->hw_sob_group[sob_group].kref,
1164                                         gaudi_sob_group_hw_reset);
1165 }
1166
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171         struct gaudi_collective_properties *cprop;
1172         struct hl_gen_wait_properties wait_prop;
1173         struct hl_sync_stream_properties *prop;
1174         struct gaudi_device *gaudi;
1175
1176         gaudi = hdev->asic_specific;
1177         cprop = &gaudi->collective_props;
1178         queue_id = job->hw_queue_id;
1179         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180
1181         master_sob_base =
1182                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183         master_monitor = prop->collective_mstr_mon_id[0];
1184
1185         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186
1187         dev_dbg(hdev->dev,
1188                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189                 master_sob_base, cprop->mstr_sob_mask[0],
1190                 cprop->next_sob_group_val[stream],
1191                 master_monitor, queue_id);
1192
1193         wait_prop.data = (void *) job->patched_cb;
1194         wait_prop.sob_base = master_sob_base;
1195         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197         wait_prop.mon_id = master_monitor;
1198         wait_prop.q_idx = queue_id;
1199         wait_prop.size = cb_size;
1200         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201
1202         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203         master_monitor = prop->collective_mstr_mon_id[1];
1204
1205         dev_dbg(hdev->dev,
1206                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207                 master_sob_base, cprop->mstr_sob_mask[1],
1208                 cprop->next_sob_group_val[stream],
1209                 master_monitor, queue_id);
1210
1211         wait_prop.sob_base = master_sob_base;
1212         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213         wait_prop.mon_id = master_monitor;
1214         wait_prop.size = cb_size;
1215         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221         struct hl_gen_wait_properties wait_prop;
1222         struct hl_sync_stream_properties *prop;
1223         u32 queue_id, cb_size = 0;
1224
1225         queue_id = job->hw_queue_id;
1226         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227
1228         if (job->cs->encaps_signals) {
1229                 /* use the encaps signal handle stored earlier in the flow
1230                  * and set the SOB information from the encaps
1231                  * signals handle
1232                  */
1233                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234                                                 cs_cmpl);
1235
1236                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1237                                 job->cs->sequence,
1238                                 cs_cmpl->hw_sob->sob_id,
1239                                 cs_cmpl->sob_val);
1240         }
1241
1242         /* Add to wait CBs using slave monitor */
1243         wait_prop.data = (void *) job->user_cb;
1244         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245         wait_prop.sob_mask = 0x1;
1246         wait_prop.sob_val = cs_cmpl->sob_val;
1247         wait_prop.mon_id = prop->collective_slave_mon_id;
1248         wait_prop.q_idx = queue_id;
1249         wait_prop.size = cb_size;
1250
1251         dev_dbg(hdev->dev,
1252                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254                 prop->collective_slave_mon_id, queue_id);
1255
1256         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257
1258         dev_dbg(hdev->dev,
1259                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260                 prop->collective_sob_id, queue_id);
1261
1262         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263                         prop->collective_sob_id, cb_size, false);
1264 }
1265
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268         struct hl_cs_compl *signal_cs_cmpl =
1269                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270         struct hl_cs_compl *cs_cmpl =
1271                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1272         struct gaudi_collective_properties *cprop;
1273         u32 stream, queue_id, sob_group_offset;
1274         struct gaudi_device *gaudi;
1275         struct hl_device *hdev;
1276         struct hl_cs_job *job;
1277         struct hl_ctx *ctx;
1278
1279         ctx = cs->ctx;
1280         hdev = ctx->hdev;
1281         gaudi = hdev->asic_specific;
1282         cprop = &gaudi->collective_props;
1283
1284         /* In encaps signals case the SOB info will be retrieved from
1285          * the handle in gaudi_collective_slave_init_job.
1286          */
1287         if (!cs->encaps_signals) {
1288                 /* copy the SOB id and value of the signal CS */
1289                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291         }
1292
1293         /* Check again if the signal cs has already completed.
1294          * If so, don't send any wait cs since the hw_sob
1295          * could already be in reset. If the signal has not completed,
1296          * take a refcount on the hw_sob to prevent the sob from being
1297          * reset while the wait cs is not yet submitted.
1298          * Note that this check is protected by two locks,
1299          * the hw queue lock and the completion object lock,
1300          * and the same completion object lock also protects
1301          * the hw_sob reset handler function.
1302          * The hw_queue lock keeps the hw_sob refcount, which is
1303          * changed by the signal/wait flows, from going out of sync.
1304          */
1305         spin_lock(&signal_cs_cmpl->lock);
1306
1307         if (completion_done(&cs->signal_fence->completion)) {
1308                 spin_unlock(&signal_cs_cmpl->lock);
1309                 return -EINVAL;
1310         }
1311         /* Increment kref since all slave queues are now waiting on it */
1312         kref_get(&cs_cmpl->hw_sob->kref);
1313
1314         spin_unlock(&signal_cs_cmpl->lock);
1315
1316         /* Calculate the stream from collective master queue (1st job) */
1317         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318         stream = job->hw_queue_id % 4;
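        /* Each stream reserves HL_RSVD_SOBS SOB groups; pick the group that
         * is currently active for this stream.
         */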
1319         sob_group_offset =
1320                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321
1322         list_for_each_entry(job, &cs->job_list, cs_node) {
1323                 queue_id = job->hw_queue_id;
1324
1325                 if (hdev->kernel_queues[queue_id].collective_mode ==
1326                                 HL_COLLECTIVE_MASTER)
1327                         gaudi_collective_master_init_job(hdev, job, stream,
1328                                                 sob_group_offset);
1329                 else
1330                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331         }
1332
1333         cs_cmpl->sob_group = sob_group_offset;
1334
1335         /* Handle sob group kref and wraparound */
1336         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337         cprop->next_sob_group_val[stream]++;
1338
1339         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340                 /*
1341                  * Decrement as we reached the max value.
1342                  * The release function won't be called here as we've
1343                  * just incremented the refcount.
1344                  */
1345                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346                                 gaudi_sob_group_reset_error);
1347                 cprop->next_sob_group_val[stream] = 1;
1348                 /* only two SOBs are currently in use */
1349                 cprop->curr_sob_group_idx[stream] =
1350                         (cprop->curr_sob_group_idx[stream] + 1) &
1351                                                         (HL_RSVD_SOBS - 1);
1352
1353                 gaudi_collective_map_sobs(hdev, stream);
1354
1355                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356                                 cprop->curr_sob_group_idx[stream], stream);
1357         }
1358
1359         mb();
1360         hl_fence_put(cs->signal_fence);
1361         cs->signal_fence = NULL;
1362
1363         return 0;
1364 }
1365
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367                 struct hl_ctx *ctx, struct hl_cs *cs,
1368                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369                 u32 encaps_signal_offset)
1370 {
1371         struct hw_queue_properties *hw_queue_prop;
1372         struct hl_cs_counters_atomic *cntr;
1373         struct hl_cs_job *job;
1374         struct hl_cb *cb;
1375         u32 cb_size;
1376         bool patched_cb;
1377
1378         cntr = &hdev->aggregated_cs_counters;
1379
1380         if (mode == HL_COLLECTIVE_MASTER) {
1381                 /* The CB of the collective master queue contains:
1382                  * 4 msg short packets for monitor 1 configuration
1383                  * 1 fence packet
1384                  * 4 msg short packets for monitor 2 configuration
1385                  * 1 fence packet
1386                  * 2 msg prot packets for completion and MSI-X
1387                  */
1388                 cb_size = sizeof(struct packet_msg_short) * 8 +
1389                                 sizeof(struct packet_fence) * 2 +
1390                                 sizeof(struct packet_msg_prot) * 2;
1391                 patched_cb = true;
1392         } else {
1393                 /* The CB of the collective slave queues contains:
1394                  * 4 msg short packets for monitor configuration
1395                  * 1 fence packet
1396                  * 1 additional msg short packet for sob signal
1397                  */
1398                 cb_size = sizeof(struct packet_msg_short) * 5 +
1399                                 sizeof(struct packet_fence);
1400                 patched_cb = false;
1401         }
1402
1403         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405         if (!job) {
1406                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1409                 return -ENOMEM;
1410         }
1411
1412         /* Allocate an internal mapped CB for non-patched CBs */
1413         cb = hl_cb_kernel_create(hdev, cb_size,
1414                         hdev->mmu_enable && !patched_cb);
1415         if (!cb) {
1416                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418                 kfree(job);
1419                 return -EFAULT;
1420         }
1421
1422         job->id = 0;
1423         job->cs = cs;
1424         job->user_cb = cb;
1425         atomic_inc(&job->user_cb->cs_cnt);
1426         job->user_cb_size = cb_size;
1427         job->hw_queue_id = queue_id;
1428
1429         /* Since the collective wait CS is guaranteed to have only one
1430          * chunk, we can use this chunk to set the encapsulated signal
1431          * offset in the jobs.
1432          */
1433         if (cs->encaps_signals)
1434                 job->encaps_sig_wait_offset = encaps_signal_offset;
1435
1436         /*
1437          * No need for parsing, the user CB is the patched CB.
1438          * We call hl_cb_destroy() for two reasons: we no longer need the
1439          * CB in the CB idr, and we must decrement its refcount, which was
1440          * incremented inside hl_cb_kernel_create().
1441          */
1442         if (patched_cb)
1443                 job->patched_cb = job->user_cb;
1444         else
1445                 job->patched_cb = NULL;
1446
1447         job->job_cb_size = job->user_cb_size;
1448         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449
1450         /* Increment refcount since for external queues we get a completion */
1451         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452                 cs_get(cs);
1453
1454         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455
1456         list_add_tail(&job->cs_node, &cs->job_list);
1457
1458         hl_debugfs_add_job(hdev, job);
1459
1460         return 0;
1461 }
1462
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464                 struct hl_ctx *ctx, struct hl_cs *cs,
1465                 u32 wait_queue_id, u32 collective_engine_id,
1466                 u32 encaps_signal_offset)
1467 {
1468         struct gaudi_device *gaudi = hdev->asic_specific;
1469         struct hw_queue_properties *hw_queue_prop;
1470         u32 queue_id, collective_queue, num_jobs;
1471         u32 stream, nic_queue, nic_idx = 0;
1472         bool skip;
1473         int i, rc = 0;
1474
1475         /* Verify wait queue id is configured as master */
1476         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1478                 dev_err(hdev->dev,
1479                         "Queue %d is not configured as collective master\n",
1480                         wait_queue_id);
1481                 return -EINVAL;
1482         }
1483
1484         /* Verify engine id is supported */
1485         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487                 dev_err(hdev->dev,
1488                         "Collective wait does not support engine %u\n",
1489                         collective_engine_id);
1490                 return -EINVAL;
1491         }
1492
1493         stream = wait_queue_id % 4;
1494
1495         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497         else
1498                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499
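        /* One job for the collective master plus NUMBER_OF_SOBS_IN_GRP slave
         * jobs: one per NIC engine and one for the reduction engine.
         */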
1500         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502
1503         /* The first job goes to the collective master queue; it will wait
1504          * for the collective slave queues to finish execution.
1505          * The synchronization is done using two monitors:
1506          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1507          * and the reduction engine (DMA5/TPC7).
1508          *
1509          * The rest of the jobs go to the collective slave queues, which
1510          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511          */
1512         for (i = 0 ; i < num_jobs ; i++) {
1513                 if (i == 0) {
1514                         queue_id = wait_queue_id;
1515                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516                                 HL_COLLECTIVE_MASTER, queue_id,
1517                                 wait_queue_id, encaps_signal_offset);
1518                 } else {
1519                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520                                 if (gaudi->hw_cap_initialized &
1521                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522                                         skip = false;
1523                                 else
1524                                         skip = true;
1525
1526                                 queue_id = nic_queue;
1527                                 nic_queue += 4;
1528                                 nic_idx++;
1529
1530                                 if (skip)
1531                                         continue;
1532                         } else {
1533                                 queue_id = collective_queue;
1534                         }
1535
1536                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537                                 HL_COLLECTIVE_SLAVE, queue_id,
1538                                 wait_queue_id, encaps_signal_offset);
1539                 }
1540
1541                 if (rc)
1542                         return rc;
1543         }
1544
1545         return rc;
1546 }
1547
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550         struct gaudi_device *gaudi = hdev->asic_specific;
1551         int rc;
1552
1553         rc = gaudi->cpucp_info_get(hdev);
1554         if (rc) {
1555                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1556                 return rc;
1557         }
1558
1559         if ((hdev->card_type == cpucp_card_type_pci) &&
1560                         (hdev->nic_ports_mask & 0x3)) {
1561                 dev_info(hdev->dev,
1562                         "PCI card detected, only 8 ports are enabled\n");
1563                 hdev->nic_ports_mask &= ~0x3;
1564
1565                 /* Stop and disable unused NIC QMANs */
1566                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569
1570                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573
1574                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576
1577                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578         }
1579
1580         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581         if (rc) {
1582                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583                 return rc;
1584         }
1585
1586         /* Scrub both SRAM and DRAM */
1587         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588         if (rc)
1589                 goto disable_pci_access;
1590
1591         rc = gaudi_fetch_psoc_frequency(hdev);
1592         if (rc) {
1593                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594                 goto disable_pci_access;
1595         }
1596
1597         rc = gaudi_mmu_clear_pgt_range(hdev);
1598         if (rc) {
1599                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600                 goto disable_pci_access;
1601         }
1602
1603         rc = gaudi_init_tpc_mem(hdev);
1604         if (rc) {
1605                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606                 goto disable_pci_access;
1607         }
1608
1609         rc = gaudi_collective_init(hdev);
1610         if (rc) {
1611                 dev_err(hdev->dev, "Failed to init collective\n");
1612                 goto disable_pci_access;
1613         }
1614
1615         /* We only support a single ASID for the user, so for the sake of optimization, just
1616          * initialize the ASID one time during device initialization with the fixed value of 1
1617          */
1618         gaudi_mmu_prepare(hdev, 1);
1619
1620         return 0;
1621
1622 disable_pci_access:
1623         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624
1625         return rc;
1626 }
1627
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630         const struct hwmon_channel_info **channel_info_arr;
1631         int i = 0;
1632
1633         if (!hdev->hl_chip_info->info)
1634                 return;
1635
1636         channel_info_arr = hdev->hl_chip_info->info;
1637
1638         while (channel_info_arr[i]) {
1639                 kfree(channel_info_arr[i]->config);
1640                 kfree(channel_info_arr[i]);
1641                 i++;
1642         }
1643
1644         kfree(channel_info_arr);
1645
1646         hdev->hl_chip_info->info = NULL;
1647 }
1648
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653         int i, j, rc = 0;
1654
1655         /*
1656          * The device CPU works with 40-bit addresses, while bit 39 must be set
1657          * to '1' when accessing the host.
1658          * Bits 49:39 of the full host address are saved for a later
1659          * configuration of the HW to perform the extension to 50 bits.
1660          * Because there is a single HW register that holds the extension bits,
1661          * these bits must be identical across the entire allocated range.
1662          */
1663
1664         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665                 virt_addr_arr[i] =
1666                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1668                                                 &dma_addr_arr[i],
1669                                                 GFP_KERNEL | __GFP_ZERO);
1670                 if (!virt_addr_arr[i]) {
1671                         rc = -ENOMEM;
1672                         goto free_dma_mem_arr;
1673                 }
1674
1675                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
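                /* Keep this allocation only if the start and end of the
                 * range share the same MSB extension bits; otherwise retry.
                 */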
1676                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678                         break;
1679         }
1680
1681         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682                 dev_err(hdev->dev,
1683                         "MSBs of CPU accessible DMA memory are not identical across the entire range\n");
1684                 rc = -EFAULT;
1685                 goto free_dma_mem_arr;
1686         }
1687
1688         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690         hdev->cpu_pci_msb_addr =
1691                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692
1693         if (!hdev->asic_prop.fw_security_enabled)
1694                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695
1696 free_dma_mem_arr:
1697         for (j = 0 ; j < i ; j++)
1698                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1700                                                 virt_addr_arr[j],
1701                                                 dma_addr_arr[j]);
1702
1703         return rc;
1704 }
1705
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708         struct gaudi_device *gaudi = hdev->asic_specific;
1709         struct gaudi_internal_qman_info *q;
1710         u32 i;
1711
1712         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713                 q = &gaudi->internal_qmans[i];
1714                 if (!q->pq_kernel_addr)
1715                         continue;
1716                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717                                                         q->pq_kernel_addr,
1718                                                         q->pq_dma_addr);
1719         }
1720 }
1721
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724         struct gaudi_device *gaudi = hdev->asic_specific;
1725         struct gaudi_internal_qman_info *q;
1726         int rc, i;
1727
1728         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730                         continue;
1731
1732                 q = &gaudi->internal_qmans[i];
1733
1734                 switch (i) {
1735                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737                         break;
1738                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740                         break;
1741                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743                         break;
1744                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746                         break;
1747                 default:
1748                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1749                         rc = -EINVAL;
1750                         goto free_internal_qmans_pq_mem;
1751                 }
1752
1753                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754                                                 hdev, q->pq_size,
1755                                                 &q->pq_dma_addr,
1756                                                 GFP_KERNEL | __GFP_ZERO);
1757                 if (!q->pq_kernel_addr) {
1758                         rc = -ENOMEM;
1759                         goto free_internal_qmans_pq_mem;
1760                 }
1761         }
1762
1763         return 0;
1764
1765 free_internal_qmans_pq_mem:
1766         gaudi_free_internal_qmans_pq_mem(hdev);
1767         return rc;
1768 }
1769
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772         struct asic_fixed_properties *prop = &hdev->asic_prop;
1773         struct pci_mem_region *region;
1774
1775         /* CFG */
1776         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777         region->region_base = CFG_BASE;
1778         region->region_size = CFG_SIZE;
1779         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780         region->bar_size = CFG_BAR_SIZE;
1781         region->bar_id = CFG_BAR_ID;
1782         region->used = 1;
1783
1784         /* SRAM */
1785         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786         region->region_base = SRAM_BASE_ADDR;
1787         region->region_size = SRAM_SIZE;
1788         region->offset_in_bar = 0;
1789         region->bar_size = SRAM_BAR_SIZE;
1790         region->bar_id = SRAM_BAR_ID;
1791         region->used = 1;
1792
1793         /* DRAM */
1794         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795         region->region_base = DRAM_PHYS_BASE;
1796         region->region_size = hdev->asic_prop.dram_size;
1797         region->offset_in_bar = 0;
1798         region->bar_size = prop->dram_pci_bar_size;
1799         region->bar_id = HBM_BAR_ID;
1800         region->used = 1;
1801
1802         /* SP SRAM */
1803         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804         region->region_base = PSOC_SCRATCHPAD_ADDR;
1805         region->region_size = PSOC_SCRATCHPAD_SIZE;
1806         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807         region->bar_size = CFG_BAR_SIZE;
1808         region->bar_id = CFG_BAR_ID;
1809         region->used = 1;
1810 }
1811
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814         struct gaudi_device *gaudi;
1815         u32 i, event_id = 0;
1816         int rc;
1817
1818         /* Allocate device structure */
1819         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820         if (!gaudi)
1821                 return -ENOMEM;
1822
1823         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824                 if (gaudi_irq_map_table[i].valid) {
1825                         if (event_id == GAUDI_EVENT_SIZE) {
1826                                 dev_err(hdev->dev,
1827                                         "Event array exceeds the limit of %u events\n",
1828                                         GAUDI_EVENT_SIZE);
1829                                 rc = -EINVAL;
1830                                 goto free_gaudi_device;
1831                         }
1832
1833                         gaudi->events[event_id++] =
1834                                         gaudi_irq_map_table[i].fc_id;
1835                 }
1836         }
1837
1838         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839
1840         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841
1842         hdev->asic_specific = gaudi;
1843
1844         /* Create DMA pool for small allocations */
1845         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847         if (!hdev->dma_pool) {
1848                 dev_err(hdev->dev, "failed to create DMA pool\n");
1849                 rc = -ENOMEM;
1850                 goto free_gaudi_device;
1851         }
1852
1853         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854         if (rc)
1855                 goto free_dma_pool;
1856
1857         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858         if (!hdev->cpu_accessible_dma_pool) {
1859                 dev_err(hdev->dev,
1860                         "Failed to create CPU accessible DMA pool\n");
1861                 rc = -ENOMEM;
1862                 goto free_cpu_dma_mem;
1863         }
1864
1865         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1867                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868         if (rc) {
1869                 dev_err(hdev->dev,
1870                         "Failed to add memory to CPU accessible DMA pool\n");
1871                 rc = -EFAULT;
1872                 goto free_cpu_accessible_dma_pool;
1873         }
1874
1875         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876         if (rc)
1877                 goto free_cpu_accessible_dma_pool;
1878
1879         spin_lock_init(&gaudi->hw_queues_lock);
1880         mutex_init(&gaudi->clk_gate_mutex);
1881
1882         hdev->supports_sync_stream = true;
1883         hdev->supports_coresight = true;
1884         hdev->supports_staged_submission = true;
1885         hdev->supports_wait_for_multi_cs = true;
1886
1887         hdev->asic_funcs->set_pci_memory_regions(hdev);
1888         hdev->stream_master_qid_arr =
1889                                 hdev->asic_funcs->get_stream_master_qid_arr();
1890         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891
1892         return 0;
1893
1894 free_cpu_accessible_dma_pool:
1895         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897         if (!hdev->asic_prop.fw_security_enabled)
1898                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899                                         hdev->cpu_pci_msb_addr);
1900         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1902                         hdev->cpu_accessible_dma_mem,
1903                         hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905         dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907         kfree(gaudi);
1908         return rc;
1909 }
1910
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913         struct gaudi_device *gaudi = hdev->asic_specific;
1914
1915         gaudi_free_internal_qmans_pq_mem(hdev);
1916
1917         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918
1919         if (!hdev->asic_prop.fw_security_enabled)
1920                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921                                         hdev->cpu_pci_msb_addr);
1922
1923         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1925                         hdev->cpu_accessible_dma_mem,
1926                         hdev->cpu_accessible_dma_address);
1927
1928         dma_pool_destroy(hdev->dma_pool);
1929
1930         mutex_destroy(&gaudi->clk_gate_mutex);
1931
1932         kfree(gaudi);
1933
1934         return 0;
1935 }
1936
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939         struct hl_device *hdev = arg;
1940         int i;
1941
1942         if (hdev->disabled)
1943                 return IRQ_HANDLED;
1944
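        /* In single MSI mode one vector serves all completion queues and the
         * event queue, so invoke every handler on each interrupt.
         */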
1945         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947
1948         hl_irq_handler_eq(irq, &hdev->event_queue);
1949
1950         return IRQ_HANDLED;
1951 }
1952
1953 /*
1954  * For backward compatibility, new MSI interrupts should be set after the
1955  * existing CPU and NIC interrupts.
1956  */
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958                                 bool cpu_eq)
1959 {
1960         int msi_vec;
1961
1962         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1965
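        /* CQ vectors below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU EQ vector
         * map 1:1 to their index. Any newer interrupt is placed after the
         * CPU and NIC vectors, hence the (NIC_NUMBER_OF_ENGINES + 1) offset.
         */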
1966         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1968
1969         return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
1971
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974         int rc, irq;
1975
1976         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977
1978         irq = gaudi_pci_irq_vector(hdev, 0, false);
1979         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980                         "gaudi single msi", hdev);
1981         if (rc)
1982                 dev_err(hdev->dev,
1983                         "Failed to request single MSI IRQ\n");
1984
1985         return rc;
1986 }
1987
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990         int cq_cnt = hdev->asic_prop.completion_queues_count;
1991         int rc, i, irq_cnt_init, irq;
1992
1993         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994                 irq = gaudi_pci_irq_vector(hdev, i, false);
1995                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996                                 &hdev->completion_queue[i]);
1997                 if (rc) {
1998                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1999                         goto free_irqs;
2000                 }
2001         }
2002
2003         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005                                 &hdev->event_queue);
2006         if (rc) {
2007                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2008                 goto free_irqs;
2009         }
2010
2011         return 0;
2012
2013 free_irqs:
2014         for (i = 0 ; i < irq_cnt_init ; i++)
2015                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016                                 &hdev->completion_queue[i]);
2017         return rc;
2018 }
2019
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022         struct gaudi_device *gaudi = hdev->asic_specific;
2023         int rc;
2024
2025         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026                 return 0;
2027
2028         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029         if (rc < 0) {
2030                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031                 return rc;
2032         }
2033
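        /* On success, rc holds the number of vectors actually allocated;
         * fall back to single MSI mode if fewer than required were granted.
         */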
2034         if (rc < NUMBER_OF_INTERRUPTS) {
2035                 gaudi->multi_msi_mode = false;
2036                 rc = gaudi_enable_msi_single(hdev);
2037         } else {
2038                 gaudi->multi_msi_mode = true;
2039                 rc = gaudi_enable_msi_multi(hdev);
2040         }
2041
2042         if (rc)
2043                 goto free_pci_irq_vectors;
2044
2045         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046
2047         return 0;
2048
2049 free_pci_irq_vectors:
2050         pci_free_irq_vectors(hdev->pdev);
2051         return rc;
2052 }
2053
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056         struct gaudi_device *gaudi = hdev->asic_specific;
2057         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058
2059         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060                 return;
2061
2062         /* Wait for all in-flight IRQ handlers to finish */
2063         if (gaudi->multi_msi_mode) {
2064                 for (i = 0 ; i < cq_cnt ; i++)
2065                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066
2067                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2068                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2069                                                 true));
2070         } else {
2071                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072         }
2073 }
2074
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077         struct gaudi_device *gaudi = hdev->asic_specific;
2078         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079
2080         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081                 return;
2082
2083         gaudi_sync_irqs(hdev);
2084
2085         if (gaudi->multi_msi_mode) {
2086                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087                                                 true);
2088                 free_irq(irq, &hdev->event_queue);
2089
2090                 for (i = 0 ; i < cq_cnt ; i++) {
2091                         irq = gaudi_pci_irq_vector(hdev, i, false);
2092                         free_irq(irq, &hdev->completion_queue[i]);
2093                 }
2094         } else {
2095                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096         }
2097
2098         pci_free_irq_vectors(hdev->pdev);
2099
2100         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105         struct gaudi_device *gaudi = hdev->asic_specific;
2106
2107         if (hdev->asic_prop.fw_security_enabled)
2108                 return;
2109
2110         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112                 return;
2113
2114         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115                 return;
2116
2117         if (!hdev->sram_scrambler_enable)
2118                 return;
2119
2120         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136
2137         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153
2154         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170
2171         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176         struct gaudi_device *gaudi = hdev->asic_specific;
2177
2178         if (hdev->asic_prop.fw_security_enabled)
2179                 return;
2180
2181         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183                 return;
2184
2185         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186                 return;
2187
2188         if (!hdev->dram_scrambler_enable)
2189                 return;
2190
2191         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207
2208         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224
2225         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241
2242         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247         if (hdev->asic_prop.fw_security_enabled)
2248                 return;
2249
2250         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252                 return;
2253
2254         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258
2259         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263
2264         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268
2269         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273
2274         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278
2279         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283
2284         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288
2289         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293
2294         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298
2299         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303
2304         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308
2309         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313
2314         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318
2319         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323
2324         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328
2329         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333
2334         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338
2339         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343
2344         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348
2349         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353
2354         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358
2359         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363
2364         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368
2369         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373
2374         if (!hdev->dram_scrambler_enable) {
2375                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379
2380                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384
2385                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389
2390                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394
2395                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399
2400                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404
2405                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409
2410                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414
2415                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419
2420                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424
2425                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429
2430                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434
2435                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439
2440                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444
2445                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449
2450                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454
2455                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459
2460                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464
2465                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469
2470                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474
2475                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479
2480                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484
2485                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489
2490                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494         }
2495
2496         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500
2501         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505
2506         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510
2511         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560
2561         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565
2566         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570
2571         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575
2576         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580
2581         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585
2586         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590
2591         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595
2596         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600
2601         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605
2606         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610
2611         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616
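     /*
      * Note: the comment below is added for clarity and describes only what the
      * code in this function does. Program the HBM read/write credit counters of
      * the four DMA_IF units (E_N, E_S, W_N, W_S) and enable their read/write
      * credit mechanisms. This is skipped when firmware security is enabled or
      * when the boot status reports that the firmware already configured the
      * HBM credits.
      */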
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620
2621         if (hdev->asic_prop.fw_security_enabled)
2622                 return;
2623
2624         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626                 return;
2627
2628         hbm0_wr = 0x33333333;
2629         hbm0_rd = 0x77777777;
2630         hbm1_wr = 0x55555555;
2631         hbm1_rd = 0xDDDDDDDD;
2632
2633         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637
2638         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642
2643         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647
2648         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652
2653         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665
2666         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682         u32 tpc_offset;
2683         int tpc_id, i;
2684
2685         gaudi_init_e2e(hdev);
2686         gaudi_init_hbm_cred(hdev);
2687
2688         for (tpc_id = 0, tpc_offset = 0;
2689                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2690                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691                 /* Mask all arithmetic interrupts from TPC */
2692                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693                 /* Set 16 cache lines */
2694                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695                                 ICACHE_FETCH_LINE_NUM, 2);
2696         }
2697
2698         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2699         for (i = 0 ; i < 128 ; i += 8)
2700                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701
2702         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709                                         int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711         struct cpu_dyn_regs *dyn_regs =
2712                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715         u32 q_off, dma_qm_offset;
2716         u32 dma_qm_err_cfg, irq_handler_offset;
2717
2718         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719
2720         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724         so_base_en_lo = lower_32_bits(CFG_BASE +
2725                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726         so_base_en_hi = upper_32_bits(CFG_BASE +
2727                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732         so_base_ws_lo = lower_32_bits(CFG_BASE +
2733                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734         so_base_ws_hi = upper_32_bits(CFG_BASE +
2735                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736
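             /*
              * Per-stream QMAN registers (PQ_BASE_LO_0..3 etc.) appear to be
              * laid out as consecutive 32-bit registers, hence the
              * qman_id * 4 byte stride used below.
              */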
2737         q_off = dma_qm_offset + qman_id * 4;
2738
2739         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741
2742         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745
2746         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748                                                         QMAN_LDMA_SRC_OFFSET);
2749         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750                                                         QMAN_LDMA_DST_OFFSET);
2751
2752         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760
2761         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762
2763         /* The following configuration is needed only once per QMAN */
2764         if (qman_id == 0) {
2765                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768
2769                 /* Configure RAZWI IRQ */
2770                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771                 if (hdev->stop_on_err)
2772                         dma_qm_err_cfg |=
2773                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774
2775                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776
2777                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778                         lower_32_bits(CFG_BASE + irq_handler_offset));
2779                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780                         upper_32_bits(CFG_BASE + irq_handler_offset));
2781
2782                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784                                                                         dma_id);
2785
2786                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787                                 QM_ARB_ERR_MSG_EN_MASK);
2788
2789                 /* Increase ARB WDT to support streams architecture */
2790                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791                                 GAUDI_ARB_WDT_TIMEOUT);
2792
2793                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2795
2796                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797         }
2798 }
2799
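     /*
      * Configure the DMA core engine itself (as opposed to its QMAN): read
      * request limits, the LBW outstanding-transactions workaround, error
      * reporting towards the GIC/FW interrupt handler, protection and MMU
      * bypass, and finally enable the core.
      */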
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802         struct cpu_dyn_regs *dyn_regs =
2803                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806         u32 irq_handler_offset;
2807
2808         /* Set to maximum possible according to physical size */
2809         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811
2812         /* WA for H/W bug H3-2116 */
2813         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814
2815         /* STOP_ON bit means no completion is sent for the operation on RAZWI */
2816         if (hdev->stop_on_err)
2817                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818
2819         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820
2821         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824
2825         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826                 lower_32_bits(CFG_BASE + irq_handler_offset));
2827         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828                 upper_32_bits(CFG_BASE + irq_handler_offset));
2829
2830         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832         WREG32(mmDMA0_CORE_PROT + dma_offset,
2833                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834         /* If the channel is secured, it should be in MMU bypass mode */
2835         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841                                 u32 enable_mask)
2842 {
2843         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844
2845         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850         struct gaudi_device *gaudi = hdev->asic_specific;
2851         struct hl_hw_queue *q;
2852         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853
2854         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855                 return;
2856
2857         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858                 dma_id = gaudi_dma_assignment[i];
2859                 /*
2860                  * For queues after the CPU Q, we need to add 1 to get the
2861                  * correct queue index. In addition, the CPU EQ and NIC IRQs
2862                  * must be added in order to get the correct MSI register.
2863                  */
2864                 if (dma_id > 1) {
2865                         cpu_skip = 1;
2866                         nic_skip = NIC_NUMBER_OF_ENGINES;
2867                 } else {
2868                         cpu_skip = 0;
2869                         nic_skip = 0;
2870                 }
2871
2872                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873                         q_idx = 4 * dma_id + j + cpu_skip;
2874                         q = &hdev->kernel_queues[q_idx];
2875                         q->cq_id = cq_id++;
2876                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878                                                 q->bus_address);
2879                 }
2880
2881                 gaudi_init_dma_core(hdev, dma_id);
2882
2883                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884         }
2885
2886         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890                                         int qman_id, u64 qman_base_addr)
2891 {
2892         struct cpu_dyn_regs *dyn_regs =
2893                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896         u32 dma_qm_err_cfg, irq_handler_offset;
2897         u32 q_off, dma_qm_offset;
2898
2899         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900
2901         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905         so_base_en_lo = lower_32_bits(CFG_BASE +
2906                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907         so_base_en_hi = upper_32_bits(CFG_BASE +
2908                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913         so_base_ws_lo = lower_32_bits(CFG_BASE +
2914                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915         so_base_ws_hi = upper_32_bits(CFG_BASE +
2916                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917
2918         q_off = dma_qm_offset + qman_id * 4;
2919
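             /*
              * Streams 0-3 get their own PQ; qman_id 4 configures the lower
              * CP, which also carries the per-QMAN error reporting,
              * arbitration and protection setup.
              */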
2920         if (qman_id < 4) {
2921                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922                                         lower_32_bits(qman_base_addr));
2923                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924                                         upper_32_bits(qman_base_addr));
2925
2926                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929
2930                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931                                                         QMAN_CPDMA_SIZE_OFFSET);
2932                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933                                                         QMAN_CPDMA_SRC_OFFSET);
2934                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935                                                         QMAN_CPDMA_DST_OFFSET);
2936         } else {
2937                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940
2941                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942                                                         QMAN_LDMA_SIZE_OFFSET);
2943                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944                                                         QMAN_LDMA_SRC_OFFSET);
2945                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946                                                         QMAN_LDMA_DST_OFFSET);
2947
2948                 /* Configure RAZWI IRQ */
2949                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950                 if (hdev->stop_on_err)
2951                         dma_qm_err_cfg |=
2952                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953
2954                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955
2956                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957                         lower_32_bits(CFG_BASE + irq_handler_offset));
2958                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959                         upper_32_bits(CFG_BASE + irq_handler_offset));
2960
2961                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963                                                                         dma_id);
2964
2965                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966                                 QM_ARB_ERR_MSG_EN_MASK);
2967
2968                 /* Increase ARB WDT to support streams architecture */
2969                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970                                 GAUDI_ARB_WDT_TIMEOUT);
2971
2972                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974                                 QMAN_INTERNAL_MAKE_TRUSTED);
2975         }
2976
2977         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981
2982         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985                                 mtr_base_ws_lo);
2986                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987                                 mtr_base_ws_hi);
2988                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989                                 so_base_ws_lo);
2990                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991                                 so_base_ws_hi);
2992         }
2993 }
2994
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997         struct gaudi_device *gaudi = hdev->asic_specific;
2998         struct gaudi_internal_qman_info *q;
2999         u64 qman_base_addr;
3000         int i, j, dma_id, internal_q_index;
3001
3002         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003                 return;
3004
3005         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007
3008                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009                          /*
3010                           * Add the CPU queue in order to get the correct queue
3011                           * number, as all internal queues are placed after it
3012                           */
3013                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014
3015                         q = &gaudi->internal_qmans[internal_q_index];
3016                         qman_base_addr = (u64) q->pq_dma_addr;
3017                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018                                                 qman_base_addr);
3019                 }
3020
3021                 /* Initializing lower CP for HBM DMA QMAN */
3022                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023
3024                 gaudi_init_dma_core(hdev, dma_id);
3025
3026                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027         }
3028
3029         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033                                         int qman_id, u64 qman_base_addr)
3034 {
3035         struct cpu_dyn_regs *dyn_regs =
3036                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037         u32 mtr_base_lo, mtr_base_hi;
3038         u32 so_base_lo, so_base_hi;
3039         u32 irq_handler_offset;
3040         u32 q_off, mme_id;
3041         u32 mme_qm_err_cfg;
3042
3043         mtr_base_lo = lower_32_bits(CFG_BASE +
3044                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045         mtr_base_hi = upper_32_bits(CFG_BASE +
3046                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047         so_base_lo = lower_32_bits(CFG_BASE +
3048                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049         so_base_hi = upper_32_bits(CFG_BASE +
3050                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051
3052         q_off = mme_offset + qman_id * 4;
3053
3054         if (qman_id < 4) {
3055                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056                                         lower_32_bits(qman_base_addr));
3057                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058                                         upper_32_bits(qman_base_addr));
3059
3060                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063
3064                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065                                                         QMAN_CPDMA_SIZE_OFFSET);
3066                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067                                                         QMAN_CPDMA_SRC_OFFSET);
3068                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069                                                         QMAN_CPDMA_DST_OFFSET);
3070         } else {
3071                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074
3075                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076                                                         QMAN_LDMA_SIZE_OFFSET);
3077                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078                                                         QMAN_LDMA_SRC_OFFSET);
3079                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080                                                         QMAN_LDMA_DST_OFFSET);
3081
3082                 /* Configure RAZWI IRQ */
3083                 mme_id = mme_offset /
3084                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085
3086                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087                 if (hdev->stop_on_err)
3088                         mme_qm_err_cfg |=
3089                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090
3091                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092
3093                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094                         lower_32_bits(CFG_BASE + irq_handler_offset));
3095                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096                         upper_32_bits(CFG_BASE + irq_handler_offset));
3097
3098                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100                                                                         mme_id);
3101
3102                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103                                 QM_ARB_ERR_MSG_EN_MASK);
3104
3105                 /* Increase ARB WDT to support streams architecture */
3106                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107                                 GAUDI_ARB_WDT_TIMEOUT);
3108
3109                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111                                 QMAN_INTERNAL_MAKE_TRUSTED);
3112         }
3113
3114         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122         struct gaudi_device *gaudi = hdev->asic_specific;
3123         struct gaudi_internal_qman_info *q;
3124         u64 qman_base_addr;
3125         u32 mme_offset;
3126         int i, internal_q_index;
3127
3128         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129                 return;
3130
3131         /*
3132          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134          */
3135
3136         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137
3138         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140                 q = &gaudi->internal_qmans[internal_q_index];
3141                 qman_base_addr = (u64) q->pq_dma_addr;
3142                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143                                         qman_base_addr);
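                     /*
                      * After the first four streams (MME2 QMAN), switch to
                      * the MME0 QMAN register block for the remaining streams.
                      */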
3144                 if (i == 3)
3145                         mme_offset = 0;
3146         }
3147
3148         /* Initializing lower CP for MME QMANs */
3149         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151         gaudi_init_mme_qman(hdev, 0, 4, 0);
3152
3153         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155
3156         gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160                                 int qman_id, u64 qman_base_addr)
3161 {
3162         struct cpu_dyn_regs *dyn_regs =
3163                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166         u32 tpc_qm_err_cfg, irq_handler_offset;
3167         u32 q_off, tpc_id;
3168
3169         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173         so_base_en_lo = lower_32_bits(CFG_BASE +
3174                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175         so_base_en_hi = upper_32_bits(CFG_BASE +
3176                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181         so_base_ws_lo = lower_32_bits(CFG_BASE +
3182                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183         so_base_ws_hi = upper_32_bits(CFG_BASE +
3184                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185
3186         q_off = tpc_offset + qman_id * 4;
3187
3188         tpc_id = tpc_offset /
3189                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190
3191         if (qman_id < 4) {
3192                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193                                         lower_32_bits(qman_base_addr));
3194                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195                                         upper_32_bits(qman_base_addr));
3196
3197                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200
3201                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202                                                         QMAN_CPDMA_SIZE_OFFSET);
3203                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204                                                         QMAN_CPDMA_SRC_OFFSET);
3205                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206                                                         QMAN_CPDMA_DST_OFFSET);
3207         } else {
3208                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211
3212                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213                                                         QMAN_LDMA_SIZE_OFFSET);
3214                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215                                                         QMAN_LDMA_SRC_OFFSET);
3216                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217                                                         QMAN_LDMA_DST_OFFSET);
3218
3219                 /* Configure RAZWI IRQ */
3220                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221                 if (hdev->stop_on_err)
3222                         tpc_qm_err_cfg |=
3223                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224
3225                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226
3227                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228                         lower_32_bits(CFG_BASE + irq_handler_offset));
3229                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230                         upper_32_bits(CFG_BASE + irq_handler_offset));
3231
3232                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234                                                                         tpc_id);
3235
3236                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237                                 QM_ARB_ERR_MSG_EN_MASK);
3238
3239                 /* Increase ARB WDT to support streams architecture */
3240                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241                                 GAUDI_ARB_WDT_TIMEOUT);
3242
3243                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245                                 QMAN_INTERNAL_MAKE_TRUSTED);
3246         }
3247
3248         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252
3253         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254         if (tpc_id == 6) {
3255                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256                                 mtr_base_ws_lo);
3257                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258                                 mtr_base_ws_hi);
3259                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260                                 so_base_ws_lo);
3261                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262                                 so_base_ws_hi);
3263         }
3264 }
3265
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268         struct gaudi_device *gaudi = hdev->asic_specific;
3269         struct gaudi_internal_qman_info *q;
3270         u64 qman_base_addr;
3271         u32 so_base_hi, tpc_offset = 0;
3272         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274         int i, tpc_id, internal_q_index;
3275
3276         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277                 return;
3278
3279         so_base_hi = upper_32_bits(CFG_BASE +
3280                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281
3282         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285                                                 tpc_id * QMAN_STREAMS + i;
3286                         q = &gaudi->internal_qmans[internal_q_index];
3287                         qman_base_addr = (u64) q->pq_dma_addr;
3288                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289                                                 qman_base_addr);
3290
3291                         if (i == 3) {
3292                                 /* Initializing lower CP for TPC QMAN */
3293                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294
3295                                 /* Enable the QMAN and TPC channel */
3296                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297                                                 QMAN_TPC_ENABLE);
3298                         }
3299                 }
3300
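                     /*
                      * Point the TPC's sync-manager base address (high part)
                      * at the E_N sync objects block.
                      */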
3301                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302                                 so_base_hi);
3303
3304                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305
3306                 gaudi->hw_cap_initialized |=
3307                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308         }
3309 }
3310
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312                                 int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314         struct cpu_dyn_regs *dyn_regs =
3315                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318         u32 nic_qm_err_cfg, irq_handler_offset;
3319         u32 q_off;
3320
3321         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3322                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325         so_base_en_lo = lower_32_bits(CFG_BASE +
3326                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327         so_base_en_hi = upper_32_bits(CFG_BASE +
3328                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3330                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333         so_base_ws_lo = lower_32_bits(CFG_BASE +
3334                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335         so_base_ws_hi = upper_32_bits(CFG_BASE +
3336                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337
3338         q_off = nic_offset + qman_id * 4;
3339
3340         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342
3343         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346
3347         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348                                                         QMAN_LDMA_SIZE_OFFSET);
3349         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350                                                         QMAN_LDMA_SRC_OFFSET);
3351         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352                                                         QMAN_LDMA_DST_OFFSET);
3353
3354         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358
3359         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364
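             /*
              * Error reporting, arbitration and protection are programmed
              * only once per NIC QMAN, when stream 0 is initialized.
              */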
3365         if (qman_id == 0) {
3366                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369
3370                 /* Configure RAZWI IRQ */
3371                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372                 if (hdev->stop_on_err)
3373                         nic_qm_err_cfg |=
3374                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375
3376                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377
3378                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379                         lower_32_bits(CFG_BASE + irq_handler_offset));
3380                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381                         upper_32_bits(CFG_BASE + irq_handler_offset));
3382
3383                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385                                                                         nic_id);
3386
3387                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388                                 QM_ARB_ERR_MSG_EN_MASK);
3389
3390                 /* Increase ARB WDT to support streams architecture */
3391                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392                                 GAUDI_ARB_WDT_TIMEOUT);
3393
3394                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396                                 QMAN_INTERNAL_MAKE_TRUSTED);
3397         }
3398 }
3399
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402         struct gaudi_device *gaudi = hdev->asic_specific;
3403         struct gaudi_internal_qman_info *q;
3404         u64 qman_base_addr;
3405         u32 nic_offset = 0;
3406         u32 nic_delta_between_qmans =
3407                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408         u32 nic_delta_between_nics =
3409                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410         int i, nic_id, internal_q_index;
3411
3412         if (!hdev->nic_ports_mask)
3413                 return;
3414
3415         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416                 return;
3417
3418         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419
3420         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422                         nic_offset += nic_delta_between_qmans;
3423                         if (nic_id & 1) {
3424                                 nic_offset -= (nic_delta_between_qmans * 2);
3425                                 nic_offset += nic_delta_between_nics;
3426                         }
3427                         continue;
3428                 }
3429
3430                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432                                                 nic_id * QMAN_STREAMS + i;
3433                         q = &gaudi->internal_qmans[internal_q_index];
3434                         qman_base_addr = (u64) q->pq_dma_addr;
3435                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436                                                 qman_base_addr, nic_id);
3437                 }
3438
3439                 /* Enable the QMAN */
3440                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441
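                     /*
                      * Each NIC macro hosts two QMANs; after the odd-numbered
                      * port, jump to the next NIC macro's register block.
                      */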
3442                 nic_offset += nic_delta_between_qmans;
3443                 if (nic_id & 1) {
3444                         nic_offset -= (nic_delta_between_qmans * 2);
3445                         nic_offset += nic_delta_between_nics;
3446                 }
3447
3448                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449         }
3450 }
3451
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454         struct gaudi_device *gaudi = hdev->asic_specific;
3455
3456         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457                 return;
3458
3459         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466         struct gaudi_device *gaudi = hdev->asic_specific;
3467
3468         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469                 return;
3470
3471         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480         struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483                 return;
3484
3485         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491         struct gaudi_device *gaudi = hdev->asic_specific;
3492         u32 tpc_offset = 0;
3493         int tpc_id;
3494
3495         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496                 return;
3497
3498         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501         }
3502 }
3503
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506         struct gaudi_device *gaudi = hdev->asic_specific;
3507         u32 nic_mask, nic_offset = 0;
3508         u32 nic_delta_between_qmans =
3509                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510         u32 nic_delta_between_nics =
3511                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512         int nic_id;
3513
3514         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516
3517                 if (gaudi->hw_cap_initialized & nic_mask)
3518                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519
3520                 nic_offset += nic_delta_between_qmans;
3521                 if (nic_id & 1) {
3522                         nic_offset -= (nic_delta_between_qmans * 2);
3523                         nic_offset += nic_delta_between_nics;
3524                 }
3525         }
3526 }
3527
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530         struct gaudi_device *gaudi = hdev->asic_specific;
3531
3532         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533                 return;
3534
3535         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543         struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546                 return;
3547
3548         /* Stop CPs of HBM DMA QMANs */
3549
3550         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559         struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562                 return;
3563
3564         /* Stop CPs of MME QMANs */
3565         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571         struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574                 return;
3575
3576         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588         struct gaudi_device *gaudi = hdev->asic_specific;
3589
3590         /* Stop upper CPs of QMANs */
3591
3592         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3594                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597
3598         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3600                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603
3604         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3606                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609
3610         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3612                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3618                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3624                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3630                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633
3634         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3636                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639
3640         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3642                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645
3646         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3648                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655         struct gaudi_device *gaudi = hdev->asic_specific;
3656
3657         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658                 return;
3659
3660         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667         struct gaudi_device *gaudi = hdev->asic_specific;
3668
3669         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670                 return;
3671
3672         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681         struct gaudi_device *gaudi = hdev->asic_specific;
3682
3683         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684                 return;
3685
3686         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707         struct gaudi_device *gaudi = hdev->asic_specific;
3708
3709         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710                 return;
3711
3712         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724         struct gaudi_device *gaudi = hdev->asic_specific;
3725         u32 qman_offset;
3726         bool enable;
3727         int i;
3728
3729         /* If we are in a debug session, don't enable clock gating as it
3730          * may interfere with debugging
3731          */
3732         if (hdev->in_debug)
3733                 return;
3734
3735         if (hdev->asic_prop.fw_security_enabled)
3736                 return;
3737
3738         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739                 enable = !!(hdev->clock_gating_mask &
3740                                 (BIT_ULL(gaudi_dma_assignment[i])));
3741
3742                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747         }
3748
3749         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750                 enable = !!(hdev->clock_gating_mask &
3751                                 (BIT_ULL(gaudi_dma_assignment[i])));
3752
3753                 /* GC sends work to the DMA engine through the Upper CP in
3754                  * DMA5, so we must not enable clock gating on that DMA
3755                  */
3756                 if (i == GAUDI_HBM_DMA_4)
3757                         enable = 0;
3758
3759                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764         }
3765
3766         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769
3770         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773
3774         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775                 enable = !!(hdev->clock_gating_mask &
3776                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777
3778                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782
3783                 qman_offset += TPC_QMAN_OFFSET;
3784         }
3785
3786         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791         struct gaudi_device *gaudi = hdev->asic_specific;
3792         u32 qman_offset;
3793         int i;
3794
3795         if (hdev->asic_prop.fw_security_enabled)
3796                 return;
3797
3798         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801
3802                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803         }
3804
3805         WREG32(mmMME0_QM_CGM_CFG, 0);
3806         WREG32(mmMME0_QM_CGM_CFG1, 0);
3807         WREG32(mmMME2_QM_CGM_CFG, 0);
3808         WREG32(mmMME2_QM_CGM_CFG1, 0);
3809
3810         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813
3814                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815         }
3816
3817         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822         /* Disable the timestamp counter */
3823         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824
3825         /* Zero the lower/upper parts of the 64-bit counter */
3826         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828
3829         /* Enable the counter */
3830         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835         /* Disable the timestamp counter */
3836         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841         u32 wait_timeout_ms;
3842
3843         dev_info(hdev->dev,
3844                 "Halting compute engines and disabling interrupts\n");
3845
3846         if (hdev->pldm)
3847                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848         else
3849                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850
3851         if (fw_reset)
3852                 goto skip_engines;
3853
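        /* Stop the QMANs first so no new work is dispatched, then stall the
         * engines themselves, and only then disable the QMANs and the
         * timestamp counter
         */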
3854         gaudi_stop_nic_qmans(hdev);
3855         gaudi_stop_mme_qmans(hdev);
3856         gaudi_stop_tpc_qmans(hdev);
3857         gaudi_stop_hbm_dma_qmans(hdev);
3858         gaudi_stop_pci_dma_qmans(hdev);
3859
3860         hdev->asic_funcs->disable_clock_gating(hdev);
3861
3862         msleep(wait_timeout_ms);
3863
3864         gaudi_pci_dma_stall(hdev);
3865         gaudi_hbm_dma_stall(hdev);
3866         gaudi_tpc_stall(hdev);
3867         gaudi_mme_stall(hdev);
3868
3869         msleep(wait_timeout_ms);
3870
3871         gaudi_disable_nic_qmans(hdev);
3872         gaudi_disable_mme_qmans(hdev);
3873         gaudi_disable_tpc_qmans(hdev);
3874         gaudi_disable_hbm_dma_qmans(hdev);
3875         gaudi_disable_pci_dma_qmans(hdev);
3876
3877         gaudi_disable_timestamp(hdev);
3878
3879 skip_engines:
3880         gaudi_disable_msi(hdev);
3881 }
3882
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885         struct asic_fixed_properties *prop = &hdev->asic_prop;
3886         struct gaudi_device *gaudi = hdev->asic_specific;
3887         u64 hop0_addr;
3888         int rc, i;
3889
3890         if (!hdev->mmu_enable)
3891                 return 0;
3892
3893         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894                 return 0;
3895
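        /* Program the hop-0 page table address for every ASID; the per-ASID
         * tables are laid out consecutively in the MMU page-tables area
         */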
3896         for (i = 0 ; i < prop->max_asid ; i++) {
3897                 hop0_addr = prop->mmu_pgt_addr +
3898                                 (i * prop->mmu_hop_table_size);
3899
3900                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901                 if (rc) {
3902                         dev_err(hdev->dev,
3903                                 "failed to set hop0 addr for asid %d\n", i);
3904                         goto err;
3905                 }
3906         }
3907
3908         /* init MMU cache management page */
3909         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911
3912         /* mem cache invalidation */
3913         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914
3915         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916
3917         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919
3920         WREG32(mmSTLB_HOP_CONFIGURATION,
3921                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922
3923         /*
3924          * The H/W expects the first PI after init to be 1. After wraparound
3925          * we'll write 0.
3926          */
3927         gaudi->mmu_cache_inv_pi = 1;
3928
3929         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930
3931         return 0;
3932
3933 err:
3934         return rc;
3935 }
3936
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939         void __iomem *dst;
3940
3941         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942
3943         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948         void __iomem *dst;
3949
3950         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951
3952         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957         struct dynamic_fw_load_mgr *dynamic_loader;
3958         struct cpu_dyn_regs *dyn_regs;
3959
3960         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961
3962         /*
3963          * Set initial values for a few specific dynamic registers; before
3964          * the first descriptor is read from the FW, these values have to
3965          * be hard-coded. In later stages of the protocol they are updated
3966          * automatically by reading the FW descriptor, so the data there is
3967          * always up-to-date.
3968          */
3969         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970         dyn_regs->kmd_msg_to_cpu =
3971                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972         dyn_regs->cpu_cmd_status_to_host =
3973                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974
3975         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980         struct static_fw_load_mgr *static_loader;
3981
3982         static_loader = &hdev->fw_loader.static_loader;
3983
3984         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997                         GAUDI_PLDM_RESET_WAIT_MSEC :
3998                         GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003         struct asic_fixed_properties *prop = &hdev->asic_prop;
4004         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005
4006         /* fill common fields */
4007         fw_loader->linux_loaded = false;
4008         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012         fw_loader->skip_bmc = !hdev->bmc_enable;
4013         fw_loader->sram_bar_id = SRAM_BAR_ID;
4014         fw_loader->dram_bar_id = HBM_BAR_ID;
4015
4016         if (prop->dynamic_fw_load)
4017                 gaudi_init_dynamic_firmware_loader(hdev);
4018         else
4019                 gaudi_init_static_firmware_loader(hdev);
4020 }
4021
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024         struct gaudi_device *gaudi = hdev->asic_specific;
4025         int rc;
4026
4027         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028                 return 0;
4029
4030         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031                 return 0;
4032
4033         /*
4034          * The device CPU works with 40-bit addresses.
4035          * This register sets the extension to 50 bits.
4036          */
4037         if (!hdev->asic_prop.fw_security_enabled)
4038                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039
4040         rc = hl_fw_init_cpu(hdev);
4041
4042         if (rc)
4043                 return rc;
4044
4045         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046
4047         return 0;
4048 }
4049
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052         struct cpu_dyn_regs *dyn_regs =
4053                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054         struct asic_fixed_properties *prop = &hdev->asic_prop;
4055         struct gaudi_device *gaudi = hdev->asic_specific;
4056         u32 status, irq_handler_offset;
4057         struct hl_eq *eq;
4058         struct hl_hw_queue *cpu_pq =
4059                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060         int err;
4061
4062         if (!hdev->cpu_queues_enable)
4063                 return 0;
4064
4065         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066                 return 0;
4067
4068         eq = &hdev->event_queue;
4069
4070         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072
4073         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075
4076         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077                         lower_32_bits(hdev->cpu_accessible_dma_address));
4078         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079                         upper_32_bits(hdev->cpu_accessible_dma_address));
4080
4081         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084
4085         /* Used for EQ CI */
4086         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087
4088         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089
4090         if (gaudi->multi_msi_mode)
4091                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092         else
4093                 WREG32(mmCPU_IF_QUEUE_INIT,
4094                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095
4096         irq_handler_offset = prop->gic_interrupts_enable ?
4097                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099
4100         WREG32(irq_handler_offset,
4101                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102
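        /* Wait for the device CPU to acknowledge the queues initialization */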
4103         err = hl_poll_timeout(
4104                 hdev,
4105                 mmCPU_IF_QUEUE_INIT,
4106                 status,
4107                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4108                 1000,
4109                 cpu_timeout);
4110
4111         if (err) {
4112                 dev_err(hdev->dev,
4113                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114                 return -EIO;
4115         }
4116
4117         /* update FW application security bits */
4118         if (prop->fw_cpu_boot_dev_sts0_valid)
4119                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120         if (prop->fw_cpu_boot_dev_sts1_valid)
4121                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122
4123         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124         return 0;
4125 }
4126
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129         /* Perform read from the device to make sure device is up */
4130         RREG32(mmHW_STATE);
4131
4132         if (!hdev->asic_prop.fw_security_enabled) {
4133                 /* Set the access through PCI bars (Linux driver only) as
4134                  * secured
4135                  */
4136                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139
4140                 /* Perform read to flush the waiting writes to ensure
4141                  * configuration was set in the device
4142                  */
4143                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144         }
4145
4146         /*
4147          * Let's mark in the H/W that we have reached this point. We check
4148          * this value in the reset_before_init function to understand whether
4149          * we need to reset the chip before doing H/W init. This register is
4150          * cleared by the H/W upon H/W reset
4151          */
4152         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157         struct gaudi_device *gaudi = hdev->asic_specific;
4158         int rc;
4159
4160         gaudi_pre_hw_init(hdev);
4161
4162         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4163          * So we set it here and if anyone tries to move it later to
4164          * a different address, there will be an error
4165          */
4166         if (hdev->asic_prop.iatu_done_by_fw)
4167                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168
4169         /*
4170          * Before pushing u-boot/Linux to the device, the HBM BAR must be
4171          * set to the DRAM base address
4172          */
4173         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174                 dev_err(hdev->dev,
4175                         "failed to map HBM bar to DRAM base address\n");
4176                 return -EIO;
4177         }
4178
4179         rc = gaudi_init_cpu(hdev);
4180         if (rc) {
4181                 dev_err(hdev->dev, "failed to initialize CPU\n");
4182                 return rc;
4183         }
4184
4185         /* If clock gating was enabled by preboot, we need to disable it
4186          * here before touching the MME/TPC registers.
4187          * There is no need to take the clock gating mutex because no other
4188          * relevant code can run while this function runs
4189          */
4190         hdev->asic_funcs->disable_clock_gating(hdev);
4191
4192         /* SRAM scrambler must be initialized after CPU is running from HBM */
4193         gaudi_init_scrambler_sram(hdev);
4194
4195         /* This is here just in case we are working without CPU */
4196         gaudi_init_scrambler_hbm(hdev);
4197
4198         gaudi_init_golden_registers(hdev);
4199
4200         rc = gaudi_mmu_init(hdev);
4201         if (rc)
4202                 return rc;
4203
4204         gaudi_init_security(hdev);
4205
4206         gaudi_init_pci_dma_qmans(hdev);
4207
4208         gaudi_init_hbm_dma_qmans(hdev);
4209
4210         gaudi_init_mme_qmans(hdev);
4211
4212         gaudi_init_tpc_qmans(hdev);
4213
4214         gaudi_init_nic_qmans(hdev);
4215
4216         hdev->asic_funcs->set_clock_gating(hdev);
4217
4218         gaudi_enable_timestamp(hdev);
4219
4220         /* MSI must be enabled before CPU queues and NIC are initialized */
4221         rc = gaudi_enable_msi(hdev);
4222         if (rc)
4223                 goto disable_queues;
4224
4225         /* must be called after MSI was enabled */
4226         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227         if (rc) {
4228                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229                         rc);
4230                 goto disable_msi;
4231         }
4232
4233         /* Perform read from the device to flush all configuration */
4234         RREG32(mmHW_STATE);
4235
4236         return 0;
4237
4238 disable_msi:
4239         gaudi_disable_msi(hdev);
4240 disable_queues:
4241         gaudi_disable_mme_qmans(hdev);
4242         gaudi_disable_pci_dma_qmans(hdev);
4243
4244         return rc;
4245 }
4246
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249         struct cpu_dyn_regs *dyn_regs =
4250                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252         struct gaudi_device *gaudi = hdev->asic_specific;
4253         bool driver_performs_reset;
4254
4255         if (!hard_reset) {
4256                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257                 return;
4258         }
4259
4260         if (hdev->pldm) {
4261                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263         } else {
4264                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266         }
4267
4268         if (fw_reset) {
4269                 dev_info(hdev->dev,
4270                         "Firmware performs HARD reset, going to wait %dms\n",
4271                         reset_timeout_ms);
4272
4273                 goto skip_reset;
4274         }
4275
4276         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277                                         !hdev->asic_prop.hard_reset_done_by_fw);
4278
4279         /* Set device to handle FLR by H/W as we will put the device CPU to
4280          * halt mode
4281          */
4282         if (driver_performs_reset)
4283                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285
4286         /* If Linux is loaded on the device CPU, we need to communicate
4287          * with it via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU
4288          * registers in case of old F/Ws
4289          */
4290         if (hdev->fw_loader.linux_loaded) {
4291                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294
4295                 WREG32(irq_handler_offset,
4296                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297         } else {
4298                 if (hdev->asic_prop.hard_reset_done_by_fw)
4299                         hl_fw_ask_hard_reset_without_linux(hdev);
4300                 else
4301                         hl_fw_ask_halt_machine_without_linux(hdev);
4302         }
4303
4304         if (driver_performs_reset) {
4305
4306                 /* Configure the reset registers. Must be done as early as
4307                  * possible in case we fail during H/W initialization
4308                  */
4309                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310                                                 (CFG_RST_H_DMA_MASK |
4311                                                 CFG_RST_H_MME_MASK |
4312                                                 CFG_RST_H_SM_MASK |
4313                                                 CFG_RST_H_TPC_7_MASK));
4314
4315                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316
4317                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318                                                 (CFG_RST_H_HBM_MASK |
4319                                                 CFG_RST_H_TPC_7_MASK |
4320                                                 CFG_RST_H_NIC_MASK |
4321                                                 CFG_RST_H_SM_MASK |
4322                                                 CFG_RST_H_DMA_MASK |
4323                                                 CFG_RST_H_MME_MASK |
4324                                                 CFG_RST_H_CPU_MASK |
4325                                                 CFG_RST_H_MMU_MASK));
4326
4327                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328                                                 (CFG_RST_L_IF_MASK |
4329                                                 CFG_RST_L_PSOC_MASK |
4330                                                 CFG_RST_L_TPC_MASK));
4331
4332                 msleep(cpu_timeout_ms);
4333
4334                 /* Tell ASIC not to re-initialize PCIe */
4335                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336
4337                 /* Restart BTL/BLR upon hard-reset */
4338                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339
4340                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342
4343                 dev_info(hdev->dev,
4344                         "Issued HARD reset command, going to wait %dms\n",
4345                         reset_timeout_ms);
4346         } else {
4347                 dev_info(hdev->dev,
4348                         "Firmware performs HARD reset, going to wait %dms\n",
4349                         reset_timeout_ms);
4350         }
4351
4352 skip_reset:
4353         /*
4354          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355          * itself is in reset. Need to wait until the reset is deasserted
4356          */
4357         msleep(reset_timeout_ms);
4358
4359         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361                 dev_err(hdev->dev,
4362                         "Timeout while waiting for device to reset 0x%x\n",
4363                         status);
4364
4365         if (gaudi) {
4366                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4368                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4369                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4370                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4371                                 HW_CAP_SRAM_SCRAMBLER |
4372                                 HW_CAP_HBM_SCRAMBLER |
4373                                 HW_CAP_CLK_GATE);
4374
4375                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376
4377                 hdev->device_cpu_is_halted = false;
4378         }
4379 }
4380
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383         int rc;
4384
4385         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386         if (rc)
4387                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388
4389         return rc;
4390 }
4391
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394         return gaudi_init_iatu(hdev);
4395 }
4396
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400         int rc;
4401
4402         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403                         VM_DONTCOPY | VM_NORESERVE;
4404
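        /* Remove the device's base physical address of host memory
         * (HOST_PHYS_BASE) that was added at allocation time before handing
         * the address to the DMA API
         */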
4405         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406                                 (dma_addr - HOST_PHYS_BASE), size);
4407         if (rc)
4408                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409
4410         return rc;
4411 }
4412
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415         struct cpu_dyn_regs *dyn_regs =
4416                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418         struct gaudi_device *gaudi = hdev->asic_specific;
4419         bool invalid_queue = false;
4420         int dma_id;
4421
4422         switch (hw_queue_id) {
4423         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435                 break;
4436
4437         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
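                /* GAUDI_QUEUE_ID_CPU_PQ sits between the DMA_1 and DMA_2 queue
                 * IDs, so for DMA 2-7 the index within the QMAN is
                 * (hw_queue_id - 1) & 0x3
                 */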
4440                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442                 break;
4443
4444         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477                 break;
4478
4479         case GAUDI_QUEUE_ID_CPU_PQ:
4480                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482                 else
4483                         invalid_queue = true;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_MME_0_0:
4487                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_MME_0_1:
4491                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_MME_0_2:
4495                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_MME_0_3:
4499                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_MME_1_0:
4503                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_MME_1_1:
4507                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4508                 break;
4509
4510         case GAUDI_QUEUE_ID_MME_1_2:
4511                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_MME_1_3:
4515                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4516                 break;
4517
4518         case GAUDI_QUEUE_ID_TPC_0_0:
4519                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_TPC_0_1:
4523                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524                 break;
4525
4526         case GAUDI_QUEUE_ID_TPC_0_2:
4527                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_TPC_0_3:
4531                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532                 break;
4533
4534         case GAUDI_QUEUE_ID_TPC_1_0:
4535                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_TPC_1_1:
4539                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540                 break;
4541
4542         case GAUDI_QUEUE_ID_TPC_1_2:
4543                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_TPC_1_3:
4547                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548                 break;
4549
4550         case GAUDI_QUEUE_ID_TPC_2_0:
4551                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_TPC_2_1:
4555                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556                 break;
4557
4558         case GAUDI_QUEUE_ID_TPC_2_2:
4559                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560                 break;
4561
4562         case GAUDI_QUEUE_ID_TPC_2_3:
4563                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564                 break;
4565
4566         case GAUDI_QUEUE_ID_TPC_3_0:
4567                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568                 break;
4569
4570         case GAUDI_QUEUE_ID_TPC_3_1:
4571                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572                 break;
4573
4574         case GAUDI_QUEUE_ID_TPC_3_2:
4575                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576                 break;
4577
4578         case GAUDI_QUEUE_ID_TPC_3_3:
4579                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580                 break;
4581
4582         case GAUDI_QUEUE_ID_TPC_4_0:
4583                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584                 break;
4585
4586         case GAUDI_QUEUE_ID_TPC_4_1:
4587                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588                 break;
4589
4590         case GAUDI_QUEUE_ID_TPC_4_2:
4591                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592                 break;
4593
4594         case GAUDI_QUEUE_ID_TPC_4_3:
4595                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596                 break;
4597
4598         case GAUDI_QUEUE_ID_TPC_5_0:
4599                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600                 break;
4601
4602         case GAUDI_QUEUE_ID_TPC_5_1:
4603                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604                 break;
4605
4606         case GAUDI_QUEUE_ID_TPC_5_2:
4607                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608                 break;
4609
4610         case GAUDI_QUEUE_ID_TPC_5_3:
4611                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612                 break;
4613
4614         case GAUDI_QUEUE_ID_TPC_6_0:
4615                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616                 break;
4617
4618         case GAUDI_QUEUE_ID_TPC_6_1:
4619                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620                 break;
4621
4622         case GAUDI_QUEUE_ID_TPC_6_2:
4623                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624                 break;
4625
4626         case GAUDI_QUEUE_ID_TPC_6_3:
4627                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628                 break;
4629
4630         case GAUDI_QUEUE_ID_TPC_7_0:
4631                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632                 break;
4633
4634         case GAUDI_QUEUE_ID_TPC_7_1:
4635                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636                 break;
4637
4638         case GAUDI_QUEUE_ID_TPC_7_2:
4639                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640                 break;
4641
4642         case GAUDI_QUEUE_ID_TPC_7_3:
4643                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644                 break;
4645
4646         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648                         invalid_queue = true;
4649
4650                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652                 break;
4653
4654         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656                         invalid_queue = true;
4657
4658                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660                 break;
4661
4662         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664                         invalid_queue = true;
4665
4666                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668                 break;
4669
4670         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672                         invalid_queue = true;
4673
4674                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676                 break;
4677
4678         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680                         invalid_queue = true;
4681
4682                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684                 break;
4685
4686         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688                         invalid_queue = true;
4689
4690                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692                 break;
4693
4694         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696                         invalid_queue = true;
4697
4698                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700                 break;
4701
4702         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704                         invalid_queue = true;
4705
4706                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708                 break;
4709
4710         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712                         invalid_queue = true;
4713
4714                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716                 break;
4717
4718         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720                         invalid_queue = true;
4721
4722                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724                 break;
4725
4726         default:
4727                 invalid_queue = true;
4728         }
4729
4730         if (invalid_queue) {
4731                 /* Should never get here */
4732                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733                         hw_queue_id);
4734                 return;
4735         }
4736
4737         db_value = pi;
4738
4739         /* ring the doorbell */
4740         WREG32(db_reg_offset, db_value);
4741
4742         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743                 /* make sure device CPU will read latest data from host */
4744                 mb();
4745
4746                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749
4750                 WREG32(irq_handler_offset,
4751                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752         }
4753 }
4754
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756                                 struct hl_bd *bd)
4757 {
4758         __le64 *pbd = (__le64 *) bd;
4759
4760         /* The QMANs are in host memory so a simple copy suffices */
4761         pqe[0] = pbd[0];
4762         pqe[1] = pbd[1];
4763 }
4764
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766                                         dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769                                                 dma_handle, flags);
4770
4771         /* Shift to the device's base physical address of host memory */
4772         if (kernel_addr)
4773                 *dma_handle += HOST_PHYS_BASE;
4774
4775         return kernel_addr;
4776 }
4777
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779                 void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781         /* Cancel the device's base physical address of host memory */
4782         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783
4784         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789         struct asic_fixed_properties *prop = &hdev->asic_prop;
4790         u64  cur_addr = DRAM_BASE_ADDR_USER;
4791         u32 val;
4792         u32 chunk_size;
4793         int rc, dma_id;
4794
4795         while (cur_addr < prop->dram_end_address) {
4796                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798
4799                         chunk_size =
4800                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801
4802                         dev_dbg(hdev->dev,
4803                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804                                 cur_addr, cur_addr + chunk_size);
4805
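                        /* Use the DMA core in memset mode: with the MEM_SET bit
                         * set in the COMMIT register, the SRC_BASE registers are
                         * written with the fill pattern rather than a real
                         * source address
                         */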
4806                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809                                                 lower_32_bits(cur_addr));
4810                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811                                                 upper_32_bits(cur_addr));
4812                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813                                         chunk_size);
4814                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817
4818                         cur_addr += chunk_size;
4819
4820                         if (cur_addr == prop->dram_end_address)
4821                                 break;
4822                 }
4823
4824                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826
4827                         rc = hl_poll_timeout(
4828                                 hdev,
4829                                 mmDMA0_CORE_STS0 + dma_offset,
4830                                 val,
4831                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832                                 1000,
4833                                 HBM_SCRUBBING_TIMEOUT_US);
4834
4835                         if (rc) {
4836                                 dev_err(hdev->dev,
4837                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4838                                         dma_id);
4839                                 return -EIO;
4840                         }
4841                 }
4842         }
4843
4844         return 0;
4845 }
4846
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849         struct asic_fixed_properties *prop = &hdev->asic_prop;
4850         struct gaudi_device *gaudi = hdev->asic_specific;
4851         int rc = 0;
4852         u64 val = 0;
4853
4854         if (!hdev->memory_scrub)
4855                 return 0;
4856
4857         if (!addr && !size) {
4858                 /* Wait till device is idle */
4859                 rc = hl_poll_timeout(
4860                                 hdev,
4861                                 mmDMA0_CORE_STS0/* dummy */,
4862                                 val/* dummy */,
4863                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4864                                                 0, NULL)),
4865                                                 1000,
4866                                                 HBM_SCRUBBING_TIMEOUT_US);
4867                 if (rc) {
4868                         dev_err(hdev->dev, "waiting for idle timeout\n");
4869                         return -EIO;
4870                 }
4871
4872                 /* Scrub SRAM */
4873                 addr = prop->sram_user_base_address;
4874                 size = hdev->pldm ? 0x10000 :
4875                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4876                 val = 0x7777777777777777ull;
4877
4878                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879                 if (rc) {
4880                         dev_err(hdev->dev,
4881                                 "Failed to clear SRAM in mem scrub all\n");
4882                         return rc;
4883                 }
4884
4885                 mutex_lock(&gaudi->clk_gate_mutex);
4886                 hdev->asic_funcs->disable_clock_gating(hdev);
4887
4888                 /* Scrub HBM using all DMA channels in parallel */
4889                 rc = gaudi_hbm_scrubbing(hdev);
4890                 if (rc)
4891                         dev_err(hdev->dev,
4892                                 "Failed to clear HBM in mem scrub all\n");
4893
4894                 hdev->asic_funcs->set_clock_gating(hdev);
4895                 mutex_unlock(&gaudi->clk_gate_mutex);
4896         }
4897
4898         return rc;
4899 }
4900
4901 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4902                                 u32 queue_id, dma_addr_t *dma_handle,
4903                                 u16 *queue_len)
4904 {
4905         struct gaudi_device *gaudi = hdev->asic_specific;
4906         struct gaudi_internal_qman_info *q;
4907
4908         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911                 return NULL;
4912         }
4913
4914         q = &gaudi->internal_qmans[queue_id];
4915         *dma_handle = q->pq_dma_addr;
4916         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917
4918         return q->pq_kernel_addr;
4919 }
4920
4921 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4922                                 u16 len, u32 timeout, u64 *result)
4923 {
4924         struct gaudi_device *gaudi = hdev->asic_specific;
4925
4926         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927                 if (result)
4928                         *result = 0;
4929                 return 0;
4930         }
4931
4932         if (!timeout)
4933                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934
4935         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936                                                 timeout, result);
4937 }
4938
4939 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941         struct packet_msg_prot *fence_pkt;
4942         dma_addr_t pkt_dma_addr;
4943         u32 fence_val, tmp, timeout_usec;
4944         dma_addr_t fence_dma_addr;
4945         u32 *fence_ptr;
4946         int rc;
4947
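        /* Queue test: send a MSG_PROT packet that writes a known fence value
         * to a host buffer, then poll that buffer to verify the queue
         * executes work
         */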
4948         if (hdev->pldm)
4949                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950         else
4951                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952
4953         fence_val = GAUDI_QMAN0_FENCE_VAL;
4954
4955         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956                                                         &fence_dma_addr);
4957         if (!fence_ptr) {
4958                 dev_err(hdev->dev,
4959                         "Failed to allocate memory for H/W queue %d testing\n",
4960                         hw_queue_id);
4961                 return -ENOMEM;
4962         }
4963
4964         *fence_ptr = 0;
4965
4966         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967                                         sizeof(struct packet_msg_prot),
4968                                         GFP_KERNEL, &pkt_dma_addr);
4969         if (!fence_pkt) {
4970                 dev_err(hdev->dev,
4971                         "Failed to allocate packet for H/W queue %d testing\n",
4972                         hw_queue_id);
4973                 rc = -ENOMEM;
4974                 goto free_fence_ptr;
4975         }
4976
4977         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980
4981         fence_pkt->ctl = cpu_to_le32(tmp);
4982         fence_pkt->value = cpu_to_le32(fence_val);
4983         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984
4985         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986                                         sizeof(struct packet_msg_prot),
4987                                         pkt_dma_addr);
4988         if (rc) {
4989                 dev_err(hdev->dev,
4990                         "Failed to send fence packet to H/W queue %d\n",
4991                         hw_queue_id);
4992                 goto free_pkt;
4993         }
4994
4995         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996                                         1000, timeout_usec, true);
4997
4998         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999
5000         if (rc == -ETIMEDOUT) {
5001                 dev_err(hdev->dev,
5002                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004                 rc = -EIO;
5005         }
5006
5007 free_pkt:
5008         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009                                         pkt_dma_addr);
5010 free_fence_ptr:
5011         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012                                         fence_dma_addr);
5013         return rc;
5014 }
5015
5016 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018         struct gaudi_device *gaudi = hdev->asic_specific;
5019
5020         /*
5021          * check capability here as send_cpu_message() won't update the result
5022          * value if no capability
5023          */
5024         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025                 return 0;
5026
5027         return hl_fw_test_cpu_queue(hdev);
5028 }
5029
5030 static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032         int i, rc, ret_val = 0;
5033
5034         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036                         rc = gaudi_test_queue(hdev, i);
5037                         if (rc)
5038                                 ret_val = -EINVAL;
5039                 }
5040         }
5041
5042         rc = gaudi_test_cpu_queue(hdev);
5043         if (rc)
5044                 ret_val = -EINVAL;
5045
5046         return ret_val;
5047 }
5048
5049 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5050                 gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052         void *kernel_addr;
5053
5054         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055                 return NULL;
5056
5057         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058
5059         /* Shift to the device's base physical address of host memory */
5060         if (kernel_addr)
5061                 *dma_handle += HOST_PHYS_BASE;
5062
5063         return kernel_addr;
5064 }
5065
5066 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5067                         dma_addr_t dma_addr)
5068 {
5069         /* Cancel the device's base physical address of host memory */
5070         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071
5072         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074
5075 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5076                                         size_t size, dma_addr_t *dma_handle)
5077 {
5078         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080
5081 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5082                                                 size_t size, void *vaddr)
5083 {
5084         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086
5087 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5088                         int nents, enum dma_data_direction dir)
5089 {
5090         struct scatterlist *sg;
5091         int i;
5092
5093         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094                 return -ENOMEM;
5095
5096         /* Shift to the device's base physical address of host memory */
5097         for_each_sg(sgl, sg, nents, i)
5098                 sg->dma_address += HOST_PHYS_BASE;
5099
5100         return 0;
5101 }
5102
5103 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5104                         int nents, enum dma_data_direction dir)
5105 {
5106         struct scatterlist *sg;
5107         int i;
5108
5109         /* Cancel the device's base physical address of host memory */
5110         for_each_sg(sgl, sg, nents, i)
5111                 sg->dma_address -= HOST_PHYS_BASE;
5112
5113         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115
5116 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5117                                         struct sg_table *sgt)
5118 {
5119         struct scatterlist *sg, *sg_next_iter;
5120         u32 count, dma_desc_cnt;
5121         u64 len, len_next;
5122         dma_addr_t addr, addr_next;
5123
5124         dma_desc_cnt = 0;
5125
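        /* Count the LIN_DMA descriptors needed, merging physically contiguous
         * sg entries as long as the combined length fits in
         * DMA_MAX_TRANSFER_SIZE
         */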
5126         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127
5128                 len = sg_dma_len(sg);
5129                 addr = sg_dma_address(sg);
5130
5131                 if (len == 0)
5132                         break;
5133
5134                 while ((count + 1) < sgt->nents) {
5135                         sg_next_iter = sg_next(sg);
5136                         len_next = sg_dma_len(sg_next_iter);
5137                         addr_next = sg_dma_address(sg_next_iter);
5138
5139                         if (len_next == 0)
5140                                 break;
5141
5142                         if ((addr + len == addr_next) &&
5143                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144                                 len += len_next;
5145                                 count++;
5146                                 sg = sg_next_iter;
5147                         } else {
5148                                 break;
5149                         }
5150                 }
5151
5152                 dma_desc_cnt++;
5153         }
5154
5155         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
5157
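/*
 * gaudi_pin_memory_before_cs() - pin and DMA-map a user buffer referenced by
 * a LIN_DMA packet before the command submission runs.
 *
 * If the buffer is already on parser->job_userptr_list it is reused;
 * otherwise it is pinned, added to the list and DMA-mapped in the given
 * direction. In both cases the patched CB size is grown by the number of
 * LIN_DMA packets the buffer's SG table will expand into.
 */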
5158 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5159                                 struct hl_cs_parser *parser,
5160                                 struct packet_lin_dma *user_dma_pkt,
5161                                 u64 addr, enum dma_data_direction dir)
5162 {
5163         struct hl_userptr *userptr;
5164         int rc;
5165
5166         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167                         parser->job_userptr_list, &userptr))
5168                 goto already_pinned;
5169
5170         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171         if (!userptr)
5172                 return -ENOMEM;
5173
5174         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175                                 userptr);
5176         if (rc)
5177                 goto free_userptr;
5178
5179         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180
5181         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182                                         userptr->sgt->nents, dir);
5183         if (rc) {
5184                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185                 goto unpin_memory;
5186         }
5187
5188         userptr->dma_mapped = true;
5189         userptr->dir = dir;
5190
5191 already_pinned:
5192         parser->patched_cb_size +=
5193                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194
5195         return 0;
5196
5197 unpin_memory:
5198         list_del(&userptr->job_node);
5199         hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201         kfree(userptr);
5202         return rc;
5203 }
5204
5205 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5206                                 struct hl_cs_parser *parser,
5207                                 struct packet_lin_dma *user_dma_pkt,
5208                                 bool src_in_host)
5209 {
5210         enum dma_data_direction dir;
5211         bool skip_host_mem_pin = false, user_memset;
5212         u64 addr;
5213         int rc = 0;
5214
5215         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218
5219         if (src_in_host) {
5220                 if (user_memset)
5221                         skip_host_mem_pin = true;
5222
5223                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224                 dir = DMA_TO_DEVICE;
5225                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5226         } else {
5227                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228                 dir = DMA_FROM_DEVICE;
5229                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232         }
5233
5234         if (skip_host_mem_pin)
5235                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5236         else
5237                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238                                                 addr, dir);
5239
5240         return rc;
5241 }
5242
5243 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5244                                 struct hl_cs_parser *parser,
5245                                 struct packet_lin_dma *user_dma_pkt)
5246 {
5247         bool src_in_host = false;
5248         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251
5252         dev_dbg(hdev->dev, "DMA packet details:\n");
5253         dev_dbg(hdev->dev, "source == 0x%llx\n",
5254                                 le64_to_cpu(user_dma_pkt->src_addr));
5255         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257
5258         /*
5259          * Special handling for DMA with size 0. Bypass all validations
5260          * because no transactions will be done except for WR_COMP, which
5261          * is not a security issue
5262          */
5263         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5265                 return 0;
5266         }
5267
5268         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269                 src_in_host = true;
5270
5271         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272                                                 src_in_host);
5273 }
5274
5275 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5276                                         struct hl_cs_parser *parser,
5277                                         struct packet_load_and_exe *user_pkt)
5278 {
5279         u32 cfg;
5280
5281         cfg = le32_to_cpu(user_pkt->cfg);
5282
5283         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284                 dev_err(hdev->dev,
5285                         "User not allowed to use Load and Execute\n");
5286                 return -EPERM;
5287         }
5288
5289         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290
5291         return 0;
5292 }
5293
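/*
 * gaudi_validate_cb() - first pass over the user CB.
 *
 * Walks the user command buffer packet by packet, rejects packets the user
 * is not allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * LOAD_AND_EXE with its DST configuration bit set), and accumulates
 * parser->patched_cb_size. With the MMU enabled, LIN_DMA packets keep their
 * original size; without it they are sized through
 * gaudi_validate_dma_pkt_no_mmu() since they will be expanded when the CB
 * is patched.
 */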
5294 static int gaudi_validate_cb(struct hl_device *hdev,
5295                         struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297         u32 cb_parsed_length = 0;
5298         int rc = 0;
5299
5300         parser->patched_cb_size = 0;
5301
5302         /* parser->user_cb_size is more than 0 so the loop will always be executed */
5303         while (cb_parsed_length < parser->user_cb_size) {
5304                 enum packet_id pkt_id;
5305                 u16 pkt_size;
5306                 struct gaudi_packet *user_pkt;
5307
5308                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309
5310                 pkt_id = (enum packet_id) (
5311                                 (le64_to_cpu(user_pkt->header) &
5312                                 PACKET_HEADER_PACKET_ID_MASK) >>
5313                                         PACKET_HEADER_PACKET_ID_SHIFT);
5314
5315                 if (!validate_packet_id(pkt_id)) {
5316                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317                         rc = -EINVAL;
5318                         break;
5319                 }
5320
5321                 pkt_size = gaudi_packet_sizes[pkt_id];
5322                 cb_parsed_length += pkt_size;
5323                 if (cb_parsed_length > parser->user_cb_size) {
5324                         dev_err(hdev->dev,
5325                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5326                         rc = -EINVAL;
5327                         break;
5328                 }
5329
5330                 switch (pkt_id) {
5331                 case PACKET_MSG_PROT:
5332                         dev_err(hdev->dev,
5333                                 "User not allowed to use MSG_PROT\n");
5334                         rc = -EPERM;
5335                         break;
5336
5337                 case PACKET_CP_DMA:
5338                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339                         rc = -EPERM;
5340                         break;
5341
5342                 case PACKET_STOP:
5343                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5344                         rc = -EPERM;
5345                         break;
5346
5347                 case PACKET_WREG_BULK:
5348                         dev_err(hdev->dev,
5349                                 "User not allowed to use WREG_BULK\n");
5350                         rc = -EPERM;
5351                         break;
5352
5353                 case PACKET_LOAD_AND_EXE:
5354                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355                                 (struct packet_load_and_exe *) user_pkt);
5356                         break;
5357
5358                 case PACKET_LIN_DMA:
5359                         parser->contains_dma_pkt = true;
5360                         if (is_mmu)
5361                                 parser->patched_cb_size += pkt_size;
5362                         else
5363                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364                                         (struct packet_lin_dma *) user_pkt);
5365                         break;
5366
5367                 case PACKET_WREG_32:
5368                 case PACKET_MSG_LONG:
5369                 case PACKET_MSG_SHORT:
5370                 case PACKET_REPEAT:
5371                 case PACKET_FENCE:
5372                 case PACKET_NOP:
5373                 case PACKET_ARB_POINT:
5374                         parser->patched_cb_size += pkt_size;
5375                         break;
5376
5377                 default:
5378                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379                                 pkt_id);
5380                         rc = -EINVAL;
5381                         break;
5382                 }
5383
5384                 if (rc)
5385                         break;
5386         }
5387
5388         /*
5389          * The new CB should have space at the end for two MSG_PROT packets:
5390          * 1. A packet that will act as a completion packet
5391          * 2. A packet that will generate an MSI-X interrupt
5392          */
5393         if (parser->completion)
5394                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395
5396         return rc;
5397 }
5398
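/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into packets the
 * DMA engine can execute without the MMU.
 *
 * Each merged scatter-gather segment of the pinned user buffer becomes its
 * own LIN_DMA packet carrying the real bus address. The engine-barrier bit
 * is kept only on the first generated packet, and the user's WR_COMP setting
 * is restored only on the last one, so completion behavior matches what the
 * user asked for. A host-side memset (user_memset with DMA_TO_DEVICE) is
 * copied as-is.
 */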
5399 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5400                                 struct hl_cs_parser *parser,
5401                                 struct packet_lin_dma *user_dma_pkt,
5402                                 struct packet_lin_dma *new_dma_pkt,
5403                                 u32 *new_dma_pkt_size)
5404 {
5405         struct hl_userptr *userptr;
5406         struct scatterlist *sg, *sg_next_iter;
5407         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408         u64 len, len_next;
5409         dma_addr_t dma_addr, dma_addr_next;
5410         u64 device_memory_addr, addr;
5411         enum dma_data_direction dir;
5412         struct sg_table *sgt;
5413         bool src_in_host = false;
5414         bool skip_host_mem_pin = false;
5415         bool user_memset;
5416
5417         ctl = le32_to_cpu(user_dma_pkt->ctl);
5418
5419         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420                 src_in_host = true;
5421
5422         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424
5425         if (src_in_host) {
5426                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5427                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428                 dir = DMA_TO_DEVICE;
5429                 if (user_memset)
5430                         skip_host_mem_pin = true;
5431         } else {
5432                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434                 dir = DMA_FROM_DEVICE;
5435         }
5436
5437         if ((!skip_host_mem_pin) &&
5438                 (!hl_userptr_is_pinned(hdev, addr,
5439                                         le32_to_cpu(user_dma_pkt->tsize),
5440                                         parser->job_userptr_list, &userptr))) {
5441                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5442                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5443                 return -EFAULT;
5444         }
5445
5446         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5449                 return 0;
5450         }
5451
5452         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453
5454         sgt = userptr->sgt;
5455         dma_desc_cnt = 0;
5456
5457         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458                 len = sg_dma_len(sg);
5459                 dma_addr = sg_dma_address(sg);
5460
5461                 if (len == 0)
5462                         break;
5463
5464                 while ((count + 1) < sgt->nents) {
5465                         sg_next_iter = sg_next(sg);
5466                         len_next = sg_dma_len(sg_next_iter);
5467                         dma_addr_next = sg_dma_address(sg_next_iter);
5468
5469                         if (len_next == 0)
5470                                 break;
5471
5472                         if ((dma_addr + len == dma_addr_next) &&
5473                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474                                 len += len_next;
5475                                 count++;
5476                                 sg = sg_next_iter;
5477                         } else {
5478                                 break;
5479                         }
5480                 }
5481
5482                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5483                 if (likely(dma_desc_cnt))
5484                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5487                 new_dma_pkt->tsize = cpu_to_le32(len);
5488
5489                 if (dir == DMA_TO_DEVICE) {
5490                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492                 } else {
5493                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495                 }
5496
5497                 if (!user_memset)
5498                         device_memory_addr += len;
5499                 dma_desc_cnt++;
5500                 new_dma_pkt++;
5501         }
5502
5503         if (!dma_desc_cnt) {
5504                 dev_err(hdev->dev,
5505                         "No SG entries when patching DMA packet\n");
5506                 return -EFAULT;
5507         }
5508
5509         /* Fix the last dma packet - wrcomp must be as user set it */
5510         new_dma_pkt--;
5511         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512
5513         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514
5515         return 0;
5516 }
5517
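/*
 * gaudi_patch_cb() - second pass that builds the patched CB.
 *
 * Replays the same packet walk as gaudi_validate_cb(), copying allowed
 * packets verbatim into parser->patched_cb and expanding LIN_DMA packets
 * through gaudi_patch_dma_packet(). Privileged packets (MSG_PROT, CP_DMA,
 * STOP) are rejected here as well.
 */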
5518 static int gaudi_patch_cb(struct hl_device *hdev,
5519                                 struct hl_cs_parser *parser)
5520 {
5521         u32 cb_parsed_length = 0;
5522         u32 cb_patched_cur_length = 0;
5523         int rc = 0;
5524
5525         /* parser->user_cb_size is more than 0 so the loop will always be executed */
5526         while (cb_parsed_length < parser->user_cb_size) {
5527                 enum packet_id pkt_id;
5528                 u16 pkt_size;
5529                 u32 new_pkt_size = 0;
5530                 struct gaudi_packet *user_pkt, *kernel_pkt;
5531
5532                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533                 kernel_pkt = parser->patched_cb->kernel_address +
5534                                         cb_patched_cur_length;
5535
5536                 pkt_id = (enum packet_id) (
5537                                 (le64_to_cpu(user_pkt->header) &
5538                                 PACKET_HEADER_PACKET_ID_MASK) >>
5539                                         PACKET_HEADER_PACKET_ID_SHIFT);
5540
5541                 if (!validate_packet_id(pkt_id)) {
5542                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543                         rc = -EINVAL;
5544                         break;
5545                 }
5546
5547                 pkt_size = gaudi_packet_sizes[pkt_id];
5548                 cb_parsed_length += pkt_size;
5549                 if (cb_parsed_length > parser->user_cb_size) {
5550                         dev_err(hdev->dev,
5551                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5552                         rc = -EINVAL;
5553                         break;
5554                 }
5555
5556                 switch (pkt_id) {
5557                 case PACKET_LIN_DMA:
5558                         rc = gaudi_patch_dma_packet(hdev, parser,
5559                                         (struct packet_lin_dma *) user_pkt,
5560                                         (struct packet_lin_dma *) kernel_pkt,
5561                                         &new_pkt_size);
5562                         cb_patched_cur_length += new_pkt_size;
5563                         break;
5564
5565                 case PACKET_MSG_PROT:
5566                         dev_err(hdev->dev,
5567                                 "User not allowed to use MSG_PROT\n");
5568                         rc = -EPERM;
5569                         break;
5570
5571                 case PACKET_CP_DMA:
5572                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573                         rc = -EPERM;
5574                         break;
5575
5576                 case PACKET_STOP:
5577                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5578                         rc = -EPERM;
5579                         break;
5580
5581                 case PACKET_WREG_32:
5582                 case PACKET_WREG_BULK:
5583                 case PACKET_MSG_LONG:
5584                 case PACKET_MSG_SHORT:
5585                 case PACKET_REPEAT:
5586                 case PACKET_FENCE:
5587                 case PACKET_NOP:
5588                 case PACKET_ARB_POINT:
5589                 case PACKET_LOAD_AND_EXE:
5590                         memcpy(kernel_pkt, user_pkt, pkt_size);
5591                         cb_patched_cur_length += pkt_size;
5592                         break;
5593
5594                 default:
5595                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596                                 pkt_id);
5597                         rc = -EINVAL;
5598                         break;
5599                 }
5600
5601                 if (rc)
5602                         break;
5603         }
5604
5605         return rc;
5606 }
5607
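/*
 * gaudi_parse_cb_mmu() - CS parsing path when the device MMU is enabled.
 *
 * The user CB is copied verbatim into a newly created kernel CB (leaving
 * room for the two trailing MSG_PROT packets when a completion is
 * requested) and then validated in place. No DMA patching is needed since
 * address translation is handled by the MMU.
 */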
5608 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5609                 struct hl_cs_parser *parser)
5610 {
5611         u64 patched_cb_handle;
5612         u32 patched_cb_size;
5613         struct hl_cb *user_cb;
5614         int rc;
5615
5616         /*
5617          * The new CB should have space at the end for two MSG_PROT packets:
5618          * 1. A packet that will act as a completion packet
5619          * 2. A packet that will generate an MSI interrupt
5620          */
5621         if (parser->completion)
5622                 parser->patched_cb_size = parser->user_cb_size +
5623                                 sizeof(struct packet_msg_prot) * 2;
5624         else
5625                 parser->patched_cb_size = parser->user_cb_size;
5626
5627         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628                                 parser->patched_cb_size, false, false,
5629                                 &patched_cb_handle);
5630
5631         if (rc) {
5632                 dev_err(hdev->dev,
5633                         "Failed to allocate patched CB for DMA CS %d\n",
5634                         rc);
5635                 return rc;
5636         }
5637
5638         patched_cb_handle >>= PAGE_SHIFT;
5639         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640                                 (u32) patched_cb_handle);
5641         /* hl_cb_get should never fail */
5642         if (!parser->patched_cb) {
5643                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644                         (u32) patched_cb_handle);
5645                 rc = -EFAULT;
5646                 goto out;
5647         }
5648
5649         /*
5650          * The check that parser->user_cb_size <= parser->user_cb->size was done
5651          * in validate_queue_index().
5652          */
5653         memcpy(parser->patched_cb->kernel_address,
5654                 parser->user_cb->kernel_address,
5655                 parser->user_cb_size);
5656
5657         patched_cb_size = parser->patched_cb_size;
5658
5659         /* Validate patched CB instead of user CB */
5660         user_cb = parser->user_cb;
5661         parser->user_cb = parser->patched_cb;
5662         rc = gaudi_validate_cb(hdev, parser, true);
5663         parser->user_cb = user_cb;
5664
5665         if (rc) {
5666                 hl_cb_put(parser->patched_cb);
5667                 goto out;
5668         }
5669
5670         if (patched_cb_size != parser->patched_cb_size) {
5671                 dev_err(hdev->dev, "user CB size mismatch\n");
5672                 hl_cb_put(parser->patched_cb);
5673                 rc = -EINVAL;
5674                 goto out;
5675         }
5676
5677 out:
5678         /*
5679          * Always call cb destroy here because we still have 1 reference
5680          * to it from the earlier cb_get. After the job is completed,
5681          * cb_put will release it, but here we want to remove it from the
5682          * idr
5683          */
5684         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685                                         patched_cb_handle << PAGE_SHIFT);
5686
5687         return rc;
5688 }
5689
5690 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5691                 struct hl_cs_parser *parser)
5692 {
5693         u64 patched_cb_handle;
5694         int rc;
5695
5696         rc = gaudi_validate_cb(hdev, parser, false);
5697
5698         if (rc)
5699                 goto free_userptr;
5700
5701         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702                                 parser->patched_cb_size, false, false,
5703                                 &patched_cb_handle);
5704         if (rc) {
5705                 dev_err(hdev->dev,
5706                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5707                 goto free_userptr;
5708         }
5709
5710         patched_cb_handle >>= PAGE_SHIFT;
5711         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712                                 (u32) patched_cb_handle);
5713         /* hl_cb_get should never fail here */
5714         if (!parser->patched_cb) {
5715                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716                                 (u32) patched_cb_handle);
5717                 rc = -EFAULT;
5718                 goto out;
5719         }
5720
5721         rc = gaudi_patch_cb(hdev, parser);
5722
5723         if (rc)
5724                 hl_cb_put(parser->patched_cb);
5725
5726 out:
5727         /*
5728          * Always call cb destroy here because we still have 1 reference
5729          * to it from the earlier cb_get. After the job is completed,
5730          * cb_put will release it, but here we want to remove it from the
5731          * idr
5732          */
5733         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734                                 patched_cb_handle << PAGE_SHIFT);
5735
5736 free_userptr:
5737         if (rc)
5738                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739         return rc;
5740 }
5741
5742 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5743                                         struct hl_cs_parser *parser)
5744 {
5745         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746         struct gaudi_device *gaudi = hdev->asic_specific;
5747         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5748                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5749
5750         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5751                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5752                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5753                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5754                                 parser->hw_queue_id);
5755                 return -EINVAL;
5756         }
5757
5758         /* For internal queue jobs just check if the CB address is valid */
5759         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5760                                         parser->user_cb_size,
5761                                         asic_prop->sram_user_base_address,
5762                                         asic_prop->sram_end_address))
5763                 return 0;
5764
5765         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5766                                         parser->user_cb_size,
5767                                         asic_prop->dram_user_base_address,
5768                                         asic_prop->dram_end_address))
5769                 return 0;
5770
5771         /* PMMU and HPMMU addresses are equal, check only one of them */
5772         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5773                                         parser->user_cb_size,
5774                                         asic_prop->pmmu.start_addr,
5775                                         asic_prop->pmmu.end_addr))
5776                 return 0;
5777
5778         dev_err(hdev->dev,
5779                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5780                 parser->user_cb, parser->user_cb_size);
5781
5782         return -EFAULT;
5783 }
5784
5785 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5786 {
5787         struct gaudi_device *gaudi = hdev->asic_specific;
5788
5789         if (parser->queue_type == QUEUE_TYPE_INT)
5790                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5791
5792         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5793                 return gaudi_parse_cb_mmu(hdev, parser);
5794         else
5795                 return gaudi_parse_cb_no_mmu(hdev, parser);
5796 }
5797
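/*
 * gaudi_add_end_of_cb_packets() - append the two trailing MSG_PROT packets.
 *
 * The first packet writes cq_val to cq_addr (the completion queue update);
 * the second writes 1 to the MSI address to raise the interrupt - either
 * the per-vector mmPCIE_MSI_INTR_0 register in multi-MSI mode or
 * mmPCIE_CORE_MSI_REQ otherwise. The engine barrier is set on the first
 * packet only when the caller asks for it.
 */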
5798 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5799                                         void *kernel_address, u32 len,
5800                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5801                                         bool eb)
5802 {
5803         struct gaudi_device *gaudi = hdev->asic_specific;
5804         struct packet_msg_prot *cq_pkt;
5805         u64 msi_addr;
5806         u32 tmp;
5807
5808         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5809
5810         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5811         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5812
5813         if (eb)
5814                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5815
5816         cq_pkt->ctl = cpu_to_le32(tmp);
5817         cq_pkt->value = cpu_to_le32(cq_val);
5818         cq_pkt->addr = cpu_to_le64(cq_addr);
5819
5820         cq_pkt++;
5821
5822         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5823         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5824         cq_pkt->ctl = cpu_to_le32(tmp);
5825         cq_pkt->value = cpu_to_le32(1);
5826
5827         if (gaudi->multi_msi_mode)
5828                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5829         else
5830                 msi_addr = mmPCIE_CORE_MSI_REQ;
5831
5832         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5833 }
5834
5835 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5836 {
5837         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5838 }
5839
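/*
 * gaudi_memset_device_memory() - fill a device memory range with a value.
 *
 * Builds a single memset-mode LIN_DMA packet inside a kernel CB and pushes
 * it through QMAN0 on DMA channel 0 (gaudi_send_job_on_qman0()). DMA error
 * causes are checked before and after the transfer; pre-existing errors are
 * only cleared while the device is still initializing.
 */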
5840 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5841                                         u32 size, u64 val)
5842 {
5843         struct packet_lin_dma *lin_dma_pkt;
5844         struct hl_cs_job *job;
5845         u32 cb_size, ctl, err_cause;
5846         struct hl_cb *cb;
5847         u64 id;
5848         int rc;
5849
5850         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5851         if (!cb)
5852                 return -EFAULT;
5853
5854         lin_dma_pkt = cb->kernel_address;
5855         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5856         cb_size = sizeof(*lin_dma_pkt);
5857
5858         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5859         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5860         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5861         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5862         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5863
5864         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5865         lin_dma_pkt->src_addr = cpu_to_le64(val);
5866         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5867         lin_dma_pkt->tsize = cpu_to_le32(size);
5868
5869         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5870         if (!job) {
5871                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5872                 rc = -ENOMEM;
5873                 goto release_cb;
5874         }
5875
5876         /* Verify DMA is OK */
5877         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5878         if (err_cause && !hdev->init_done) {
5879                 dev_dbg(hdev->dev,
5880                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5881                         err_cause);
5882                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5883         }
5884
5885         job->id = 0;
5886         job->user_cb = cb;
5887         atomic_inc(&job->user_cb->cs_cnt);
5888         job->user_cb_size = cb_size;
5889         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5890         job->patched_cb = job->user_cb;
5891         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5892
5893         hl_debugfs_add_job(hdev, job);
5894
5895         rc = gaudi_send_job_on_qman0(hdev, job);
5896         hl_debugfs_remove_job(hdev, job);
5897         kfree(job);
5898         atomic_dec(&cb->cs_cnt);
5899
5900         /* Verify DMA is OK */
5901         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5902         if (err_cause) {
5903                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5904                 rc = -EIO;
5905                 if (!hdev->init_done) {
5906                         dev_dbg(hdev->dev,
5907                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5908                                 err_cause);
5909                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5910                 }
5911         }
5912
5913 release_cb:
5914         id = cb->id;
5915         hl_cb_put(cb);
5916         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5917
5918         return rc;
5919 }
5920
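/*
 * gaudi_memset_registers() - write the same value to a block of registers.
 *
 * Emits one MSG_LONG packet per register (plus room for a trailing
 * MSG_PROT) into a kernel CB and executes it on QMAN0. The CB is capped at
 * 2MB, which bounds num_regs. gaudi_restore_sm_registers() below uses this
 * to clear the sync-manager SOB and monitor-status register blocks.
 */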
5921 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5922                                         u32 num_regs, u32 val)
5923 {
5924         struct packet_msg_long *pkt;
5925         struct hl_cs_job *job;
5926         u32 cb_size, ctl;
5927         struct hl_cb *cb;
5928         int i, rc;
5929
5930         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5931
5932         if (cb_size > SZ_2M) {
5933                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5934                 return -ENOMEM;
5935         }
5936
5937         cb = hl_cb_kernel_create(hdev, cb_size, false);
5938         if (!cb)
5939                 return -EFAULT;
5940
5941         pkt = cb->kernel_address;
5942
5943         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5944         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5945         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5946         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5947         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5948
5949         for (i = 0; i < num_regs ; i++, pkt++) {
5950                 pkt->ctl = cpu_to_le32(ctl);
5951                 pkt->value = cpu_to_le32(val);
5952                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5953         }
5954
5955         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5956         if (!job) {
5957                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5958                 rc = -ENOMEM;
5959                 goto release_cb;
5960         }
5961
5962         job->id = 0;
5963         job->user_cb = cb;
5964         atomic_inc(&job->user_cb->cs_cnt);
5965         job->user_cb_size = cb_size;
5966         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5967         job->patched_cb = job->user_cb;
5968         job->job_cb_size = cb_size;
5969
5970         hl_debugfs_add_job(hdev, job);
5971
5972         rc = gaudi_send_job_on_qman0(hdev, job);
5973         hl_debugfs_remove_job(hdev, job);
5974         kfree(job);
5975         atomic_dec(&cb->cs_cnt);
5976
5977 release_cb:
5978         hl_cb_put(cb);
5979         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5980
5981         return rc;
5982 }
5983
5984 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5985 {
5986         u64 base_addr;
5987         u32 num_regs;
5988         int rc;
5989
5990         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5991         num_regs = NUM_OF_SOB_IN_BLOCK;
5992         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993         if (rc) {
5994                 dev_err(hdev->dev, "failed resetting SM registers\n");
5995                 return rc;
5996         }
5997
5998         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5999         num_regs = NUM_OF_SOB_IN_BLOCK;
6000         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001         if (rc) {
6002                 dev_err(hdev->dev, "failed resetting SM registers\n");
6003                 return rc;
6004         }
6005
6006         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6007         num_regs = NUM_OF_SOB_IN_BLOCK;
6008         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6009         if (rc) {
6010                 dev_err(hdev->dev, "failed resetting SM registers\n");
6011                 return rc;
6012         }
6013
6014         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6015         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6016         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017         if (rc) {
6018                 dev_err(hdev->dev, "failed resetting SM registers\n");
6019                 return rc;
6020         }
6021
6022         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6023         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6024         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025         if (rc) {
6026                 dev_err(hdev->dev, "failed resetting SM registers\n");
6027                 return rc;
6028         }
6029
6030         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6031         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6032         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033         if (rc) {
6034                 dev_err(hdev->dev, "failed resetting SM registers\n");
6035                 return rc;
6036         }
6037
6038         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6039                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6040         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6041         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6042         if (rc) {
6043                 dev_err(hdev->dev, "failed resetting SM registers\n");
6044                 return rc;
6045         }
6046
6047         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6048                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6049         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6050         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6051         if (rc) {
6052                 dev_err(hdev->dev, "failed resetting SM registers\n");
6053                 return rc;
6054         }
6055
6056         return 0;
6057 }
6058
6059 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6060 {
6061         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6062                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6063         int i;
6064
6065         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066                 u64 sob_addr = CFG_BASE +
6067                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6068                                 (i * sob_delta);
6069                 u32 dma_offset = i * DMA_CORE_OFFSET;
6070
6071                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6072                                 lower_32_bits(sob_addr));
6073                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6074                                 upper_32_bits(sob_addr));
6075                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6076
6077                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6078                  * modified by the user for SRAM reduction
6079                  */
6080                 if (i > 1)
6081                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6082                                                                 0x00000001);
6083         }
6084 }
6085
6086 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6087 {
6088         u32 qman_offset;
6089         int i;
6090
6091         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6092                 qman_offset = i * DMA_QMAN_OFFSET;
6093                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6094         }
6095
6096         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6097                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6098                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6099         }
6100
6101         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6102                 qman_offset = i * TPC_QMAN_OFFSET;
6103                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6104         }
6105
6106         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6107                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6108                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6109                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6110         }
6111 }
6112
6113 static int gaudi_restore_user_registers(struct hl_device *hdev)
6114 {
6115         int rc;
6116
6117         rc = gaudi_restore_sm_registers(hdev);
6118         if (rc)
6119                 return rc;
6120
6121         gaudi_restore_dma_registers(hdev);
6122         gaudi_restore_qm_registers(hdev);
6123
6124         return 0;
6125 }
6126
6127 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6128 {
6129         return 0;
6130 }
6131
6132 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6133 {
6134         struct asic_fixed_properties *prop = &hdev->asic_prop;
6135         struct gaudi_device *gaudi = hdev->asic_specific;
6136         u64 addr = prop->mmu_pgt_addr;
6137         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6138
6139         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6140                 return 0;
6141
6142         return gaudi_memset_device_memory(hdev, addr, size, 0);
6143 }
6144
6145 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6146 {
6147
6148 }
6149
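/*
 * debugfs accessors: gaudi_debugfs_read32/write32/read64/write64 decode the
 * target address in the same order - configuration space (blocked while the
 * relevant clock gating is enabled), SRAM through its PCI BAR, DRAM through
 * the sliding HBM BAR, and finally host physical memory, which is only
 * touched for user addresses and when no IOMMU is present.
 */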
6150 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6151                         bool user_address, u32 *val)
6152 {
6153         struct asic_fixed_properties *prop = &hdev->asic_prop;
6154         struct gaudi_device *gaudi = hdev->asic_specific;
6155         u64 hbm_bar_addr, host_phys_end;
6156         int rc = 0;
6157
6158         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6159
6160         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6161
6162                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6163                                 (hdev->clock_gating_mask &
6164                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6165
6166                         dev_err_ratelimited(hdev->dev,
6167                                 "Can't read register - clock gating is enabled!\n");
6168                         rc = -EFAULT;
6169                 } else {
6170                         *val = RREG32(addr - CFG_BASE);
6171                 }
6172
6173         } else if ((addr >= SRAM_BASE_ADDR) &&
6174                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6176                                 (addr - SRAM_BASE_ADDR));
6177         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6178                 u64 bar_base_addr = DRAM_PHYS_BASE +
6179                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6180
6181                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6182                 if (hbm_bar_addr != U64_MAX) {
6183                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6184                                                 (addr - bar_base_addr));
6185
6186                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6187                                                 hbm_bar_addr);
6188                 }
6189                 if (hbm_bar_addr == U64_MAX)
6190                         rc = -EIO;
6191         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6192                         user_address && !iommu_present(&pci_bus_type)) {
6193                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6194         } else {
6195                 rc = -EFAULT;
6196         }
6197
6198         return rc;
6199 }
6200
6201 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6202                         bool user_address, u32 val)
6203 {
6204         struct asic_fixed_properties *prop = &hdev->asic_prop;
6205         struct gaudi_device *gaudi = hdev->asic_specific;
6206         u64 hbm_bar_addr, host_phys_end;
6207         int rc = 0;
6208
6209         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6210
6211         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6212
6213                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6214                                 (hdev->clock_gating_mask &
6215                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6216
6217                         dev_err_ratelimited(hdev->dev,
6218                                 "Can't write register - clock gating is enabled!\n");
6219                         rc = -EFAULT;
6220                 } else {
6221                         WREG32(addr - CFG_BASE, val);
6222                 }
6223
6224         } else if ((addr >= SRAM_BASE_ADDR) &&
6225                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6226                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6227                                         (addr - SRAM_BASE_ADDR));
6228         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6229                 u64 bar_base_addr = DRAM_PHYS_BASE +
6230                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6231
6232                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6233                 if (hbm_bar_addr != U64_MAX) {
6234                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6235                                                 (addr - bar_base_addr));
6236
6237                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6238                                                 hbm_bar_addr);
6239                 }
6240                 if (hbm_bar_addr == U64_MAX)
6241                         rc = -EIO;
6242         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6243                         user_address && !iommu_present(&pci_bus_type)) {
6244                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6245         } else {
6246                 rc = -EFAULT;
6247         }
6248
6249         return rc;
6250 }
6251
6252 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6253                                 bool user_address, u64 *val)
6254 {
6255         struct asic_fixed_properties *prop = &hdev->asic_prop;
6256         struct gaudi_device *gaudi = hdev->asic_specific;
6257         u64 hbm_bar_addr, host_phys_end;
6258         int rc = 0;
6259
6260         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6261
6262         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6263
6264                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6265                                 (hdev->clock_gating_mask &
6266                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6267
6268                         dev_err_ratelimited(hdev->dev,
6269                                 "Can't read register - clock gating is enabled!\n");
6270                         rc = -EFAULT;
6271                 } else {
6272                         u32 val_l = RREG32(addr - CFG_BASE);
6273                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6274
6275                         *val = (((u64) val_h) << 32) | val_l;
6276                 }
6277
6278         } else if ((addr >= SRAM_BASE_ADDR) &&
6279                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6280                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6281                                 (addr - SRAM_BASE_ADDR));
6282         } else if (addr <=
6283                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6284                 u64 bar_base_addr = DRAM_PHYS_BASE +
6285                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6286
6287                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6288                 if (hbm_bar_addr != U64_MAX) {
6289                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6290                                                 (addr - bar_base_addr));
6291
6292                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6293                                                 hbm_bar_addr);
6294                 }
6295                 if (hbm_bar_addr == U64_MAX)
6296                         rc = -EIO;
6297         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6298                         user_address && !iommu_present(&pci_bus_type)) {
6299                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6300         } else {
6301                 rc = -EFAULT;
6302         }
6303
6304         return rc;
6305 }
6306
6307 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6308                                 bool user_address, u64 val)
6309 {
6310         struct asic_fixed_properties *prop = &hdev->asic_prop;
6311         struct gaudi_device *gaudi = hdev->asic_specific;
6312         u64 hbm_bar_addr, host_phys_end;
6313         int rc = 0;
6314
6315         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6316
6317         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6318
6319                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6320                                 (hdev->clock_gating_mask &
6321                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6322
6323                         dev_err_ratelimited(hdev->dev,
6324                                 "Can't write register - clock gating is enabled!\n");
6325                         rc = -EFAULT;
6326                 } else {
6327                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6328                         WREG32(addr + sizeof(u32) - CFG_BASE,
6329                                 upper_32_bits(val));
6330                 }
6331
6332         } else if ((addr >= SRAM_BASE_ADDR) &&
6333                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6334                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6335                                         (addr - SRAM_BASE_ADDR));
6336         } else if (addr <=
6337                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6338                 u64 bar_base_addr = DRAM_PHYS_BASE +
6339                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6340
6341                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6342                 if (hbm_bar_addr != U64_MAX) {
6343                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6344                                                 (addr - bar_base_addr));
6345
6346                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6347                                                 hbm_bar_addr);
6348                 }
6349                 if (hbm_bar_addr == U64_MAX)
6350                         rc = -EIO;
6351         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6352                         user_address && !iommu_present(&pci_bus_type)) {
6353                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6354         } else {
6355                 rc = -EFAULT;
6356         }
6357
6358         return rc;
6359 }
6360
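/*
 * gaudi_dma_core_transfer() - single DMA-core transfer programmed directly
 * through registers (no QMAN involved): source/destination and size are
 * written to the DMA core, the transfer is committed in linear mode and the
 * core is polled (for up to 1 second) until it is no longer busy. Any error
 * cause reported by the core fails the transfer with -EIO.
 */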
6361 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6362                                         u32 size_to_dma, dma_addr_t dma_addr)
6363 {
6364         u32 err_cause, val;
6365         u64 dma_offset;
6366         int rc;
6367
6368         dma_offset = dma_id * DMA_CORE_OFFSET;
6369
6370         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6371         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6372         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6373         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6374         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6375         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6376                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6377
6378         rc = hl_poll_timeout(
6379                 hdev,
6380                 mmDMA0_CORE_STS0 + dma_offset,
6381                 val,
6382                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6383                 0,
6384                 1000000);
6385
6386         if (rc) {
6387                 dev_err(hdev->dev,
6388                         "DMA %d timed-out during reading of 0x%llx\n",
6389                         dma_id, addr);
6390                 return -EIO;
6391         }
6392
6393         /* Verify DMA is OK */
6394         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6395         if (err_cause) {
6396                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6397                 dev_dbg(hdev->dev,
6398                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6399                         err_cause);
6400                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6401
6402                 return -EIO;
6403         }
6404
6405         return 0;
6406 }
6407
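/*
 * gaudi_debugfs_read_dma() - read a large device memory range for debugfs.
 *
 * Picks an idle PCI DMA engine (PCI_DMA_1, falling back to PCI_DMA_2),
 * stops its QMAN CPs, raises the DMA core PROT bit as a temporary
 * workaround (see the in-code TODO), and copies the range in chunks of up
 * to 2MB through a coherent bounce buffer into blob_addr. The original QMAN
 * configuration and protection bits are restored before returning.
 */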
6408 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6409                                 void *blob_addr)
6410 {
6411         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6412         struct gaudi_device *gaudi = hdev->asic_specific;
6413         u64 dma_offset, qm_offset;
6414         dma_addr_t dma_addr;
6415         void *kernel_addr;
6416         bool is_eng_idle;
6417         int rc = 0, dma_id;
6418
6419         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6420                                                 hdev, SZ_2M,
6421                                                 &dma_addr,
6422                                                 GFP_KERNEL | __GFP_ZERO);
6423
6424         if (!kernel_addr)
6425                 return -ENOMEM;
6426
6427         mutex_lock(&gaudi->clk_gate_mutex);
6428
6429         hdev->asic_funcs->disable_clock_gating(hdev);
6430
6431         hdev->asic_funcs->hw_queues_lock(hdev);
6432
6433         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6434         dma_offset = dma_id * DMA_CORE_OFFSET;
6435         qm_offset = dma_id * DMA_QMAN_OFFSET;
6436         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6437         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6438
6439         if (!is_eng_idle) {
6440                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6441                 dma_offset = dma_id * DMA_CORE_OFFSET;
6442                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6443                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6444                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6445
6446                 if (!is_eng_idle) {
6447                         dev_err_ratelimited(hdev->dev,
6448                                 "Can't read via DMA because it is BUSY\n");
6449                         rc = -EAGAIN;
6450                         goto out;
6451                 }
6452         }
6453
6454         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6455         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6456                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6457
6458         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459          * using the compute ctx ASID, if it exists. Otherwise, use the
6460          * kernel ctx ASID
6461          */
6462         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6463
6464         /* Verify DMA is OK */
6465         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466         if (err_cause) {
6467                 dev_dbg(hdev->dev,
6468                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6469                         err_cause);
6470                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6471         }
6472
6473         pos = 0;
6474         size_left = size;
6475         size_to_dma = SZ_2M;
6476
6477         while (size_left > 0) {
6478
6479                 if (size_left < SZ_2M)
6480                         size_to_dma = size_left;
6481
6482                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6483                                                 dma_addr);
6484                 if (rc)
6485                         break;
6486
6487                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6488
6489                 if (size_left <= SZ_2M)
6490                         break;
6491
6492                 pos += SZ_2M;
6493                 addr += SZ_2M;
6494                 size_left -= SZ_2M;
6495         }
6496
6497         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6498          * using the compute ctx ASID, if it exists. Otherwise, use the
6499          * kernel ctx ASID
6500          */
6501         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6502                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6503
6504         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6505
6506 out:
6507         hdev->asic_funcs->hw_queues_unlock(hdev);
6508
6509         hdev->asic_funcs->set_clock_gating(hdev);
6510
6511         mutex_unlock(&gaudi->clk_gate_mutex);
6512
6513         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6514                                                 dma_addr);
6515
6516         return rc;
6517 }
6518
6519 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6520 {
6521         struct gaudi_device *gaudi = hdev->asic_specific;
6522
6523         if (hdev->hard_reset_pending)
6524                 return U64_MAX;
6525
6526         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6527                         (addr - gaudi->hbm_bar_cur_addr));
6528 }
6529
6530 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6531 {
6532         struct gaudi_device *gaudi = hdev->asic_specific;
6533
6534         if (hdev->hard_reset_pending)
6535                 return;
6536
6537         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6538                         (addr - gaudi->hbm_bar_cur_addr));
6539 }
6540
6541 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6542 {
6543         /* mask to zero the MMBP and ASID bits */
6544         WREG32_AND(reg, ~0x7FF);
6545         WREG32_OR(reg, asid);
6546 }
6547
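/* Program the given ASID (and clear the MMU-bypass bit) in the non-secure
 * properties registers of all DMA, TPC, MME and initialized NIC engines.
 */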
6548 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6549 {
6550         struct gaudi_device *gaudi = hdev->asic_specific;
6551
6552         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6553                 return;
6554
6555         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6556                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6557                 return;
6558         }
6559
6560         mutex_lock(&gaudi->clk_gate_mutex);
6561
6562         hdev->asic_funcs->disable_clock_gating(hdev);
6563
6564         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569
6570         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575
6576         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581
6582         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587
6588         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593
6594         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6599
6600         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605
6606         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611
6612         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6620
6621         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6628
6629         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6636
6637         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6644
6645         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6652
6653         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6660
6661         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6668
6669         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6673         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6676
6677         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6681         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6684
6685         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6688         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6689         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6692         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6693         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6695
6696         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6697         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6698         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6704         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6705         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6706         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6707         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6708
6709         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6710                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6711                                 asid);
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6719                                 asid);
6720         }
6721
6722         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6723                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6724                                 asid);
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6732                                 asid);
6733         }
6734
6735         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6736                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6737                                 asid);
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6745                                 asid);
6746         }
6747
6748         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6749                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6750                                 asid);
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6758                                 asid);
6759         }
6760
6761         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6763                                 asid);
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6771                                 asid);
6772         }
6773
6774         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6776                                 asid);
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6784                                 asid);
6785         }
6786
6787         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6788                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6789                                 asid);
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6797                                 asid);
6798         }
6799
6800         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6801                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6802                                 asid);
6803                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6804                                 asid);
6805                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6806                                 asid);
6807                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6808                                 asid);
6809                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6810                                 asid);
6811         }
6812
6813         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6814                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6815                                 asid);
6816                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6817                                 asid);
6818                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6819                                 asid);
6820                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6821                                 asid);
6822                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6823                                 asid);
6824         }
6825
6826         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6827                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6828                                 asid);
6829                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6830                                 asid);
6831                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6832                                 asid);
6833                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6834                                 asid);
6835                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6836                                 asid);
6837         }
6838
6839         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6840         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6841
6842         hdev->asic_funcs->set_clock_gating(hdev);
6843
6844         mutex_unlock(&gaudi->clk_gate_mutex);
6845 }
6846
6847 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6848                 struct hl_cs_job *job)
6849 {
6850         struct packet_msg_prot *fence_pkt;
6851         u32 *fence_ptr;
6852         dma_addr_t fence_dma_addr;
6853         struct hl_cb *cb;
6854         u32 tmp, timeout, dma_offset;
6855         int rc;
6856
6857         if (hdev->pldm)
6858                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6859         else
6860                 timeout = HL_DEVICE_TIMEOUT_USEC;
6861
6862         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6863                 dev_err_ratelimited(hdev->dev,
6864                         "Can't send driver job on QMAN0 because the device is not idle\n");
6865                 return -EBUSY;
6866         }
6867
6868         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6869                                                         &fence_dma_addr);
6870         if (!fence_ptr) {
6871                 dev_err(hdev->dev,
6872                         "Failed to allocate fence memory for QMAN0\n");
6873                 return -ENOMEM;
6874         }
6875
6876         cb = job->patched_cb;
6877
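        /* The last packet of the patched CB is a MSG_PROT that writes the
         * fence value to host memory; the driver polls that memory to detect
         * completion.
         */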
6878         fence_pkt = cb->kernel_address +
6879                         job->job_cb_size - sizeof(struct packet_msg_prot);
6880
6881         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6882         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6883         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6884
6885         fence_pkt->ctl = cpu_to_le32(tmp);
6886         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6887         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6888
6889         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6890
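        /* Set the DMA core protection bits (VAL + ERR_VAL) while the driver
         * job runs; only ERR_VAL is left set after the fence completes or on
         * failure.
         */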
6891         WREG32(mmDMA0_CORE_PROT + dma_offset,
6892                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6893
6894         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6895                                         job->job_cb_size, cb->bus_address);
6896         if (rc) {
6897                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6898                 goto free_fence_ptr;
6899         }
6900
6901         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6902                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6903                                 timeout, true);
6904
6905         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6906
6907         if (rc == -ETIMEDOUT) {
6908                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6909                 goto free_fence_ptr;
6910         }
6911
6912 free_fence_ptr:
6913         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6914
6915         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6916                                         fence_dma_addr);
6917         return rc;
6918 }
6919
6920 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6921 {
6922         if (event_type >= GAUDI_EVENT_SIZE)
6923                 goto event_not_supported;
6924
6925         if (!gaudi_irq_map_table[event_type].valid)
6926                 goto event_not_supported;
6927
6928         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6929
6930         return;
6931
6932 event_not_supported:
6933         snprintf(desc, size, "N/A");
6934 }
6935
6936 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6937                                                         u32 x_y, bool is_write)
6938 {
6939         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6940
6941         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6942                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6943
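        /* Each pair of cases below maps a DMA_IF to the two DMA engines
         * behind it; their ERR_CAUSE registers are then used to tell which
         * engine triggered the RAZWI.
         */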
6944         switch (x_y) {
6945         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6946         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6947                 dma_id[0] = 0;
6948                 dma_id[1] = 2;
6949                 break;
6950         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6951         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6952                 dma_id[0] = 1;
6953                 dma_id[1] = 3;
6954                 break;
6955         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6956         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6957                 dma_id[0] = 4;
6958                 dma_id[1] = 6;
6959                 break;
6960         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6961         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6962                 dma_id[0] = 5;
6963                 dma_id[1] = 7;
6964                 break;
6965         default:
6966                 goto unknown_initiator;
6967         }
6968
6969         for (i = 0 ; i < 2 ; i++) {
6970                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6971                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6972         }
6973
6974         switch (x_y) {
6975         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6976         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6977                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6978                         return "DMA0";
6979                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6980                         return "DMA2";
6981                 else
6982                         return "DMA0 or DMA2";
6983         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6984         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6985                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6986                         return "DMA1";
6987                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6988                         return "DMA3";
6989                 else
6990                         return "DMA1 or DMA3";
6991         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6992         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6993                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6994                         return "DMA4";
6995                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6996                         return "DMA6";
6997                 else
6998                         return "DMA4 or DMA6";
6999         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7000         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7001                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7002                         return "DMA5";
7003                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7004                         return "DMA7";
7005                 else
7006                         return "DMA5 or DMA7";
7007         }
7008
7009 unknown_initiator:
7010         return "unknown initiator";
7011 }
7012
7013 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7014                                                         bool is_write)
7015 {
7016         u32 val, x_y, axi_id;
7017
7018         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7019                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7020         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7021                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7022         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7023                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7024
7025         switch (x_y) {
7026         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7027                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7028                         return "TPC0";
7029                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7030                         return "NIC0";
7031                 break;
7032         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7033                 return "TPC1";
7034         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7035         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7036                 return "MME0";
7037         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7038         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7039                 return "MME1";
7040         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7041                 return "TPC2";
7042         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7043                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7044                         return "TPC3";
7045                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7046                         return "PCI";
7047                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7048                         return "CPU";
7049                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7050                         return "PSOC";
7051                 break;
7052         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7053         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7054         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7056         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7057         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7058         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7059         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7060                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7061         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7062                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7063                         return "TPC4";
7064                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7065                         return "NIC1";
7066                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7067                         return "NIC2";
7068                 break;
7069         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7070                 return "TPC5";
7071         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7072         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7073                 return "MME2";
7074         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7075         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7076                 return "MME3";
7077         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7078                 return "TPC6";
7079         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7080                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7081                         return "TPC7";
7082                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7083                         return "NIC4";
7084                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7085                         return "NIC5";
7086                 break;
7087         default:
7088                 break;
7089         }
7090
7091         dev_err(hdev->dev,
7092                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7093                 val,
7094                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7095                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7096                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7097                         RAZWI_INITIATOR_AXI_ID_MASK);
7098
7099         return "unknown initiator";
7100 }
7101
7102 static void gaudi_print_razwi_info(struct hl_device *hdev)
7103 {
7104         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7105                 dev_err_ratelimited(hdev->dev,
7106                         "RAZWI event caused by illegal write of %s\n",
7107                         gaudi_get_razwi_initiator_name(hdev, true));
7108                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7109         }
7110
7111         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7112                 dev_err_ratelimited(hdev->dev,
7113                         "RAZWI event caused by illegal read of %s\n",
7114                         gaudi_get_razwi_initiator_name(hdev, false));
7115                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7116         }
7117 }
7118
7119 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7120 {
7121         struct gaudi_device *gaudi = hdev->asic_specific;
7122         u64 addr;
7123         u32 val;
7124
7125         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7126                 return;
7127
7128         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7129         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7130                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7131                 addr <<= 32;
7132                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7133
7134                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7135                                         addr);
7136
7137                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7138         }
7139
7140         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7141         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7142                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7143                 addr <<= 32;
7144                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7145
7146                 dev_err_ratelimited(hdev->dev,
7147                                 "MMU access error on va 0x%llx\n", addr);
7148
7149                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7150         }
7151 }
7152
7153 /*
7154  *  +-------------------+------------------------------------------------------+
7155  *  | Configuration Reg |                     Description                      |
7156  *  |      Address      |                                                      |
7157  *  +-------------------+------------------------------------------------------+
7158  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7159  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7160  *  |                   |0xF34 memory wrappers 63:32                           |
7161  *  |                   |0xF38 memory wrappers 95:64                           |
7162  *  |                   |0xF3C memory wrappers 127:96                          |
7163  *  +-------------------+------------------------------------------------------+
7164  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7165  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7166  *  |                   |0xF44 memory wrappers 63:32                           |
7167  *  |                   |0xF48 memory wrappers 95:64                           |
7168  *  |                   |0xF4C memory wrappers 127:96                          |
7169  *  +-------------------+------------------------------------------------------+
7170  */
7171 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7172                 struct ecc_info_extract_params *params, u64 *ecc_address,
7173                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7174 {
7175         struct gaudi_device *gaudi = hdev->asic_specific;
7176         u32 i, num_mem_regs, reg, err_bit;
7177         u64 err_addr, err_word = 0;
7178         int rc = 0;
7179
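        /* Each error-indication register covers 32 memory wrappers */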
7180         num_mem_regs = params->num_memories / 32 +
7181                         ((params->num_memories % 32) ? 1 : 0);
7182
7183         if (params->block_address >= CFG_BASE)
7184                 params->block_address -= CFG_BASE;
7185
7186         if (params->derr)
7187                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7188         else
7189                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7190
7191         if (params->disable_clock_gating) {
7192                 mutex_lock(&gaudi->clk_gate_mutex);
7193                 hdev->asic_funcs->disable_clock_gating(hdev);
7194         }
7195
7196         /* Set invalid wrapper index */
7197         *memory_wrapper_idx = 0xFF;
7198
7199         /* Iterate through memory wrappers, a single bit must be set */
7200         for (i = 0 ; i < num_mem_regs ; i++) {
7201                 /* indication registers are consecutive, 4 bytes apart */
7202                 err_word = RREG32(err_addr + i * 4);
7203                 if (err_word) {
7204                         err_bit = __ffs(err_word);
7205                         *memory_wrapper_idx = err_bit + (32 * i);
7206                         break;
7207                 }
7208         }
7209
7210         if (*memory_wrapper_idx == 0xFF) {
7211                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7212                 rc = -EINVAL;
7213                 goto enable_clk_gate;
7214         }
7215
7216         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7217                         *memory_wrapper_idx);
7218
7219         *ecc_address =
7220                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7221         *ecc_syndrom =
7222                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7223
7224         /* Clear error indication */
7225         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7226         if (params->derr)
7227                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7228         else
7229                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7230
7231         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7232
7233 enable_clk_gate:
7234         if (params->disable_clock_gating) {
7235                 hdev->asic_funcs->set_clock_gating(hdev);
7236
7237                 mutex_unlock(&gaudi->clk_gate_mutex);
7238         }
7239
7240         return rc;
7241 }
7242
7243 /*
7244  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7245  *
7246  * @idx: the current pi/ci value
7247  * @q_len: the queue length (power of 2)
7248  *
7249  * @return the cyclically decremented index
7250  */
7251 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7252 {
7253         u32 mask = q_len - 1;
7254
7255         /*
7256          * Modular decrement is equivalent to adding (q_len - 1);
7257          * we then take the LSBs to make sure the value is in the
7258          * range [0, q_len - 1]
7259          */
7260         return (idx + q_len - 1) & mask;
7261 }
7262
7263 /**
7264  * gaudi_print_sw_config_stream_data - print SW config stream data
7265  *
7266  * @hdev: pointer to the habanalabs device structure
7267  * @stream: the QMAN's stream
7268  * @qman_base: base address of QMAN registers block
7269  */
7270 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7271                                                 u64 qman_base)
7272 {
7273         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7274         u32 cq_ptr_lo_off, size;
7275
7276         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7277
7278         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7279                                                 stream * cq_ptr_lo_off;
7280         cq_ptr_hi = cq_ptr_lo +
7281                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7282         cq_tsize = cq_ptr_lo +
7283                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284
7285         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7286         size = RREG32(cq_tsize);
7287         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7288                                                         stream, cq_ptr, size);
7289 }
7290
7291 /**
7292  * gaudi_print_last_pqes_on_err - print last PQEs on error
7293  *
7294  * @hdev: pointer to the habanalabs device structure
7295  * @qid_base: first QID of the QMAN (out of 4 streams)
7296  * @stream: the QMAN's stream
7297  * @qman_base: base address of QMAN registers block
7298  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7299  */
7300 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7301                                                 u32 stream, u64 qman_base,
7302                                                 bool pr_sw_conf)
7303 {
7304         u32 ci, qm_ci_stream_off, queue_len;
7305         struct hl_hw_queue *q;
7306         u64 pq_ci;
7307         int i;
7308
7309         q = &hdev->kernel_queues[qid_base + stream];
7310
7311         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7312         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7313                                                 stream * qm_ci_stream_off;
7314
7315         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7316                                         q->int_queue_len : HL_QUEUE_LENGTH;
7317
7318         hdev->asic_funcs->hw_queues_lock(hdev);
7319
7320         if (pr_sw_conf)
7321                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7322
7323         ci = RREG32(pq_ci);
7324
7325         /* we should start printing from ci - 1 */
7326         ci = gaudi_queue_idx_dec(ci, queue_len);
7327
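        /* Walk backwards over the last PQ_FETCHER_CACHE_SIZE BDs, starting
         * from the entry before the current CI.
         */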
7328         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7329                 struct hl_bd *bd;
7330                 u64 addr;
7331                 u32 len;
7332
7333                 bd = q->kernel_address;
7334                 bd += ci;
7335
7336                 len = le32_to_cpu(bd->len);
7337                 /* len 0 means an uninitialized entry - break */
7338                 if (!len)
7339                         break;
7340
7341                 addr = le64_to_cpu(bd->ptr);
7342
7343                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7344                                                         stream, ci, addr, len);
7345
7346                 /* get previous ci, wrap if needed */
7347                 ci = gaudi_queue_idx_dec(ci, queue_len);
7348         }
7349
7350         hdev->asic_funcs->hw_queues_unlock(hdev);
7351 }
7352
7353 /**
7354  * print_qman_data_on_err - extract QMAN data on error
7355  *
7356  * @hdev: pointer to the habanalabs device structure
7357  * @qid_base: first QID of the QMAN (out of 4 streams)
7358  * @stream: the QMAN's stream
7359  * @qman_base: base address of QMAN registers block
7360  *
7361  * This function attempts to extract as much data as possible on a QMAN error.
7362  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7363  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7364  */
7365 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7366                                                 u32 stream, u64 qman_base)
7367 {
7368         u32 i;
7369
7370         if (stream != QMAN_STREAMS) {
7371                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7372                                                                         true);
7373                 return;
7374         }
7375
7376         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7377
7378         for (i = 0; i < QMAN_STREAMS; i++)
7379                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7380                                                                         false);
7381 }
7382
7383 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7384                                           const char *qm_name,
7385                                           u64 qman_base,
7386                                           u32 qid_base)
7387 {
7388         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7389         u64 glbl_sts_addr, arb_err_addr;
7390         char reg_desc[32];
7391
7392         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7393         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7394
7395         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7396         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7397                 glbl_sts_clr_val = 0;
7398                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7399
7400                 if (!glbl_sts_val)
7401                         continue;
7402
7403                 if (i == QMAN_STREAMS)
7404                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7405                 else
7406                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7407
7408                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7409                         if (glbl_sts_val & BIT(j)) {
7410                                 dev_err_ratelimited(hdev->dev,
7411                                                 "%s %s. err cause: %s\n",
7412                                                 qm_name, reg_desc,
7413                                                 gaudi_qman_error_cause[j]);
7414                                 glbl_sts_clr_val |= BIT(j);
7415                         }
7416                 }
7417
7418                 /* Write 1 to clear errors */
7419                 if (!hdev->stop_on_err)
7420                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7421                 else
7422                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7423         }
7424
7425         arb_err_val = RREG32(arb_err_addr);
7426
7427         if (!arb_err_val)
7428                 return;
7429
7430         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7431                 if (arb_err_val & BIT(j)) {
7432                         dev_err_ratelimited(hdev->dev,
7433                                         "%s ARB_ERR. err cause: %s\n",
7434                                         qm_name,
7435                                         gaudi_qman_arb_error_cause[j]);
7436                 }
7437         }
7438 }
7439
7440 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7441                 struct hl_eq_sm_sei_data *sei_data)
7442 {
7443         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7444
7445         /* Flip the bits as the enum is ordered in the opposite way */
7446         index = (index ^ 0x3) & 0x3;
7447
7448         switch (sei_data->sei_cause) {
7449         case SM_SEI_SO_OVERFLOW:
7450                 dev_err_ratelimited(hdev->dev,
7451                         "%s SEI Error: SOB Group %u overflow/underflow",
7452                         gaudi_sync_manager_names[index],
7453                         le32_to_cpu(sei_data->sei_log));
7454                 break;
7455         case SM_SEI_LBW_4B_UNALIGNED:
7456                 dev_err_ratelimited(hdev->dev,
7457                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7458                         gaudi_sync_manager_names[index],
7459                         le32_to_cpu(sei_data->sei_log));
7460                 break;
7461         case SM_SEI_AXI_RESPONSE_ERR:
7462                 dev_err_ratelimited(hdev->dev,
7463                         "%s SEI Error: AXI ID %u response error",
7464                         gaudi_sync_manager_names[index],
7465                         le32_to_cpu(sei_data->sei_log));
7466                 break;
7467         default:
7468                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7469                                 le32_to_cpu(sei_data->sei_log));
7470                 break;
7471         }
7472 }
7473
7474 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7475                 struct hl_eq_ecc_data *ecc_data)
7476 {
7477         struct ecc_info_extract_params params;
7478         u64 ecc_address = 0, ecc_syndrom = 0;
7479         u8 index, memory_wrapper_idx = 0;
7480         bool extract_info_from_fw;
7481         int rc;
7482
7483         if (hdev->asic_prop.fw_security_enabled) {
7484                 extract_info_from_fw = true;
7485                 goto extract_ecc_info;
7486         }
7487
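        /* For each event type, either rely on the FW-provided ECC data or
         * build the parameters needed to read the block's ECC registers
         * directly.
         */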
7488         switch (event_type) {
7489         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7490         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7491                 extract_info_from_fw = true;
7492                 break;
7493         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7494                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7495                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7496                 params.num_memories = 90;
7497                 params.derr = false;
7498                 params.disable_clock_gating = true;
7499                 extract_info_from_fw = false;
7500                 break;
7501         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7502                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7503                 params.block_address =
7504                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7505                 params.num_memories = 90;
7506                 params.derr = true;
7507                 params.disable_clock_gating = true;
7508                 extract_info_from_fw = false;
7509                 break;
7510         case GAUDI_EVENT_MME0_ACC_SERR:
7511         case GAUDI_EVENT_MME1_ACC_SERR:
7512         case GAUDI_EVENT_MME2_ACC_SERR:
7513         case GAUDI_EVENT_MME3_ACC_SERR:
7514                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7515                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7516                 params.num_memories = 128;
7517                 params.derr = false;
7518                 params.disable_clock_gating = true;
7519                 extract_info_from_fw = false;
7520                 break;
7521         case GAUDI_EVENT_MME0_ACC_DERR:
7522         case GAUDI_EVENT_MME1_ACC_DERR:
7523         case GAUDI_EVENT_MME2_ACC_DERR:
7524         case GAUDI_EVENT_MME3_ACC_DERR:
7525                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7526                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7527                 params.num_memories = 128;
7528                 params.derr = true;
7529                 params.disable_clock_gating = true;
7530                 extract_info_from_fw = false;
7531                 break;
7532         case GAUDI_EVENT_MME0_SBAB_SERR:
7533         case GAUDI_EVENT_MME1_SBAB_SERR:
7534         case GAUDI_EVENT_MME2_SBAB_SERR:
7535         case GAUDI_EVENT_MME3_SBAB_SERR:
7536                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7537                 params.block_address =
7538                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7539                 params.num_memories = 33;
7540                 params.derr = false;
7541                 params.disable_clock_gating = true;
7542                 extract_info_from_fw = false;
7543                 break;
7544         case GAUDI_EVENT_MME0_SBAB_DERR:
7545         case GAUDI_EVENT_MME1_SBAB_DERR:
7546         case GAUDI_EVENT_MME2_SBAB_DERR:
7547         case GAUDI_EVENT_MME3_SBAB_DERR:
7548                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7549                 params.block_address =
7550                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7551                 params.num_memories = 33;
7552                 params.derr = true;
7553                 params.disable_clock_gating = true;
7554                 extract_info_from_fw = false;
7555                 break;
7556         default:
7557                 return;
7558         }
7559
7560 extract_ecc_info:
7561         if (extract_info_from_fw) {
7562                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7563                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7564                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7565         } else {
7566                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7567                                 &ecc_syndrom, &memory_wrapper_idx);
7568                 if (rc)
7569                         return;
7570         }
7571
7572         dev_err(hdev->dev,
7573                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7574                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7575 }
7576
7577 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7578 {
7579         u64 qman_base;
7580         char desc[32];
7581         u32 qid_base;
7582         u8 index;
7583
7584         switch (event_type) {
7585         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7586                 index = event_type - GAUDI_EVENT_TPC0_QM;
7587                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7588                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7589                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7590                 break;
7591         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7592                 index = event_type - GAUDI_EVENT_MME0_QM;
7593                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7594                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7595                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7596                 break;
7597         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7598                 index = event_type - GAUDI_EVENT_DMA0_QM;
7599                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7600                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7601                 if (index > 1)
7602                         qid_base++;
7603                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7604                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7605                 break;
7606         case GAUDI_EVENT_NIC0_QM0:
7607                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7608                 qman_base = mmNIC0_QM0_BASE;
7609                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7610                 break;
7611         case GAUDI_EVENT_NIC0_QM1:
7612                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7613                 qman_base = mmNIC0_QM1_BASE;
7614                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7615                 break;
7616         case GAUDI_EVENT_NIC1_QM0:
7617                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7618                 qman_base = mmNIC1_QM0_BASE;
7619                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7620                 break;
7621         case GAUDI_EVENT_NIC1_QM1:
7622                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7623                 qman_base = mmNIC1_QM1_BASE;
7624                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7625                 break;
7626         case GAUDI_EVENT_NIC2_QM0:
7627                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7628                 qman_base = mmNIC2_QM0_BASE;
7629                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7630                 break;
7631         case GAUDI_EVENT_NIC2_QM1:
7632                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7633                 qman_base = mmNIC2_QM1_BASE;
7634                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7635                 break;
7636         case GAUDI_EVENT_NIC3_QM0:
7637                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7638                 qman_base = mmNIC3_QM0_BASE;
7639                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7640                 break;
7641         case GAUDI_EVENT_NIC3_QM1:
7642                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7643                 qman_base = mmNIC3_QM1_BASE;
7644                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7645                 break;
7646         case GAUDI_EVENT_NIC4_QM0:
7647                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7648                 qman_base = mmNIC4_QM0_BASE;
7649                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7650                 break;
7651         case GAUDI_EVENT_NIC4_QM1:
7652                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7653                 qman_base = mmNIC4_QM1_BASE;
7654                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7655                 break;
7656         default:
7657                 return;
7658         }
7659
7660         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7661 }
7662
7663 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7664                                         bool razwi)
7665 {
7666         char desc[64] = "";
7667
7668         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7669         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7670                 event_type, desc);
7671
7672         if (razwi) {
7673                 gaudi_print_razwi_info(hdev);
7674                 gaudi_print_mmu_error_info(hdev);
7675         }
7676 }
7677
7678 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7679                                         struct cpucp_pkt_sync_err *sync_err)
7680 {
7681         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7682
7683         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7684                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7685 }
7686
7687 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7688                                         struct hl_eq_fw_alive *fw_alive)
7689 {
7690         dev_err(hdev->dev,
7691                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7692                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7693                 "Minor" : "Critical", fw_alive->process_id,
7694                 fw_alive->thread_id, fw_alive->uptime_seconds);
7695 }
7696
7697 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7698 {
7699         struct gaudi_device *gaudi = hdev->asic_specific;
7700
7701         /* Unmask all IRQs since some could have been received
7702          * during the soft reset
7703          */
7704         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7705 }
7706
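/*
 * Read and report HBM error interrupts for a single HBM device.
 * If the firmware reports ECC info (CPU_BOOT_DEV_STS0_HBM_ECC_EN), the
 * FW-supplied hbm_ecc_data is parsed and logged. Otherwise, and only when
 * FW security is disabled, the HBM MC registers are read directly, errors
 * are logged per pseudo-channel, the interrupts are cleared and the MC SRAM
 * SERR/DERR registers are checked.
 * Returns -EIO if an error was detected via the direct register reads.
 */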
7707 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7708                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7709 {
7710         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7711         int rc = 0;
7712
7713         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7714                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7715                 if (!hbm_ecc_data) {
7716                         dev_err(hdev->dev, "No FW ECC data\n");
7717                         return 0;
7718                 }
7719
7720                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7721                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7722                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7723                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7725                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7727                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7729                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7731                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7733                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734
7735                 dev_err(hdev->dev,
7736                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7737                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7738                 dev_err(hdev->dev,
7739                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7740                         device, ch, hbm_ecc_data->first_addr, type,
7741                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7742                         hbm_ecc_data->dec_cnt);
7743                 return 0;
7744         }
7745
7746         if (hdev->asic_prop.fw_security_enabled) {
7747                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7748                 return 0;
7749         }
7750
7751         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7752         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7753                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7754                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7755                 if (val) {
7756                         rc = -EIO;
7757                         dev_err(hdev->dev,
7758                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7759                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7760                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7761                                 (val >> 4) & 0x1);
7762
7763                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7764                         dev_err(hdev->dev,
7765                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7766                                 device, ch * 2,
7767                                 RREG32(base + ch * 0x1000 + 0x064),
7768                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7769                                 (val2 & 0xFF0000) >> 16,
7770                                 (val2 & 0xFF000000) >> 24);
7771                 }
7772
7773                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7774                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7775                 if (val) {
7776                         rc = -EIO;
7777                         dev_err(hdev->dev,
7778                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7779                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7780                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7781                                 (val >> 4) & 0x1);
7782
7783                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7784                         dev_err(hdev->dev,
7785                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7786                                 device, ch * 2 + 1,
7787                                 RREG32(base + ch * 0x1000 + 0x074),
7788                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7789                                 (val2 & 0xFF0000) >> 16,
7790                                 (val2 & 0xFF000000) >> 24);
7791                 }
7792
7793                 /* Clear interrupts */
7794                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7795                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7796                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7797                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7798                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7799                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7800         }
7801
7802         val  = RREG32(base + 0x8F30);
7803         val2 = RREG32(base + 0x8F34);
7804         if (val | val2) {
7805                 rc = -EIO;
7806                 dev_err(hdev->dev,
7807                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7808                         device, val, val2);
7809         }
7810         val  = RREG32(base + 0x8F40);
7811         val2 = RREG32(base + 0x8F44);
7812         if (val | val2) {
7813                 rc = -EIO;
7814                 dev_err(hdev->dev,
7815                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7816                         device, val, val2);
7817         }
7818
7819         return rc;
7820 }
7821
7822 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7823 {
7824         switch (hbm_event_type) {
7825         case GAUDI_EVENT_HBM0_SPI_0:
7826         case GAUDI_EVENT_HBM0_SPI_1:
7827                 return 0;
7828         case GAUDI_EVENT_HBM1_SPI_0:
7829         case GAUDI_EVENT_HBM1_SPI_1:
7830                 return 1;
7831         case GAUDI_EVENT_HBM2_SPI_0:
7832         case GAUDI_EVENT_HBM2_SPI_1:
7833                 return 2;
7834         case GAUDI_EVENT_HBM3_SPI_0:
7835         case GAUDI_EVENT_HBM3_SPI_1:
7836                 return 3;
7837         default:
7838                 break;
7839         }
7840
7841         /* Should never happen */
7842         return 0;
7843 }
7844
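/*
 * Read, report and clear the TPC interrupt cause register of the given TPC.
 * Returns true if the cause indicates a QM error, in which case the caller
 * is expected to soft-reset the device.
 */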
7845 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7846                                         char *interrupt_name)
7847 {
7848         struct gaudi_device *gaudi = hdev->asic_specific;
7849         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7850         bool soft_reset_required = false;
7851
7852         /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7853          * gating, which cannot be done from CPU-CP, so the driver must read
7854          * and clear the register itself.
7855          */
7856
7857         mutex_lock(&gaudi->clk_gate_mutex);
7858
7859         hdev->asic_funcs->disable_clock_gating(hdev);
7860
7861         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7862                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7863
7864         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7865                 if (tpc_interrupts_cause & BIT(i)) {
7866                         dev_err_ratelimited(hdev->dev,
7867                                         "TPC%d_%s interrupt cause: %s\n",
7868                                         tpc_id, interrupt_name,
7869                                         gaudi_tpc_interrupts_cause[i]);
7870                         /* If this is a QM error, we need to soft-reset */
7871                         if (i == 15)
7872                                 soft_reset_required = true;
7873                 }
7874
7875         /* Clear interrupts */
7876         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7877
7878         hdev->asic_funcs->set_clock_gating(hdev);
7879
7880         mutex_unlock(&gaudi->clk_gate_mutex);
7881
7882         return soft_reset_required;
7883 }
7884
7885 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7886 {
7887         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7888 }
7889
7890 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7891 {
7892         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7893 }
7894
7895 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7896                                         u16 event_type)
7897 {
7898         switch (event_type) {
7899         case GAUDI_EVENT_FIX_POWER_ENV_S:
7900                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7901                 dev_info_ratelimited(hdev->dev,
7902                         "Clock throttling due to power consumption\n");
7903                 break;
7904
7905         case GAUDI_EVENT_FIX_POWER_ENV_E:
7906                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7907                 dev_info_ratelimited(hdev->dev,
7908                         "Power envelope is safe, back to optimal clock\n");
7909                 break;
7910
7911         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7912                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7913                 dev_info_ratelimited(hdev->dev,
7914                         "Clock throttling due to overheating\n");
7915                 break;
7916
7917         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7918                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7919                 dev_info_ratelimited(hdev->dev,
7920                         "Thermal envelope is safe, back to optimal clock\n");
7921                 break;
7922
7923         default:
7924                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7925                         event_type);
7926                 break;
7927         }
7928 }
7929
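/*
 * Main event-queue handler: extract the event type from the EQ entry header,
 * update the per-type statistics and dispatch on the type. Unrecoverable
 * errors (DERR/ECC, queue out-of-sync, FW alive reports etc.) jump to
 * reset_device, which hard-resets the device (through FW when security is
 * enabled) or, if hard resets on FW events are disabled, just unmasks the
 * IRQ. Recoverable events are logged and their IRQ is unmasked in FW.
 */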
7930 static void gaudi_handle_eqe(struct hl_device *hdev,
7931                                 struct hl_eq_entry *eq_entry)
7932 {
7933         struct gaudi_device *gaudi = hdev->asic_specific;
7934         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7935         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7936                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7937         bool reset_required;
7938         u8 cause;
7939         int rc;
7940
7941         if (event_type >= GAUDI_EVENT_SIZE) {
7942                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7943                                 event_type, GAUDI_EVENT_SIZE - 1);
7944                 return;
7945         }
7946
7947         gaudi->events_stat[event_type]++;
7948         gaudi->events_stat_aggregate[event_type]++;
7949
7950         switch (event_type) {
7951         case GAUDI_EVENT_PCIE_CORE_DERR:
7952         case GAUDI_EVENT_PCIE_IF_DERR:
7953         case GAUDI_EVENT_PCIE_PHY_DERR:
7954         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7955         case GAUDI_EVENT_MME0_ACC_DERR:
7956         case GAUDI_EVENT_MME0_SBAB_DERR:
7957         case GAUDI_EVENT_MME1_ACC_DERR:
7958         case GAUDI_EVENT_MME1_SBAB_DERR:
7959         case GAUDI_EVENT_MME2_ACC_DERR:
7960         case GAUDI_EVENT_MME2_SBAB_DERR:
7961         case GAUDI_EVENT_MME3_ACC_DERR:
7962         case GAUDI_EVENT_MME3_SBAB_DERR:
7963         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7964                 fallthrough;
7965         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7966         case GAUDI_EVENT_PSOC_MEM_DERR:
7967         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7968         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7969         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7970         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7971         case GAUDI_EVENT_MMU_DERR:
7972         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7973                 gaudi_print_irq_info(hdev, event_type, true);
7974                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7975                 goto reset_device;
7976
7977         case GAUDI_EVENT_GIC500:
7978         case GAUDI_EVENT_AXI_ECC:
7979         case GAUDI_EVENT_L2_RAM_ECC:
7980         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7981                 gaudi_print_irq_info(hdev, event_type, false);
7982                 goto reset_device;
7983
7984         case GAUDI_EVENT_HBM0_SPI_0:
7985         case GAUDI_EVENT_HBM1_SPI_0:
7986         case GAUDI_EVENT_HBM2_SPI_0:
7987         case GAUDI_EVENT_HBM3_SPI_0:
7988                 gaudi_print_irq_info(hdev, event_type, false);
7989                 gaudi_hbm_read_interrupts(hdev,
7990                                 gaudi_hbm_event_to_dev(event_type),
7991                                 &eq_entry->hbm_ecc_data);
7992                 goto reset_device;
7993
7994         case GAUDI_EVENT_HBM0_SPI_1:
7995         case GAUDI_EVENT_HBM1_SPI_1:
7996         case GAUDI_EVENT_HBM2_SPI_1:
7997         case GAUDI_EVENT_HBM3_SPI_1:
7998                 gaudi_print_irq_info(hdev, event_type, false);
7999                 gaudi_hbm_read_interrupts(hdev,
8000                                 gaudi_hbm_event_to_dev(event_type),
8001                                 &eq_entry->hbm_ecc_data);
8002                 hl_fw_unmask_irq(hdev, event_type);
8003                 break;
8004
8005         case GAUDI_EVENT_TPC0_DEC:
8006         case GAUDI_EVENT_TPC1_DEC:
8007         case GAUDI_EVENT_TPC2_DEC:
8008         case GAUDI_EVENT_TPC3_DEC:
8009         case GAUDI_EVENT_TPC4_DEC:
8010         case GAUDI_EVENT_TPC5_DEC:
8011         case GAUDI_EVENT_TPC6_DEC:
8012         case GAUDI_EVENT_TPC7_DEC:
8013                 gaudi_print_irq_info(hdev, event_type, true);
8014                 reset_required = gaudi_tpc_read_interrupts(hdev,
8015                                         tpc_dec_event_to_tpc_id(event_type),
8016                                         "AXI_SLV_DEC_Error");
8017                 if (reset_required) {
8018                         dev_err(hdev->dev, "reset required due to %s\n",
8019                                 gaudi_irq_map_table[event_type].name);
8020
8021                         hl_device_reset(hdev, 0);
8022                 } else {
8023                         hl_fw_unmask_irq(hdev, event_type);
8024                 }
8025                 break;
8026
8027         case GAUDI_EVENT_TPC0_KRN_ERR:
8028         case GAUDI_EVENT_TPC1_KRN_ERR:
8029         case GAUDI_EVENT_TPC2_KRN_ERR:
8030         case GAUDI_EVENT_TPC3_KRN_ERR:
8031         case GAUDI_EVENT_TPC4_KRN_ERR:
8032         case GAUDI_EVENT_TPC5_KRN_ERR:
8033         case GAUDI_EVENT_TPC6_KRN_ERR:
8034         case GAUDI_EVENT_TPC7_KRN_ERR:
8035                 gaudi_print_irq_info(hdev, event_type, true);
8036                 reset_required = gaudi_tpc_read_interrupts(hdev,
8037                                         tpc_krn_event_to_tpc_id(event_type),
8038                                         "KRN_ERR");
8039                 if (reset_required) {
8040                         dev_err(hdev->dev, "reset required due to %s\n",
8041                                 gaudi_irq_map_table[event_type].name);
8042
8043                         hl_device_reset(hdev, 0);
8044                 } else {
8045                         hl_fw_unmask_irq(hdev, event_type);
8046                 }
8047                 break;
8048
8049         case GAUDI_EVENT_PCIE_CORE_SERR:
8050         case GAUDI_EVENT_PCIE_IF_SERR:
8051         case GAUDI_EVENT_PCIE_PHY_SERR:
8052         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8053         case GAUDI_EVENT_MME0_ACC_SERR:
8054         case GAUDI_EVENT_MME0_SBAB_SERR:
8055         case GAUDI_EVENT_MME1_ACC_SERR:
8056         case GAUDI_EVENT_MME1_SBAB_SERR:
8057         case GAUDI_EVENT_MME2_ACC_SERR:
8058         case GAUDI_EVENT_MME2_SBAB_SERR:
8059         case GAUDI_EVENT_MME3_ACC_SERR:
8060         case GAUDI_EVENT_MME3_SBAB_SERR:
8061         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8062         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8063         case GAUDI_EVENT_PSOC_MEM_SERR:
8064         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8065         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8066         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8067         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8068                 fallthrough;
8069         case GAUDI_EVENT_MMU_SERR:
8070                 gaudi_print_irq_info(hdev, event_type, true);
8071                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8072                 hl_fw_unmask_irq(hdev, event_type);
8073                 break;
8074
8075         case GAUDI_EVENT_PCIE_DEC:
8076         case GAUDI_EVENT_MME0_WBC_RSP:
8077         case GAUDI_EVENT_MME0_SBAB0_RSP:
8078         case GAUDI_EVENT_MME1_WBC_RSP:
8079         case GAUDI_EVENT_MME1_SBAB0_RSP:
8080         case GAUDI_EVENT_MME2_WBC_RSP:
8081         case GAUDI_EVENT_MME2_SBAB0_RSP:
8082         case GAUDI_EVENT_MME3_WBC_RSP:
8083         case GAUDI_EVENT_MME3_SBAB0_RSP:
8084         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8085         case GAUDI_EVENT_PSOC_AXI_DEC:
8086         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8087         case GAUDI_EVENT_MMU_PAGE_FAULT:
8088         case GAUDI_EVENT_MMU_WR_PERM:
8089         case GAUDI_EVENT_RAZWI_OR_ADC:
8090         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8091         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8092         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8093                 fallthrough;
8094         case GAUDI_EVENT_NIC0_QM0:
8095         case GAUDI_EVENT_NIC0_QM1:
8096         case GAUDI_EVENT_NIC1_QM0:
8097         case GAUDI_EVENT_NIC1_QM1:
8098         case GAUDI_EVENT_NIC2_QM0:
8099         case GAUDI_EVENT_NIC2_QM1:
8100         case GAUDI_EVENT_NIC3_QM0:
8101         case GAUDI_EVENT_NIC3_QM1:
8102         case GAUDI_EVENT_NIC4_QM0:
8103         case GAUDI_EVENT_NIC4_QM1:
8104         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8105                 gaudi_print_irq_info(hdev, event_type, true);
8106                 gaudi_handle_qman_err(hdev, event_type);
8107                 hl_fw_unmask_irq(hdev, event_type);
8108                 break;
8109
8110         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8111                 gaudi_print_irq_info(hdev, event_type, true);
8112                 goto reset_device;
8113
8114         case GAUDI_EVENT_TPC0_BMON_SPMU:
8115         case GAUDI_EVENT_TPC1_BMON_SPMU:
8116         case GAUDI_EVENT_TPC2_BMON_SPMU:
8117         case GAUDI_EVENT_TPC3_BMON_SPMU:
8118         case GAUDI_EVENT_TPC4_BMON_SPMU:
8119         case GAUDI_EVENT_TPC5_BMON_SPMU:
8120         case GAUDI_EVENT_TPC6_BMON_SPMU:
8121         case GAUDI_EVENT_TPC7_BMON_SPMU:
8122         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8123                 gaudi_print_irq_info(hdev, event_type, false);
8124                 hl_fw_unmask_irq(hdev, event_type);
8125                 break;
8126
8127         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8128                 gaudi_print_irq_info(hdev, event_type, false);
8129                 gaudi_print_sm_sei_info(hdev, event_type,
8130                                         &eq_entry->sm_sei_data);
8131                 rc = hl_state_dump(hdev);
8132                 if (rc)
8133                         dev_err(hdev->dev,
8134                                 "Error during system state dump %d\n", rc);
8135                 hl_fw_unmask_irq(hdev, event_type);
8136                 break;
8137
8138         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8139                 gaudi_print_clk_change_info(hdev, event_type);
8140                 hl_fw_unmask_irq(hdev, event_type);
8141                 break;
8142
8143         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8144                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8145                 dev_err(hdev->dev,
8146                         "Received high temp H/W interrupt %d (cause %d)\n",
8147                         event_type, cause);
8148                 break;
8149
8150         case GAUDI_EVENT_DEV_RESET_REQ:
8151                 gaudi_print_irq_info(hdev, event_type, false);
8152                 goto reset_device;
8153
8154         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8155                 gaudi_print_irq_info(hdev, event_type, false);
8156                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8157                 goto reset_device;
8158
8159         case GAUDI_EVENT_FW_ALIVE_S:
8160                 gaudi_print_irq_info(hdev, event_type, false);
8161                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8162                 goto reset_device;
8163
8164         default:
8165                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8166                                 event_type);
8167                 break;
8168         }
8169
8170         return;
8171
8172 reset_device:
8173         if (hdev->asic_prop.fw_security_enabled)
8174                 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8175         else if (hdev->hard_reset_on_fw_events)
8176                 hl_device_reset(hdev, HL_RESET_HARD);
8177         else
8178                 hl_fw_unmask_irq(hdev, event_type);
8179 }
8180
8181 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8182                                         u32 *size)
8183 {
8184         struct gaudi_device *gaudi = hdev->asic_specific;
8185
8186         if (aggregate) {
8187                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8188                 return gaudi->events_stat_aggregate;
8189         }
8190
8191         *size = (u32) sizeof(gaudi->events_stat);
8192         return gaudi->events_stat;
8193 }
8194
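/*
 * Invalidate the MMU STLB cache (L0 and L1): write 3 to STLB_INV_PS, push a
 * new producer index to STLB_CACHE_INV, write 2 to STLB_INV_PS and then poll
 * STLB_INV_PS until it reads zero. A timeout is treated as fatal and
 * triggers a hard reset.
 */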
8195 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8196                                         u32 flags)
8197 {
8198         struct gaudi_device *gaudi = hdev->asic_specific;
8199         u32 status, timeout_usec;
8200         int rc;
8201
8202         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8203                 hdev->hard_reset_pending)
8204                 return 0;
8205
8206         if (hdev->pldm)
8207                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8208         else
8209                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8210
8211         /* L0 & L1 invalidation */
8212         WREG32(mmSTLB_INV_PS, 3);
8213         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8214         WREG32(mmSTLB_INV_PS, 2);
8215
8216         rc = hl_poll_timeout(
8217                 hdev,
8218                 mmSTLB_INV_PS,
8219                 status,
8220                 !status,
8221                 1000,
8222                 timeout_usec);
8223
8224         WREG32(mmSTLB_INV_SET, 0);
8225
8226         if (rc) {
8227                 dev_err_ratelimited(hdev->dev,
8228                                         "MMU cache invalidation timeout\n");
8229                 hl_device_reset(hdev, HL_RESET_HARD);
8230         }
8231
8232         return rc;
8233 }
8234
8235 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8236                                                 bool is_hard, u32 flags,
8237                                                 u32 asid, u64 va, u64 size)
8238 {
8239         /* Treat as invalidate all because there is no range invalidation
8240          * in Gaudi
8241          */
8242         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8243 }
8244
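/*
 * Program the hop0 page-table physical address of the given ASID: write the
 * ASID, the split PA (bits 43:12 and 49:44) and kick MMU_BUSY, then poll
 * until the busy bit (bit 31) clears.
 */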
8245 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8246                                         u32 asid, u64 phys_addr)
8247 {
8248         u32 status, timeout_usec;
8249         int rc;
8250
8251         if (hdev->pldm)
8252                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8253         else
8254                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8255
8256         WREG32(MMU_ASID, asid);
8257         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8258         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8259         WREG32(MMU_BUSY, 0x80000000);
8260
8261         rc = hl_poll_timeout(
8262                 hdev,
8263                 MMU_BUSY,
8264                 status,
8265                 !(status & 0x80000000),
8266                 1000,
8267                 timeout_usec);
8268
8269         if (rc) {
8270                 dev_err(hdev->dev,
8271                         "Timeout during MMU hop0 config of asid %d\n", asid);
8272                 return rc;
8273         }
8274
8275         return 0;
8276 }
8277
8278 static int gaudi_send_heartbeat(struct hl_device *hdev)
8279 {
8280         struct gaudi_device *gaudi = hdev->asic_specific;
8281
8282         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8283                 return 0;
8284
8285         return hl_fw_send_heartbeat(hdev);
8286 }
8287
8288 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8289 {
8290         struct gaudi_device *gaudi = hdev->asic_specific;
8291         struct asic_fixed_properties *prop = &hdev->asic_prop;
8292         int rc;
8293
8294         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8295                 return 0;
8296
8297         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8298                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8299                                         mmCPU_BOOT_ERR1);
8300         if (rc)
8301                 return rc;
8302
8303         if (!strlen(prop->cpucp_info.card_name))
8304                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8305                                 CARD_NAME_MAX_LEN);
8306
8307         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8308
8309         set_default_power_values(hdev);
8310
8311         hdev->max_power = prop->max_power_default;
8312
8313         return 0;
8314 }
8315
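/*
 * Check whether all DMA/TPC/MME/NIC engines are idle. For every engine the
 * relevant QM/engine status registers are sampled (clock gating is disabled
 * around the scan); busy engines get their bit set in mask_arr, and when a
 * seq_file is provided a per-engine status table is printed (e.g. for
 * debugfs).
 */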
8316 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8317                                         u8 mask_len, struct seq_file *s)
8318 {
8319         struct gaudi_device *gaudi = hdev->asic_specific;
8320         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8321         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8322         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8323         unsigned long *mask = (unsigned long *)mask_arr;
8324         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8325         bool is_idle = true, is_eng_idle, is_slave;
8326         u64 offset;
8327         int i, dma_id, port;
8328
8329         mutex_lock(&gaudi->clk_gate_mutex);
8330
8331         hdev->asic_funcs->disable_clock_gating(hdev);
8332
8333         if (s)
8334                 seq_puts(s,
8335                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8336                         "---  -------  ------------  ----------  -------------\n");
8337
8338         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8339                 dma_id = gaudi_dma_assignment[i];
8340                 offset = dma_id * DMA_QMAN_OFFSET;
8341
8342                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8343                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8344                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8345                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8346                                 IS_DMA_IDLE(dma_core_sts0);
8347                 is_idle &= is_eng_idle;
8348
8349                 if (mask && !is_eng_idle)
8350                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8351                 if (s)
8352                         seq_printf(s, fmt, dma_id,
8353                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8354                                 qm_cgm_sts, dma_core_sts0);
8355         }
8356
8357         if (s)
8358                 seq_puts(s,
8359                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8360                         "---  -------  ------------  ----------  ----------\n");
8361
8362         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8363                 offset = i * TPC_QMAN_OFFSET;
8364                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8365                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8366                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8367                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8368                                 IS_TPC_IDLE(tpc_cfg_sts);
8369                 is_idle &= is_eng_idle;
8370
8371                 if (mask && !is_eng_idle)
8372                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8373                 if (s)
8374                         seq_printf(s, fmt, i,
8375                                 is_eng_idle ? "Y" : "N",
8376                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8377         }
8378
8379         if (s)
8380                 seq_puts(s,
8381                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8382                         "---  -------  ------------  ----------  -----------\n");
8383
8384         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8385                 offset = i * MME_QMAN_OFFSET;
8386                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8387                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8388
8389                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8390                 is_slave = i % 2;
8391                 if (!is_slave) {
8392                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8393                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8394                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8395                 }
8396
8397                 is_idle &= is_eng_idle;
8398
8399                 if (mask && !is_eng_idle)
8400                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8401                 if (s) {
8402                         if (!is_slave)
8403                                 seq_printf(s, fmt, i,
8404                                         is_eng_idle ? "Y" : "N",
8405                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8406                         else
8407                                 seq_printf(s, mme_slave_fmt, i,
8408                                         is_eng_idle ? "Y" : "N", "-",
8409                                         "-", mme_arch_sts);
8410                 }
8411         }
8412
8413         if (s)
8414                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8415                                 "---  -------  ------------  ----------\n");
8416
8417         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8418                 offset = i * NIC_MACRO_QMAN_OFFSET;
8419                 port = 2 * i;
8420                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8421                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8422                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8423                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8424                         is_idle &= is_eng_idle;
8425
8426                         if (mask && !is_eng_idle)
8427                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8428                         if (s)
8429                                 seq_printf(s, nic_fmt, port,
8430                                                 is_eng_idle ? "Y" : "N",
8431                                                 qm_glbl_sts0, qm_cgm_sts);
8432                 }
8433
8434                 port = 2 * i + 1;
8435                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8436                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8437                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8438                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8439                         is_idle &= is_eng_idle;
8440
8441                         if (mask && !is_eng_idle)
8442                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8443                         if (s)
8444                                 seq_printf(s, nic_fmt, port,
8445                                                 is_eng_idle ? "Y" : "N",
8446                                                 qm_glbl_sts0, qm_cgm_sts);
8447                 }
8448         }
8449
8450         if (s)
8451                 seq_puts(s, "\n");
8452
8453         hdev->asic_funcs->set_clock_gating(hdev);
8454
8455         mutex_unlock(&gaudi->clk_gate_mutex);
8456
8457         return is_idle;
8458 }
8459
8460 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8461         __acquires(&gaudi->hw_queues_lock)
8462 {
8463         struct gaudi_device *gaudi = hdev->asic_specific;
8464
8465         spin_lock(&gaudi->hw_queues_lock);
8466 }
8467
8468 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8469         __releases(&gaudi->hw_queues_lock)
8470 {
8471         struct gaudi_device *gaudi = hdev->asic_specific;
8472
8473         spin_unlock(&gaudi->hw_queues_lock);
8474 }
8475
8476 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8477 {
8478         return hdev->pdev->device;
8479 }
8480
8481 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8482                                 size_t max_size)
8483 {
8484         struct gaudi_device *gaudi = hdev->asic_specific;
8485
8486         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8487                 return 0;
8488
8489         return hl_fw_get_eeprom_data(hdev, data, max_size);
8490 }
8491
8492 /*
8493  * This function should be used only during initialization and/or after reset,
8494  * when there are no active users.
8495  */
8496 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8497                                 u32 tpc_id)
8498 {
8499         struct gaudi_device *gaudi = hdev->asic_specific;
8500         u64 kernel_timeout;
8501         u32 status, offset;
8502         int rc;
8503
8504         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8505
8506         if (hdev->pldm)
8507                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8508         else
8509                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8510
8511         mutex_lock(&gaudi->clk_gate_mutex);
8512
8513         hdev->asic_funcs->disable_clock_gating(hdev);
8514
8515         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8516                         lower_32_bits(tpc_kernel));
8517         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8518                         upper_32_bits(tpc_kernel));
8519
8520         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8521                         lower_32_bits(tpc_kernel));
8522         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8523                         upper_32_bits(tpc_kernel));
8524         /* set a valid LUT pointer, content is of no significance */
8525         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8526                         lower_32_bits(tpc_kernel));
8527         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8528                         upper_32_bits(tpc_kernel));
8529
8530         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8531                         lower_32_bits(CFG_BASE +
8532                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8533
8534         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8535                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8536                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8537         /* wait a bit for the engine to start executing */
8538         usleep_range(1000, 1500);
8539
8540         /* wait until engine has finished executing */
8541         rc = hl_poll_timeout(
8542                 hdev,
8543                 mmTPC0_CFG_STATUS + offset,
8544                 status,
8545                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8546                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8547                 1000,
8548                 kernel_timeout);
8549
8550         if (rc) {
8551                 dev_err(hdev->dev,
8552                         "Timeout while waiting for TPC%d icache prefetch\n",
8553                         tpc_id);
8554                 hdev->asic_funcs->set_clock_gating(hdev);
8555                 mutex_unlock(&gaudi->clk_gate_mutex);
8556                 return -EIO;
8557         }
8558
8559         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8560                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8561
8562         /* wait a bit for the engine to start executing */
8563         usleep_range(1000, 1500);
8564
8565         /* wait until engine has finished executing */
8566         rc = hl_poll_timeout(
8567                 hdev,
8568                 mmTPC0_CFG_STATUS + offset,
8569                 status,
8570                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8571                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8572                 1000,
8573                 kernel_timeout);
8574
8575         if (rc) {
8576                 dev_err(hdev->dev,
8577                         "Timeout while waiting for TPC%d vector pipe\n",
8578                         tpc_id);
8579                 hdev->asic_funcs->set_clock_gating(hdev);
8580                 mutex_unlock(&gaudi->clk_gate_mutex);
8581                 return -EIO;
8582         }
8583
8584         rc = hl_poll_timeout(
8585                 hdev,
8586                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8587                 status,
8588                 (status == 0),
8589                 1000,
8590                 kernel_timeout);
8591
8592         hdev->asic_funcs->set_clock_gating(hdev);
8593         mutex_unlock(&gaudi->clk_gate_mutex);
8594
8595         if (rc) {
8596                 dev_err(hdev->dev,
8597                         "Timeout while waiting for TPC%d kernel to execute\n",
8598                         tpc_id);
8599                 return -EIO;
8600         }
8601
8602         return 0;
8603 }
8604
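/*
 * Create the per-context internal CB pool: allocate a coherent host buffer of
 * HOST_SPACE_INTERNAL_CB_SZ bytes, wrap it in a gen_pool whose minimum
 * allocation order is derived from the collective CB size, reserve a host VA
 * block and map the buffer contiguously through the device MMU.
 */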
8605 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8606                 struct hl_ctx *ctx)
8607 {
8608         struct gaudi_device *gaudi = hdev->asic_specific;
8609         int min_alloc_order, rc, collective_cb_size;
8610
8611         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8612                 return 0;
8613
8614         hdev->internal_cb_pool_virt_addr =
8615                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8616                                         HOST_SPACE_INTERNAL_CB_SZ,
8617                                         &hdev->internal_cb_pool_dma_addr,
8618                                         GFP_KERNEL | __GFP_ZERO);
8619
8620         if (!hdev->internal_cb_pool_virt_addr)
8621                 return -ENOMEM;
8622
8623         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8624                         sizeof(struct packet_fence);
8625         min_alloc_order = ilog2(collective_cb_size);
8626
8627         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8628         if (!hdev->internal_cb_pool) {
8629                 dev_err(hdev->dev,
8630                         "Failed to create internal CB pool\n");
8631                 rc = -ENOMEM;
8632                 goto free_internal_cb_pool;
8633         }
8634
8635         rc = gen_pool_add(hdev->internal_cb_pool,
8636                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8637                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8638         if (rc) {
8639                 dev_err(hdev->dev,
8640                         "Failed to add memory to internal CB pool\n");
8641                 rc = -EFAULT;
8642                 goto destroy_internal_cb_pool;
8643         }
8644
8645         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8646                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8647                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8648
8649         if (!hdev->internal_cb_va_base) {
8650                 rc = -ENOMEM;
8651                 goto destroy_internal_cb_pool;
8652         }
8653
8654         mutex_lock(&ctx->mmu_lock);
8655         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8656                         hdev->internal_cb_pool_dma_addr,
8657                         HOST_SPACE_INTERNAL_CB_SZ);
8658
8659         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8660         mutex_unlock(&ctx->mmu_lock);
8661
8662         if (rc)
8663                 goto unreserve_internal_cb_pool;
8664
8665         return 0;
8666
8667 unreserve_internal_cb_pool:
8668         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8669                         HOST_SPACE_INTERNAL_CB_SZ);
8670 destroy_internal_cb_pool:
8671         gen_pool_destroy(hdev->internal_cb_pool);
8672 free_internal_cb_pool:
8673         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8674                         HOST_SPACE_INTERNAL_CB_SZ,
8675                         hdev->internal_cb_pool_virt_addr,
8676                         hdev->internal_cb_pool_dma_addr);
8677
8678         return rc;
8679 }
8680
8681 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8682                 struct hl_ctx *ctx)
8683 {
8684         struct gaudi_device *gaudi = hdev->asic_specific;
8685
8686         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8687                 return;
8688
8689         mutex_lock(&ctx->mmu_lock);
8690         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8691                         HOST_SPACE_INTERNAL_CB_SZ);
8692         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8693                         HOST_SPACE_INTERNAL_CB_SZ);
8694         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8695         mutex_unlock(&ctx->mmu_lock);
8696
8697         gen_pool_destroy(hdev->internal_cb_pool);
8698
8699         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8700                         HOST_SPACE_INTERNAL_CB_SZ,
8701                         hdev->internal_cb_pool_virt_addr,
8702                         hdev->internal_cb_pool_dma_addr);
8703 }
8704
8705 static int gaudi_ctx_init(struct hl_ctx *ctx)
8706 {
8707         int rc;
8708
8709         if (ctx->asid == HL_KERNEL_ASID_ID)
8710                 return 0;
8711
8712         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8713         if (rc)
8714                 return rc;
8715
8716         rc = gaudi_restore_user_registers(ctx->hdev);
8717         if (rc)
8718                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8719
8720         return rc;
8721 }
8722
8723 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8724 {
8725         if (ctx->asid == HL_KERNEL_ASID_ID)
8726                 return;
8727
8728         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8729 }
8730
8731 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8732 {
8733         return gaudi_cq_assignment[cq_idx];
8734 }
8735
8736 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8737 {
8738         return sizeof(struct packet_msg_short) +
8739                         sizeof(struct packet_msg_prot) * 2;
8740 }
8741
8742 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8743 {
8744         return sizeof(struct packet_msg_short) * 4 +
8745                         sizeof(struct packet_fence) +
8746                         sizeof(struct packet_msg_prot) * 2;
8747 }
8748
8749 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8750 {
8751         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8752 }
8753
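/*
 * Append a signal packet to the CB at the given offset: a single MSG_SHORT
 * that adds 1 to the target sync object in the W_S SOB space. Returns the
 * new CB size.
 */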
8754 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8755                                 u32 size, bool eb)
8756 {
8757         struct hl_cb *cb = (struct hl_cb *) data;
8758         struct packet_msg_short *pkt;
8759         u32 value, ctl, pkt_size = sizeof(*pkt);
8760
8761         pkt = cb->kernel_address + size;
8762         memset(pkt, 0, pkt_size);
8763
8764         /* Inc by 1, Mode ADD */
8765         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8766         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8767
8768         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8769         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8770         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8771         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8772         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8773         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8774         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8775
8776         pkt->value = cpu_to_le32(value);
8777         pkt->ctl = cpu_to_le32(ctl);
8778
8779         return size + pkt_size;
8780 }
8781
8782 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8783                                         u16 addr)
8784 {
8785         u32 ctl, pkt_size = sizeof(*pkt);
8786
8787         memset(pkt, 0, pkt_size);
8788
8789         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8790         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8791         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8792         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8793         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8794         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8795
8796         pkt->value = cpu_to_le32(value);
8797         pkt->ctl = cpu_to_le32(ctl);
8798
8799         return pkt_size;
8800 }
8801
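/*
 * Build the MSG_SHORT packet that arms monitor mon_id on the sync-object
 * group containing sob_base: the monitor fires once the masked group of SOBs
 * reaches sob_val (greater-or-equal mode). Returns the packet size, or 0 if
 * the sob_base/sob_mask pair is invalid.
 */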
8802 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8803                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8804                 u16 sob_val, u16 mon_id)
8805 {
8806         u64 monitor_base;
8807         u32 ctl, value, pkt_size = sizeof(*pkt);
8808         u16 msg_addr_offset;
8809         u8 mask;
8810
8811         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8812                 dev_err(hdev->dev,
8813                         "sob_base %u (mask %#x) is not valid\n",
8814                         sob_base, sob_mask);
8815                 return 0;
8816         }
8817
8818         /*
8819          * monitor_base should be the content of the base0 address registers,
8820          * so it will be added to the msg short offsets
8821          */
8822         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8823
8824         msg_addr_offset =
8825                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8826                                 monitor_base;
8827
8828         memset(pkt, 0, pkt_size);
8829
8830         /* Monitor config packet: bind the monitor to a sync object */
8831         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8832         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8833         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8834                         0); /* GREATER OR EQUAL */
8835         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8836
8837         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8838         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8839         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8840         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8841         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8842         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8843         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8844
8845         pkt->value = cpu_to_le32(value);
8846         pkt->ctl = cpu_to_le32(ctl);
8847
8848         return pkt_size;
8849 }
8850
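/*
 * Build a FENCE packet that makes the CP wait on fence counter ID 2 until it
 * reaches a target value of 1, decrementing it by 1 once it does.
 */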
8851 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8852 {
8853         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8854
8855         memset(pkt, 0, pkt_size);
8856
8857         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8858         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8859         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8860
8861         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8862         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8863         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8864         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8865
8866         pkt->cfg = cpu_to_le32(cfg);
8867         pkt->ctl = cpu_to_le32(ctl);
8868
8869         return pkt_size;
8870 }
8871
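/*
 * Translate a queue ID to the address of its CP_FENCE2_RDATA register, which
 * serves as the monitor payload target for wait CBs. Only a subset of queues
 * is mapped (certain DMA queues, TPC7 and the NIC queues); anything else
 * returns -EINVAL.
 */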
8872 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8873 {
8874         u32 offset, nic_index;
8875
8876         switch (queue_id) {
8877         case GAUDI_QUEUE_ID_DMA_0_0:
8878                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8879                 break;
8880         case GAUDI_QUEUE_ID_DMA_0_1:
8881                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8882                 break;
8883         case GAUDI_QUEUE_ID_DMA_0_2:
8884                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8885                 break;
8886         case GAUDI_QUEUE_ID_DMA_0_3:
8887                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8888                 break;
8889         case GAUDI_QUEUE_ID_DMA_1_0:
8890                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8891                 break;
8892         case GAUDI_QUEUE_ID_DMA_1_1:
8893                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8894                 break;
8895         case GAUDI_QUEUE_ID_DMA_1_2:
8896                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8897                 break;
8898         case GAUDI_QUEUE_ID_DMA_1_3:
8899                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8900                 break;
8901         case GAUDI_QUEUE_ID_DMA_5_0:
8902                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8903                 break;
8904         case GAUDI_QUEUE_ID_DMA_5_1:
8905                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8906                 break;
8907         case GAUDI_QUEUE_ID_DMA_5_2:
8908                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8909                 break;
8910         case GAUDI_QUEUE_ID_DMA_5_3:
8911                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8912                 break;
8913         case GAUDI_QUEUE_ID_TPC_7_0:
8914                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8915                 break;
8916         case GAUDI_QUEUE_ID_TPC_7_1:
8917                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8918                 break;
8919         case GAUDI_QUEUE_ID_TPC_7_2:
8920                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8921                 break;
8922         case GAUDI_QUEUE_ID_TPC_7_3:
8923                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8924                 break;
8925         case GAUDI_QUEUE_ID_NIC_0_0:
8926         case GAUDI_QUEUE_ID_NIC_1_0:
8927         case GAUDI_QUEUE_ID_NIC_2_0:
8928         case GAUDI_QUEUE_ID_NIC_3_0:
8929         case GAUDI_QUEUE_ID_NIC_4_0:
8930         case GAUDI_QUEUE_ID_NIC_5_0:
8931         case GAUDI_QUEUE_ID_NIC_6_0:
8932         case GAUDI_QUEUE_ID_NIC_7_0:
8933         case GAUDI_QUEUE_ID_NIC_8_0:
8934         case GAUDI_QUEUE_ID_NIC_9_0:
8935                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8936                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8937                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8938                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8939                 break;
8940         case GAUDI_QUEUE_ID_NIC_0_1:
8941         case GAUDI_QUEUE_ID_NIC_1_1:
8942         case GAUDI_QUEUE_ID_NIC_2_1:
8943         case GAUDI_QUEUE_ID_NIC_3_1:
8944         case GAUDI_QUEUE_ID_NIC_4_1:
8945         case GAUDI_QUEUE_ID_NIC_5_1:
8946         case GAUDI_QUEUE_ID_NIC_6_1:
8947         case GAUDI_QUEUE_ID_NIC_7_1:
8948         case GAUDI_QUEUE_ID_NIC_8_1:
8949         case GAUDI_QUEUE_ID_NIC_9_1:
8950                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8951                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8952                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8953                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8954                 break;
8955         case GAUDI_QUEUE_ID_NIC_0_2:
8956         case GAUDI_QUEUE_ID_NIC_1_2:
8957         case GAUDI_QUEUE_ID_NIC_2_2:
8958         case GAUDI_QUEUE_ID_NIC_3_2:
8959         case GAUDI_QUEUE_ID_NIC_4_2:
8960         case GAUDI_QUEUE_ID_NIC_5_2:
8961         case GAUDI_QUEUE_ID_NIC_6_2:
8962         case GAUDI_QUEUE_ID_NIC_7_2:
8963         case GAUDI_QUEUE_ID_NIC_8_2:
8964         case GAUDI_QUEUE_ID_NIC_9_2:
8965                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8966                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8967                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8968                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8969                 break;
8970         case GAUDI_QUEUE_ID_NIC_0_3:
8971         case GAUDI_QUEUE_ID_NIC_1_3:
8972         case GAUDI_QUEUE_ID_NIC_2_3:
8973         case GAUDI_QUEUE_ID_NIC_3_3:
8974         case GAUDI_QUEUE_ID_NIC_4_3:
8975         case GAUDI_QUEUE_ID_NIC_5_3:
8976         case GAUDI_QUEUE_ID_NIC_6_3:
8977         case GAUDI_QUEUE_ID_NIC_7_3:
8978         case GAUDI_QUEUE_ID_NIC_8_3:
8979         case GAUDI_QUEUE_ID_NIC_9_3:
8980                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8981                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8982                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8983                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8984                 break;
8985         default:
8986                 return -EINVAL;
8987         }
8988
8989         *addr = CFG_BASE + offset;
8990
8991         return 0;
8992 }
8993
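/*
 * Write the three monitor-configuration MSG_SHORT packets: payload address
 * low, payload address high (both taken from fence_addr) and payload data
 * (the value 1). When the monitor fires it writes 1 to fence_addr, which is
 * what the subsequent fence packet waits for.
 */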
8994 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8995 {
8996         u64 monitor_base;
8997         u32 size = 0;
8998         u16 msg_addr_offset;
8999
9000         /*
9001          * monitor_base should be the content of the base0 address registers,
9002          * so it will be added to the msg short offsets
9003          */
9004         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9005
9006         /* First monitor config packet: low address of the sync */
9007         msg_addr_offset =
9008                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9009                                 monitor_base;
9010
9011         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9012                                         msg_addr_offset);
9013
9014         /* Second monitor config packet: high address of the sync */
9015         msg_addr_offset =
9016                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9017                                 monitor_base;
9018
9019         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9020                                         msg_addr_offset);
9021
9022         /*
9023          * Third monitor config packet: the payload, i.e. what to write when the
9024          * sync triggers
9025          */
9026         msg_addr_offset =
9027                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9028                                 monitor_base;
9029
9030         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9031
9032         return size;
9033 }
9034
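/*
 * Generate a wait CB: resolve the fence register address for the target
 * queue, then append the monitor configuration packets, the monitor ARM
 * packet and a fence packet. Returns the updated CB size in bytes, or 0 if
 * the queue id does not map to a fence address.
 */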
9035 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9036                                 struct hl_gen_wait_properties *prop)
9037 {
9038         struct hl_cb *cb = (struct hl_cb *) prop->data;
9039         void *buf = cb->kernel_address;
9040         u64 fence_addr = 0;
9041         u32 size = prop->size;
9042
9043         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9044                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9045                                 prop->q_idx);
9046                 return 0;
9047         }
9048
9049         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9050         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9051                         prop->sob_mask, prop->sob_val, prop->mon_id);
9052         size += gaudi_add_fence_pkt(buf + size);
9053
9054         return size;
9055 }
9056
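/*
 * Reset a sync object: clear its value in the sync manager and re-initialize
 * the kref that tracks its users.
 */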
9057 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9058 {
9059         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9060
9061         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9062                 hw_sob->sob_id);
9063
9064         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9065                         hw_sob->sob_id * 4, 0);
9066
9067         kref_init(&hw_sob->kref);
9068 }
9069
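/*
 * A magic value in the PSOC non-reset scratchpad register indicates a POWER9
 * host, which selects the 64-bit DMA path; otherwise a 48-bit DMA mask is
 * used.
 */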
9070 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9071 {
9072         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9073                                                         HL_POWER9_HOST_MAGIC) {
9074                 hdev->power9_64bit_dma_enable = 1;
9075                 hdev->dma_mask = 64;
9076         } else {
9077                 hdev->power9_64bit_dma_enable = 0;
9078                 hdev->dma_mask = 48;
9079         }
9080 }
9081
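/*
 * Read the free-running PSOC timestamp counter: upper 32 bits first, then
 * the lower 32 bits, combined into a single 64-bit value.
 */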
9082 static u64 gaudi_get_device_time(struct hl_device *hdev)
9083 {
9084         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9085
9086         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9087 }
9088
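/*
 * HW block mapping is not supported on Gaudi, so the two callbacks below are
 * stubs that always return -EPERM.
 */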
9089 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9090                                 u32 *block_size, u32 *block_id)
9091 {
9092         return -EPERM;
9093 }
9094
9095 static int gaudi_block_mmap(struct hl_device *hdev,
9096                                 struct vm_area_struct *vma,
9097                                 u32 block_id, u32 block_size)
9098 {
9099         return -EPERM;
9100 }
9101
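/*
 * Signal the embedded CPU, through either the static GIC register or the
 * address reported in the dynamic firmware descriptor, that the host is
 * ready to receive event interrupts.
 */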
9102 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9103 {
9104         struct cpu_dyn_regs *dyn_regs =
9105                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9106         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9107                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9108                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9109
9110         WREG32(irq_handler_offset,
9111                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9112 }
9113
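/*
 * Translate a PLL index from the driver ABI enumeration to the firmware's
 * internal PLL numbering. Returns -EINVAL for an unknown index.
 */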
9114 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9115 {
9116         switch (pll_idx) {
9117         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9118         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9119         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9120         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9121         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9122         case HL_GAUDI_MME_PLL: return MME_PLL;
9123         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9124         case HL_GAUDI_IF_PLL: return IF_PLL;
9125         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9126         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9127         default: return -EINVAL;
9128         }
9129 }
9130
9131 static int gaudi_add_sync_to_engine_map_entry(
9132         struct hl_sync_to_engine_map *map, u32 reg_value,
9133         enum hl_sync_engine_type engine_type, u32 engine_id)
9134 {
9135         struct hl_sync_to_engine_map_entry *entry;
9136
9137         /* The register value holds a partial address of the sync object and
9138          * is used as its unique identifier, so the CFG base bits must be
9139          * cleared from the value first.
9140          */
9141         if (reg_value == 0 || reg_value == 0xffffffff)
9142                 return 0;
9143         reg_value -= (u32)CFG_BASE;
9144
9145         /* create a new hash entry */
9146         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9147         if (!entry)
9148                 return -ENOMEM;
9149         entry->engine_type = engine_type;
9150         entry->engine_id = engine_id;
9151         entry->sync_id = reg_value;
9152         hash_add(map->tb, &entry->node, reg_value);
9153
9154         return 0;
9155 }
9156
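/*
 * Build the sync-object-to-engine map used by the state dump: read the sync
 * object address configured in every TPC, MME and DMA engine (with clock
 * gating disabled around the register accesses) and hash each value to the
 * engine that owns it.
 */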
9157 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9158                                 struct hl_sync_to_engine_map *map)
9159 {
9160         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9161         struct gaudi_device *gaudi = hdev->asic_specific;
9162         int i, j, rc;
9163         u32 reg_value;
9164
9165         /* Iterate over TPC engines */
9166         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9167                 /* TPC registers must be accessed with clock gating disabled */
9168                 mutex_lock(&gaudi->clk_gate_mutex);
9169                 hdev->asic_funcs->disable_clock_gating(hdev);
9170
9171                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9172                                         sds->props[SP_NEXT_TPC] * i);
9173
9174                 /* We can re-enable clock gating */
9175                 hdev->asic_funcs->set_clock_gating(hdev);
9176                 mutex_unlock(&gaudi->clk_gate_mutex);
9177
9178                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9179                                                         ENGINE_TPC, i);
9180                 if (rc)
9181                         goto free_sync_to_engine_map;
9182         }
9183
9184         /* Iterate over MME engines */
9185         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9186                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9187                         /* MME registers must be accessed with clock gating
9188                          * disabled
9189                          */
9190                         mutex_lock(&gaudi->clk_gate_mutex);
9191                         hdev->asic_funcs->disable_clock_gating(hdev);
9192
9193                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9194                                                 sds->props[SP_NEXT_MME] * i +
9195                                                 j * sizeof(u32));
9196
9197                         /* We can re-enable clock gating */
9198                         hdev->asic_funcs->set_clock_gating(hdev);
9199                         mutex_unlock(&gaudi->clk_gate_mutex);
9200
9201                         rc = gaudi_add_sync_to_engine_map_entry(
9202                                 map, reg_value, ENGINE_MME,
9203                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9204                         if (rc)
9205                                 goto free_sync_to_engine_map;
9206                 }
9207         }
9208
9209         /* Iterate over DMA engines */
9210         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9211                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9212                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9213                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9214                                                         ENGINE_DMA, i);
9215                 if (rc)
9216                         goto free_sync_to_engine_map;
9217         }
9218
9219         return 0;
9220
9221 free_sync_to_engine_map:
9222         hl_state_dump_free_sync_to_engine_map(map);
9223
9224         return rc;
9225 }
9226
9227 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9228 {
9229         return FIELD_GET(
9230                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9231                 mon->status);
9232 }
9233
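/*
 * Format, into the given buffer, the IDs of the sync objects this monitor is
 * armed on. Every cleared bit in the monitor's arm mask selects one sync
 * object within the monitored group of eight.
 */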
9234 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9235 {
9236         const size_t max_write = 10;
9237         u32 gid, mask, sob;
9238         int i, offset;
9239
9240         /* Sync object ID is calculated as follows:
9241          * (8 * group_id + index of each cleared bit in the mask)
9242          */
9243         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9244                         mon->arm_data);
9245         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9246                         mon->arm_data);
9247
9248         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9249                 max_write; mask >>= 1, i++) {
9250                 if (!(mask & 1)) {
9251                         sob = gid * MONITOR_MAX_SOBS + i;
9252
9253                         if (offset > 0)
9254                                 offset += snprintf(sobs + offset, max_write,
9255                                                         ", ");
9256
9257                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9258                 }
9259         }
9260 }
9261
9262 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9263                                 struct hl_device *hdev,
9264                                 struct hl_mon_state_dump *mon)
9265 {
9266         const char *name;
9267         char scratch_buf1[BIN_REG_STRING_SIZE],
9268                 scratch_buf2[BIN_REG_STRING_SIZE];
9269         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9270
9271         name = hl_state_dump_get_monitor_name(hdev, mon);
9272         if (!name)
9273                 name = "";
9274
9275         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9276
9277         return hl_snprintf_resize(
9278                 buf, size, offset,
9279                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9280                 mon->id, name,
9281                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9282                                 mon->arm_data),
9283                 hl_format_as_binary(
9284                         scratch_buf1, sizeof(scratch_buf1),
9285                         FIELD_GET(
9286                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9287                                 mon->arm_data)),
9288                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9289                                 mon->arm_data),
9290                 mon->wr_data,
9291                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9292                 hl_format_as_binary(
9293                         scratch_buf2, sizeof(scratch_buf2),
9294                         FIELD_GET(
9295                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9296                                 mon->status)),
9297                 monitored_sobs);
9298 }
9299
9300
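/*
 * Dump the fence state of a single engine: read the CP status register of
 * every queue and the full fence counter array, then print one line per
 * queue that currently has a fence in progress, including the addresses of
 * its counter and read-data registers.
 */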
9301 static int gaudi_print_fences_single_engine(
9302         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9303         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9304         size_t *size, size_t *offset)
9305 {
9306         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9307         int rc = -ENOMEM, i;
9308         u32 *statuses, *fences;
9309
9310         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9311                         sizeof(*statuses), GFP_KERNEL);
9312         if (!statuses)
9313                 goto out;
9314
9315         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9316                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9317                          sizeof(*fences), GFP_KERNEL);
9318         if (!fences)
9319                 goto free_status;
9320
9321         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9322                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9323
9324         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9325                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9326                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9327
9328         /* The actual print */
9329         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9330                 u32 fence_id;
9331                 u64 fence_cnt, fence_rdata;
9332                 const char *engine_name;
9333
9334                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9335                         statuses[i]))
9336                         continue;
9337
9338                 fence_id =
9339                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9340                 fence_cnt = base_offset + CFG_BASE +
9341                         sizeof(u32) *
9342                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9343                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9344                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9345                 engine_name = hl_sync_engine_to_string(engine_type);
9346
9347                 rc = hl_snprintf_resize(
9348                         buf, size, offset,
9349                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9350                         engine_name, engine_id,
9351                         i, fence_id,
9352                         fence_cnt, engine_name, engine_id, fence_id, i,
9353                         fence_rdata, engine_name, engine_id, fence_id, i,
9354                         fences[fence_id],
9355                         statuses[i]);
9356                 if (rc)
9357                         goto free_fences;
9358         }
9359
9360         rc = 0;
9361
9362 free_fences:
9363         kfree(fences);
9364 free_status:
9365         kfree(statuses);
9366 out:
9367         return rc;
9368 }
9369
9370
9371 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9372         .monitor_valid = gaudi_monitor_valid,
9373         .print_single_monitor = gaudi_print_single_monitor,
9374         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9375         .print_fences_single_engine = gaudi_print_fences_single_engine,
9376 };
9377
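/*
 * Initialize the state dump metadata: populate the hash tables that map sync
 * object and monitor IDs to human-readable names, and hook up the Gaudi
 * specific properties and callbacks.
 */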
9378 static void gaudi_state_dump_init(struct hl_device *hdev)
9379 {
9380         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9381         int i;
9382
9383         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9384                 hash_add(sds->so_id_to_str_tb,
9385                         &gaudi_so_id_to_str[i].node,
9386                         gaudi_so_id_to_str[i].id);
9387
9388         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9389                 hash_add(sds->monitor_id_to_str_tb,
9390                         &gaudi_monitor_id_to_str[i].node,
9391                         gaudi_monitor_id_to_str[i].id);
9392
9393         sds->props = gaudi_state_dump_specs_props;
9394
9395         sds->sync_namager_names = gaudi_sync_manager_names;
9396
9397         sds->funcs = gaudi_state_dump_funcs;
9398 }
9399
9400 static u32 *gaudi_get_stream_master_qid_arr(void)
9401 {
9402         return gaudi_stream_master;
9403 }
9404
9405 static const struct hl_asic_funcs gaudi_funcs = {
9406         .early_init = gaudi_early_init,
9407         .early_fini = gaudi_early_fini,
9408         .late_init = gaudi_late_init,
9409         .late_fini = gaudi_late_fini,
9410         .sw_init = gaudi_sw_init,
9411         .sw_fini = gaudi_sw_fini,
9412         .hw_init = gaudi_hw_init,
9413         .hw_fini = gaudi_hw_fini,
9414         .halt_engines = gaudi_halt_engines,
9415         .suspend = gaudi_suspend,
9416         .resume = gaudi_resume,
9417         .mmap = gaudi_mmap,
9418         .ring_doorbell = gaudi_ring_doorbell,
9419         .pqe_write = gaudi_pqe_write,
9420         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9421         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9422         .scrub_device_mem = gaudi_scrub_device_mem,
9423         .get_int_queue_base = gaudi_get_int_queue_base,
9424         .test_queues = gaudi_test_queues,
9425         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9426         .asic_dma_pool_free = gaudi_dma_pool_free,
9427         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9428         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9429         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9430         .cs_parser = gaudi_cs_parser,
9431         .asic_dma_map_sg = gaudi_dma_map_sg,
9432         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9433         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9434         .update_eq_ci = gaudi_update_eq_ci,
9435         .context_switch = gaudi_context_switch,
9436         .restore_phase_topology = gaudi_restore_phase_topology,
9437         .debugfs_read32 = gaudi_debugfs_read32,
9438         .debugfs_write32 = gaudi_debugfs_write32,
9439         .debugfs_read64 = gaudi_debugfs_read64,
9440         .debugfs_write64 = gaudi_debugfs_write64,
9441         .debugfs_read_dma = gaudi_debugfs_read_dma,
9442         .add_device_attr = gaudi_add_device_attr,
9443         .handle_eqe = gaudi_handle_eqe,
9444         .set_pll_profile = gaudi_set_pll_profile,
9445         .get_events_stat = gaudi_get_events_stat,
9446         .read_pte = gaudi_read_pte,
9447         .write_pte = gaudi_write_pte,
9448         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9449         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9450         .send_heartbeat = gaudi_send_heartbeat,
9451         .set_clock_gating = gaudi_set_clock_gating,
9452         .disable_clock_gating = gaudi_disable_clock_gating,
9453         .debug_coresight = gaudi_debug_coresight,
9454         .is_device_idle = gaudi_is_device_idle,
9455         .soft_reset_late_init = gaudi_soft_reset_late_init,
9456         .hw_queues_lock = gaudi_hw_queues_lock,
9457         .hw_queues_unlock = gaudi_hw_queues_unlock,
9458         .get_pci_id = gaudi_get_pci_id,
9459         .get_eeprom_data = gaudi_get_eeprom_data,
9460         .send_cpu_message = gaudi_send_cpu_message,
9461         .pci_bars_map = gaudi_pci_bars_map,
9462         .init_iatu = gaudi_init_iatu,
9463         .rreg = hl_rreg,
9464         .wreg = hl_wreg,
9465         .halt_coresight = gaudi_halt_coresight,
9466         .ctx_init = gaudi_ctx_init,
9467         .ctx_fini = gaudi_ctx_fini,
9468         .get_clk_rate = gaudi_get_clk_rate,
9469         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9470         .load_firmware_to_device = gaudi_load_firmware_to_device,
9471         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9472         .get_signal_cb_size = gaudi_get_signal_cb_size,
9473         .get_wait_cb_size = gaudi_get_wait_cb_size,
9474         .gen_signal_cb = gaudi_gen_signal_cb,
9475         .gen_wait_cb = gaudi_gen_wait_cb,
9476         .reset_sob = gaudi_reset_sob,
9477         .reset_sob_group = gaudi_reset_sob_group,
9478         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9479         .get_device_time = gaudi_get_device_time,
9480         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9481         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9482         .scramble_addr = hl_mmu_scramble_addr,
9483         .descramble_addr = hl_mmu_descramble_addr,
9484         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9485         .get_hw_block_id = gaudi_get_hw_block_id,
9486         .hw_block_mmap = gaudi_block_mmap,
9487         .enable_events_from_fw = gaudi_enable_events_from_fw,
9488         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9489         .init_firmware_loader = gaudi_init_firmware_loader,
9490         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9491         .state_dump_init = gaudi_state_dump_init,
9492         .get_sob_addr = gaudi_get_sob_addr,
9493         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9494         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9495 };
9496
9497 /**
9498  * gaudi_set_asic_funcs - set GAUDI function pointers
9499  *
9500  * @hdev: pointer to hl_device structure
9501  *
9502  */
9503 void gaudi_set_asic_funcs(struct hl_device *hdev)
9504 {
9505         hdev->asic_funcs = &gaudi_funcs;
9506 }