habanalabs/gaudi: split host irq interfaces towards FW
author Ofir Bitton <obitton@habana.ai>
Tue, 25 May 2021 19:09:13 +0000 (22:09 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Fri, 18 Jun 2021 12:23:41 +0000 (15:23 +0300)
The current implementation uses a single interrupt interface towards the
FW, and this shared interface causes races between interrupt types.
Split it into a separate interface per interrupt type.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/include/common/hl_boot_if.h
drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h

index 4cc6690..40e9198 100644 (file)
@@ -1782,7 +1782,8 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
 
        /* Read boot_cpu status bits */
        if (prop->fw_cpu_boot_dev_sts0_valid) {
-               prop->fw_bootfit_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
+               prop->fw_bootfit_cpu_boot_dev_sts0 =
+                               RREG32(cpu_boot_dev_sts0_reg);
 
                if (prop->fw_bootfit_cpu_boot_dev_sts0 &
                                CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@@ -1793,7 +1794,8 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
        }
 
        if (prop->fw_cpu_boot_dev_sts1_valid) {
-               prop->fw_bootfit_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
+               prop->fw_bootfit_cpu_boot_dev_sts1 =
+                               RREG32(cpu_boot_dev_sts1_reg);
 
                dev_dbg(hdev->dev, "Firmware boot CPU status1 %#x\n",
                                        prop->fw_bootfit_cpu_boot_dev_sts1);
@@ -1803,6 +1805,24 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
                        prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 }
 
+static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
+{
+       struct cpu_dyn_regs *dyn_regs =
+                       &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
+
+       /* When GIC interrupts are disabled and the FW does not advertise
+        * MULTI_IRQ_POLL_EN, reuse gic_host_irq_ctrl for halt and ints too
+        */
+       if (!hdev->asic_prop.gic_interrupts_enable &&
+                       !(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+                               CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
+               dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
+               dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
+
+               dev_warn(hdev->dev,
+                       "Using a single interrupt interface towards cpucp");
+       }
+}
 /**
  * hl_fw_dynamic_load_image - load FW image using dynamic protocol
  *
@@ -2150,6 +2170,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
        hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
                                le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
 
+       hl_fw_dynamic_update_linux_interrupt_if(hdev);
+
        return 0;
 
 protocol_err:
index e0e3e0f..ee1ab71 100644 (file)
@@ -3962,7 +3962,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 
        irq_handler_offset = prop->gic_interrupts_enable ?
                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-                       le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
+                       le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
 
        WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
 
@@ -4148,7 +4148,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
        if (hdev->fw_loader.linux_loaded) {
                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-                               le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
+                               le32_to_cpu(dyn_regs->gic_host_halt_irq);
 
                WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
        } else {
@@ -4681,7 +4681,7 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 
                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-                               le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
+                               le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
 
                WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
        }
@@ -8909,7 +8909,7 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev)
                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
        u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-                       le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
+                       le32_to_cpu(dyn_regs->gic_host_ints_irq);
 
        WREG32(irq_handler_offset, GAUDI_EVENT_INTS_REGISTER);
 }
index 6d0c1dd..89ac802 100644 (file)
  *                                     was not served before.
  *                                     Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN  Use multiple scratchpad interfaces to
+ *                                     prevent IRQs overriding each other.
+ *                                     Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED           Device status register enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the device status
 #define CPU_BOOT_DEV_STS0_DYN_PLL_EN                   (1 << 19)
 #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN            (1 << 20)
 #define CPU_BOOT_DEV_STS0_EQ_INDEX_EN                  (1 << 21)
+#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN            (1 << 22)
 #define CPU_BOOT_DEV_STS0_ENABLED                      (1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED                      (1 << 31)
 
@@ -308,13 +313,18 @@ struct cpu_dyn_regs {
        __le32 hw_state;
        __le32 kmd_msg_to_cpu;
        __le32 cpu_cmd_status_to_host;
-       __le32 gic_host_irq_ctrl;
+       union {
+               __le32 gic_host_irq_ctrl;
+               __le32 gic_host_pi_upd_irq;
+       };
        __le32 gic_tpc_qm_irq_ctrl;
        __le32 gic_mme_qm_irq_ctrl;
        __le32 gic_dma_qm_irq_ctrl;
        __le32 gic_nic_qm_irq_ctrl;
        __le32 gic_dma_core_irq_ctrl;
-       __le32 reserved1[26];           /* reserve for future use */
+       __le32 gic_host_halt_irq;
+       __le32 gic_host_ints_irq;
+       __le32 reserved1[24];           /* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
index cd69d34..d95d416 100644 (file)
  * PSOC scratch-pad registers
  */
 #define mmHW_STATE                     mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
+/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
 #define mmGIC_HOST_IRQ_CTRL_POLL_REG   mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
+#define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
 #define mmGIC_DMA_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
 #define mmGIC_NIC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
 #define mmGIC_DMA_CR_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
+#define mmGIC_HOST_HALT_IRQ_POLL_REG   mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
+#define mmGIC_HOST_INTS_IRQ_POLL_REG   mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
 #define mmCPU_BOOT_DEV_STS0            mmPSOC_GLOBAL_CONF_SCRATCHPAD_20
 #define mmCPU_BOOT_DEV_STS1            mmPSOC_GLOBAL_CONF_SCRATCHPAD_21
 #define mmFUSE_VER_OFFSET              mmPSOC_GLOBAL_CONF_SCRATCHPAD_22