habanalabs: fixes to the poll-timeout macros
author Ohad Sharabi <osharabi@habana.ai>
Mon, 4 Jul 2022 05:33:57 +0000 (08:33 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Tue, 12 Jul 2022 06:09:30 +0000 (09:09 +0300)
- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- on register poll using ELBI use ELBI read rather than BAR read on
  error condition
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/habanalabs.h

index 72cb12f2068a2f8ce57d4f9104a247d7abbf2adb..3c51eaca521ca7bc05e2a73892f6b589f76c5144 100644 (file)
@@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 /* Timeout should be longer when working with simulator but cap the
  * increased timeout to some maximum
  */
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
 ({ \
        ktime_t __timeout; \
+       u32 __elbi_read; \
+       int __rc = 0; \
        if (hdev->pdev) \
                __timeout = ktime_add_us(ktime_get(), timeout_us); \
        else \
@@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
                                        (u64) HL_SIM_MAX_TIMEOUT_US)); \
        might_sleep_if(sleep_us); \
        for (;;) { \
-               (val) = RREG32(addr); \
+               if (elbi) { \
+                       __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+                       if (__rc) \
+                               break; \
+                       (val) = __elbi_read; \
+               } else {\
+                       (val) = RREG32((u32)addr); \
+               } \
                if (cond) \
                        break; \
                if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-                       (val) = RREG32(addr); \
+                       if (elbi) { \
+                               __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+                               if (__rc) \
+                                       break; \
+                               (val) = __elbi_read; \
+                       } else {\
+                               (val) = RREG32((u32)addr); \
+                       } \
                        break; \
                } \
                if (sleep_us) \
                        usleep_range((sleep_us >> 2) + 1, sleep_us); \
        } \
-       (cond) ? 0 : -ETIMEDOUT; \
+       __rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
 })
 
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+               hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false) /* elbi=false: polls with RREG32() */
+
+#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
+               hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true) /* elbi=true: polls with hl_pci_elbi_read() */
+
+/*
+ * Poll an array of register addresses until all of them read expected_val.
+ * A register that has already matched is not polled again, both for efficiency
+ * and because some registers are "clear on read". Evaluates to 0 on success,
+ * -EINVAL if arr_size >= 64, the hl_pci_elbi_read() error code, or -ETIMEDOUT.
+ * TODO: use read from PCI bar in other places in the code (SW-91406)
+ */
+#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+                                               timeout_us, elbi) \
+({ \
+       ktime_t __timeout; \
+       u64 __elem_bitmask; /* bit i set: addr_arr[i] has not matched yet */ \
+       u32 __read_val; \
+       u8 __arr_idx;   \
+       int __rc = 0; \
+       \
+       if (hdev->pdev) \
+               __timeout = ktime_add_us(ktime_get(), timeout_us); \
+       else /* no pdev: timeout is multiplied by 10, capped at HL_SIM_MAX_TIMEOUT_US */ \
+               __timeout = ktime_add_us(ktime_get(),\
+                               min(((u64)timeout_us * 10), \
+                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
+       \
+       might_sleep_if(sleep_us); \
+       if (arr_size >= 64) /* the mask below is BIT_ULL(arr_size) - 1 held in a u64 */ \
+               __rc = -EINVAL; \
+       else \
+               __elem_bitmask = BIT_ULL(arr_size) - 1; \
+       for (;;) { \
+               if (__rc) /* arr_size was rejected above; nothing is polled */ \
+                       break; \
+               for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) {      \
+                       if (!(__elem_bitmask & BIT_ULL(__arr_idx)))     \
+                               continue; /* already matched, do not read it again */   \
+                       if (elbi) { /* read through hl_pci_elbi_read() instead of RREG32() */ \
+                               __rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \
+                               if (__rc) \
+                                       break; /* leaves the inner for only; __rc ends the outer loop below */ \
+                       } else { \
+                               __read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
+                       } \
+                       if (__read_val == (expected_val))       \
+                               __elem_bitmask &= ~BIT_ULL(__arr_idx);  \
+               }       \
+               if (__rc || (__elem_bitmask == 0)) /* read error, or every element matched */ \
+                       break; \
+               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) /* timeout_us == 0: never time out */ \
+                       break; \
+               if (sleep_us) \
+                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
+       } \
+       __rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
+})
+
+#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+                                       timeout_us) \
+       hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+                                               timeout_us, false) /* elbi=false: registers are read with RREG32() */
+
+#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+                                       timeout_us) \
+       hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+                                               timeout_us, true) /* elbi=true: registers are read with hl_pci_elbi_read() */
+
 /*
  * address in this macro points always to a memory location in the
  * host's (server's) memory. That location is updated asynchronously
@@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
        (cond) ? 0 : -ETIMEDOUT; \
 })
 
-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
-                                       timeout_us) \
-({ \
-       ktime_t __timeout; \
-       if (hdev->pdev) \
-               __timeout = ktime_add_us(ktime_get(), timeout_us); \
-       else \
-               __timeout = ktime_add_us(ktime_get(),\
-                               min((u64)(timeout_us * 10), \
-                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
-       might_sleep_if(sleep_us); \
-       for (;;) { \
-               (val) = readl(addr); \
-               if (cond) \
-                       break; \
-               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-                       (val) = readl(addr); \
-                       break; \
-               } \
-               if (sleep_us) \
-                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
-       } \
-       (cond) ? 0 : -ETIMEDOUT; \
-})
-
 #define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
 ({ \
        struct user_mapped_block *p = blk; \