+/*
+ * cs_timedout - work handler for a CS whose TDR work (work_tdr) has fired.
+ * @work: the work_struct embedded in the CS's work_tdr delayed work.
+ *
+ * Unless the CS asked to skip reset on timeout, the CS is marked as timed out
+ * and the reset policy is chosen from hdev->reset_on_lockup: either reset the
+ * device at the end of this handler or only flag reset_info.needs_reset.
+ * When the timeout details are recorded in hdev->last_error, a
+ * HL_NOTIFIER_EVENT_CS_TIMEOUT notification is sent as well, with
+ * HL_NOTIFIER_EVENT_DEVICE_RESET added if a device reset is going to follow.
+ */
static void cs_timedout(struct work_struct *work)
{
struct hl_device *hdev;
+ u64 event_mask;
int rc;
struct hl_cs *cs = container_of(work, struct hl_cs,
work_tdr.work);
- bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
+ bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false;
rc = cs_get_unless_zero(cs);
if (!rc)
return;
}
- /* Mark the CS is timed out so we won't try to cancel its TDR */
- if (likely(!skip_reset_on_timeout))
+ /* hdev must be resolved before the reset policy below dereferences it */
+ hdev = cs->ctx->hdev;
+
+ if (likely(!skip_reset_on_timeout)) {
+ if (hdev->reset_on_lockup)
+ device_reset = true;
+ else
+ hdev->reset_info.needs_reset = true;
+
+ /* Mark the CS as timed out so we won't try to cancel its TDR */
cs->timedout = true;
+ }
- hdev = cs->ctx->hdev;
if (rc) {
hdev->last_error.cs_timeout.timestamp = ktime_get();
hdev->last_error.cs_timeout.seq = cs->sequence;
+
+ event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
+ HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
+
+ hl_notifier_event_send_all(hdev, event_mask);
}
switch (cs->type) {
cs_put(cs);
- if (likely(!skip_reset_on_timeout)) {
- if (hdev->reset_on_lockup)
- hl_device_reset(hdev, HL_DRV_RESET_TDR);
- else
- hdev->reset_info.needs_reset = true;
- }
+ /* The reset decision was taken above; act on it only after cs_put() */
+ if (device_reset)
+ hl_device_reset(hdev, HL_DRV_RESET_TDR);
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
* HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event
* HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code
* HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset
+ * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2)
+#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3)
/*
* Various information operations such as: