On Goya and Gaudi, the stop-on-error configuration can be set via
debugfs. However, in future devices, this configuration will always be
enabled.
Modify the debugfs node to be allowed only for ASICs that support this
dynamic configuration.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Contact: ogabbay@kernel.org
Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
+ Relevant only for GOYA and GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
char tmp_buf[200];
ssize_t rc;
+ if (!hdev->asic_prop.configurable_stop_on_err)
+ return -EOPNOTSUPP;
+
if (*ppos)
return 0;
u32 value;
ssize_t rc;
+ if (!hdev->asic_prop.configurable_stop_on_err)
+ return -EOPNOTSUPP;
+
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
"Can't change stop on error during reset\n");
* use-case of doing soft-reset in training (due
* to the fact that training runs on multiple
* devices)
+ * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
u8 use_get_power_for_reset_history;
u8 supports_soft_reset;
u8 allow_inference_soft_reset;
+ u8 configurable_stop_on_err;
};
/**
prop->use_get_power_for_reset_history = true;
+ prop->configurable_stop_on_err = true;
+
return 0;
}
prop->use_get_power_for_reset_history = true;
+ prop->configurable_stop_on_err = true;
+
return 0;
}