summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/misc/habanalabs/device.c 6
-rw-r--r-- drivers/misc/habanalabs/gaudi/gaudi.c 38
-rw-r--r-- drivers/misc/habanalabs/goya/goya.c 1
-rw-r--r-- drivers/misc/habanalabs/habanalabs.h 2
-rw-r--r-- drivers/misc/habanalabs/sysfs.c 5
5 files changed, 35 insertions, 17 deletions
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 4b6c8de46dd8..4a4a446f479e 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -801,6 +801,7 @@ static void device_hard_reset_pending(struct work_struct *work)
* @hdev: pointer to habanalabs device structure
* @hard_reset: should we do hard reset to all engines or just reset the
* compute/dma engines
+ * @from_hard_reset_thread: is the caller the hard-reset thread
*
* Block future CS and wait for pending CS to be enqueued
* Call ASIC H/W fini
@@ -823,6 +824,11 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
return 0;
}
+ if ((!hard_reset) && (!hdev->supports_soft_reset)) {
+ dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
+ hard_reset = true;
+ }
+
/*
* Prevent concurrency in this function - only one reset should be
* done at any given time. Only need to perform this if we didn't
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 3d4a569914d3..92a5130f06fb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5774,7 +5774,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
>> EQ_CTL_EVENT_TYPE_SHIFT);
u8 cause;
- bool soft_reset_required;
+ bool reset_required;
gaudi->events_stat[event_type]++;
gaudi->events_stat_aggregate[event_type]++;
@@ -5840,16 +5840,18 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_TPC6_DEC:
case GAUDI_EVENT_TPC7_DEC:
gaudi_print_irq_info(hdev, event_type, true);
- soft_reset_required = gaudi_tpc_read_interrupts(hdev,
+ reset_required = gaudi_tpc_read_interrupts(hdev,
tpc_dec_event_to_tpc_id(event_type),
"AXI_SLV_DEC_Error");
- if (soft_reset_required) {
- dev_err_ratelimited(hdev->dev,
- "soft reset required due to %s\n",
- gaudi_irq_map_table[event_type].name);
- hl_device_reset(hdev, false, false);
+ if (reset_required) {
+ dev_err(hdev->dev, "hard reset required due to %s\n",
+ gaudi_irq_map_table[event_type].name);
+
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, true, false);
+ } else {
+ hl_fw_unmask_irq(hdev, event_type);
}
- hl_fw_unmask_irq(hdev, event_type);
break;
case GAUDI_EVENT_TPC0_KRN_ERR:
@@ -5861,16 +5863,18 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_TPC6_KRN_ERR:
case GAUDI_EVENT_TPC7_KRN_ERR:
gaudi_print_irq_info(hdev, event_type, true);
- soft_reset_required = gaudi_tpc_read_interrupts(hdev,
+ reset_required = gaudi_tpc_read_interrupts(hdev,
tpc_krn_event_to_tpc_id(event_type),
"KRN_ERR");
- if (soft_reset_required) {
- dev_err_ratelimited(hdev->dev,
- "soft reset required due to %s\n",
- gaudi_irq_map_table[event_type].name);
- hl_device_reset(hdev, false, false);
+ if (reset_required) {
+ dev_err(hdev->dev, "hard reset required due to %s\n",
+ gaudi_irq_map_table[event_type].name);
+
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, true, false);
+ } else {
+ hl_fw_unmask_irq(hdev, event_type);
}
- hl_fw_unmask_irq(hdev, event_type);
break;
case GAUDI_EVENT_PCIE_CORE_SERR:
@@ -5921,8 +5925,8 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
gaudi_print_irq_info(hdev, event_type, true);
- hl_device_reset(hdev, false, false);
- hl_fw_unmask_irq(hdev, event_type);
+ if (hdev->hard_reset_on_fw_events)
+ hl_device_reset(hdev, true, false);
break;
case GAUDI_EVENT_TPC0_BMON_SPMU:
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 15b6c3228e37..152418dfe20c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -752,6 +752,7 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
+ hdev->supports_soft_reset = true;
return 0;
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 5a855b7edf43..0f0691875298 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1436,6 +1436,7 @@ struct hl_device_idle_busy_ts {
* @stop_on_err: true if engines should stop on error.
* @supports_sync_stream: is sync stream supported.
* @supports_coresight: is CoreSight supported.
+ * @supports_soft_reset: is soft reset supported.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1522,6 +1523,7 @@ struct hl_device {
u8 stop_on_err;
u8 supports_sync_stream;
u8 supports_coresight;
+ u8 supports_soft_reset;
/* Parameters for bring-up */
u8 mmu_enable;
diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c
index e4454414d0e1..5d78d5e1c782 100644
--- a/drivers/misc/habanalabs/sysfs.c
+++ b/drivers/misc/habanalabs/sysfs.c
@@ -183,6 +183,11 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
+ if (!hdev->supports_soft_reset) {
+ dev_err(hdev->dev, "Device does not support soft-reset\n");
+ goto out;
+ }
+
dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
hl_device_reset(hdev, false, false);