summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOded Gabbay <ogabbay@kernel.org>2021-05-23 19:00:49 +0300
committerOded Gabbay <ogabbay@kernel.org>2021-06-18 15:23:40 +0300
commita60d075c81f0730b62b277d9a94842a3737a4a42 (patch)
tree2626fd96ebbbbb9aeb571e13bad6d385310f0194
parent4cb4508c86d700bdf243e013630ba1af93a01892 (diff)
habanalabs/gaudi: refactor reset code
After all the latest changes to the reset code, there was some redundancy and there were some errors in the flows. If the Linux FIT is loaded to the ASIC CPU, we need to communicate with it only via GIC. If it is not loaded, we need to either use COMMS protocol (for newer f/w) or MSG_TO_CPU register (for older f/w). In addition, if we halted the device CPU then we need to mark that the driver will do the reset, regardless of the capabilities. Also, to prevent false errors, we need to keep track of whether the device CPU was already halted. If so, we shouldn't try to halt it again. Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c47
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h5
2 files changed, 34 insertions, 18 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index a272dfc6b8a6..4d89313f58ea 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,11 +1931,11 @@ static void gaudi_disable_msi(struct hl_device *hdev)
gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
-static void gaudi_fw_hard_reset(struct hl_device *hdev)
+static void gaudi_ask_hard_reset_without_linux(struct hl_device *hdev)
{
int rc;
- if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+ if (hdev->asic_prop.dynamic_fw_load) {
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
COMMS_RST_DEV, 0, false,
hdev->fw_loader.cpu_timeout);
@@ -1946,12 +1946,16 @@ static void gaudi_fw_hard_reset(struct hl_device *hdev)
}
}
-static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+static void gaudi_ask_halt_machine_without_linux(struct hl_device *hdev)
{
+ struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
+ if (gaudi && gaudi->device_cpu_is_halted)
+ return;
+
/* Stop device CPU to make sure nothing bad happens */
- if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+ if (hdev->asic_prop.dynamic_fw_load) {
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
COMMS_GOTO_WFE, 0, true,
hdev->fw_loader.cpu_timeout);
@@ -1961,6 +1965,9 @@ static void gaudi_fw_halt_cpu(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
msleep(GAUDI_CPU_RESET_WAIT_MSEC);
}
+
+ if (gaudi)
+ gaudi->device_cpu_is_halted = true;
}
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
@@ -4110,8 +4117,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
- struct gaudi_device *gaudi = hdev->asic_specific;
u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
+ struct gaudi_device *gaudi = hdev->asic_specific;
+ bool driver_performs_reset;
if (!hard_reset) {
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
@@ -4126,32 +4134,34 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
+ driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
+ !hdev->asic_prop.hard_reset_done_by_fw);
+
/* Set device to handle FLR by H/W as we will put the device CPU to
* halt mode
*/
- if (!hdev->asic_prop.fw_security_enabled &&
- !hdev->asic_prop.hard_reset_done_by_fw)
+ if (driver_performs_reset)
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
- /* I don't know what is the state of the CPU so make sure it is
- * stopped in any means necessary
+ /* If linux is loaded in the device CPU we need to communicate with it
+ * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
+ * registers in case of old F/Ws
*/
- if (hdev->asic_prop.hard_reset_done_by_fw)
- gaudi_fw_hard_reset(hdev);
- else
- gaudi_fw_halt_cpu(hdev);
-
if (hdev->fw_loader.linux_loaded) {
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+ } else {
+ if (hdev->asic_prop.hard_reset_done_by_fw)
+ gaudi_ask_hard_reset_without_linux(hdev);
+ else
+ gaudi_ask_halt_machine_without_linux(hdev);
}
- if (!hdev->asic_prop.fw_security_enabled &&
- !hdev->asic_prop.hard_reset_done_by_fw) {
+ if (driver_performs_reset) {
/* Configure the reset registers. Must be done as early as
* possible in case we fail during H/W initialization
@@ -4185,8 +4195,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
/* Restart BTL/BLR upon hard-reset */
- if (!hdev->asic_prop.fw_security_enabled)
- WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+ WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
@@ -4223,6 +4232,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
HW_CAP_CLK_GATE);
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
+
+ gaudi->device_cpu_is_halted = false;
}
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 5929be81ec23..48637a6343bb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -314,6 +314,10 @@ struct gaudi_internal_qman_info {
* Multi MSI is possible only with IOMMU enabled.
* @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
* 8-bit value so use u8.
+ * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
+ * halted. We can't halt it again because the COMMS
+ * protocol will throw an error. Relevant only for
+ * cases where Linux was not loaded to device CPU
*/
struct gaudi_device {
int (*cpucp_info_get)(struct hl_device *hdev);
@@ -335,6 +339,7 @@ struct gaudi_device {
u32 hw_cap_initialized;
u8 multi_msi_mode;
u8 mmu_cache_inv_pi;
+ u8 device_cpu_is_halted;
};
void gaudi_init_security(struct hl_device *hdev);