summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/gaudi
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2020-07-03 20:46:12 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2020-07-10 19:53:03 +0300
commite38bfd30e08802d9661efffb8c048bd53a3acfc4 (patch)
tree7c4e53e360089c57d1d89b848ea8f1eb13d6f168 /drivers/misc/habanalabs/gaudi
parent2edc66e22ba1af33020ff8b75fe1a2b055cdb73f (diff)
habanalabs: set clock gating per engine
For debugging purposes, we need to allow the root user better control of the clock gating feature of the DMA and compute engines. Therefore, change the clock gating debugfs interface to be bitmask instead of true/false. Each bit represents a different engine, according to gaudi_engine_id enum. See debugfs documentation for more details. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c112
1 files changed, 74 insertions, 38 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e22206527164..9d6aebef8854 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -98,6 +98,11 @@
#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
+#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
+ BIT(GAUDI_ENGINE_ID_MME_0) |\
+ BIT(GAUDI_ENGINE_ID_MME_2) |\
+ GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
+
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -106,14 +111,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
};
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
- [GAUDI_PCI_DMA_1] = 0,
- [GAUDI_PCI_DMA_2] = 1,
- [GAUDI_PCI_DMA_3] = 5,
- [GAUDI_HBM_DMA_1] = 2,
- [GAUDI_HBM_DMA_2] = 3,
- [GAUDI_HBM_DMA_3] = 4,
- [GAUDI_HBM_DMA_4] = 6,
- [GAUDI_HBM_DMA_5] = 7
+ [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
+ [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
+ [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
+ [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
+ [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
+ [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
+ [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
+ [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
};
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
@@ -1819,7 +1824,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
gaudi_init_rate_limiter(hdev);
- gaudi_disable_clock_gating(hdev);
+ hdev->asic_funcs->disable_clock_gating(hdev);
for (tpc_id = 0, tpc_offset = 0;
tpc_id < TPC_NUMBER_OF_ENGINES;
@@ -2531,46 +2536,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}
-static void gaudi_enable_clock_gating(struct hl_device *hdev)
+static void gaudi_set_clock_gating(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qman_offset;
int i;
- if (!hdev->clock_gating)
- return;
-
- if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
- return;
-
/* In case we are during debug session, don't enable the clock gate
* as it may interfere
*/
if (hdev->in_debug)
return;
- for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
+ for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
+ if (!(hdev->clock_gating_mask &
+ (BIT_ULL(gaudi_dma_assignment[i]))))
+ continue;
+
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
QMAN_UPPER_CP_CGM_PWR_GATE_EN);
}
- for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
+ for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
+ if (!(hdev->clock_gating_mask &
+ (BIT_ULL(gaudi_dma_assignment[i]))))
+ continue;
+
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
}
- WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
- WREG32(mmMME0_QM_CGM_CFG,
- QMAN_COMMON_CP_CGM_PWR_GATE_EN);
- WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
- WREG32(mmMME2_QM_CGM_CFG,
- QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+ if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
+ WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
+ WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+ }
+
+ if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
+ WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
+ WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+ }
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
+ if (!(hdev->clock_gating_mask &
+ (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
+ continue;
+
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
QMAN_CGM1_PWR_GATE_EN);
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
@@ -2663,7 +2677,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
gaudi_stop_hbm_dma_qmans(hdev);
gaudi_stop_pci_dma_qmans(hdev);
- gaudi_disable_clock_gating(hdev);
+ hdev->asic_funcs->disable_clock_gating(hdev);
msleep(wait_timeout_ms);
@@ -3003,7 +3017,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
gaudi_init_tpc_qmans(hdev);
- gaudi_enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
gaudi_enable_timestamp(hdev);
@@ -3112,7 +3126,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
HW_CAP_HBM_DMA | HW_CAP_PLL |
HW_CAP_MMU |
HW_CAP_SRAM_SCRAMBLER |
- HW_CAP_HBM_SCRAMBLER);
+ HW_CAP_HBM_SCRAMBLER |
+ HW_CAP_CLK_GATE);
+
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
}
@@ -4526,13 +4542,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
- if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+ if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+ (hdev->clock_gating_mask &
+ GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
*val = RREG32(addr - CFG_BASE);
}
+
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4568,13 +4589,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
- if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+ if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+ (hdev->clock_gating_mask &
+ GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
} else {
WREG32(addr - CFG_BASE, val);
}
+
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4610,7 +4636,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
- if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+ if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+ (hdev->clock_gating_mask &
+ GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
dev_err_ratelimited(hdev->dev,
"Can't read register - clock gating is enabled!\n");
rc = -EFAULT;
@@ -4620,6 +4650,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
*val = (((u64) val_h) << 32) | val_l;
}
+
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4656,7 +4687,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
int rc = 0;
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
- if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
+
+ if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
+ (hdev->clock_gating_mask &
+ GAUDI_CLK_GATE_DEBUGFS_MASK)) {
+
dev_err_ratelimited(hdev->dev,
"Can't write register - clock gating is enabled!\n");
rc = -EFAULT;
@@ -4665,6 +4700,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
WREG32(addr + sizeof(u32) - CFG_BASE,
upper_32_bits(val));
}
+
} else if ((addr >= SRAM_BASE_ADDR) &&
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4886,7 +4922,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
@@ -5267,7 +5303,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
}
if (disable_clock_gating) {
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
}
}
@@ -5754,7 +5790,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
/* Clear interrupts */
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
@@ -6270,7 +6306,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
if (s)
seq_puts(s, "\n");
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
@@ -6371,7 +6407,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
dev_err(hdev->dev,
"Timeout while waiting for TPC%d icache prefetch\n",
tpc_id);
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
return -EIO;
}
@@ -6400,7 +6436,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
1000,
kernel_timeout);
- hdev->asic_funcs->enable_clock_gating(hdev);
+ hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
if (rc) {
@@ -6741,7 +6777,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
.send_heartbeat = gaudi_send_heartbeat,
- .enable_clock_gating = gaudi_enable_clock_gating,
+ .set_clock_gating = gaudi_set_clock_gating,
.disable_clock_gating = gaudi_disable_clock_gating,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,