summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorHawking Zhang <Hawking.Zhang@amd.com>2020-01-08 23:28:05 +0800
committerAlex Deucher <alexander.deucher@amd.com>2020-01-14 10:18:08 -0500
commit5e62db9df684673f4ce7187c3c02e6a995c5cde9 (patch)
tree009fc4488581d46065bec241dec967689d3a03e4 /drivers/gpu
parent1dd5ead2940903b2cf36f6725f1d6670abd6f14b (diff)
drm/amdgpu: read sdma edc counter to clear the counters
SDMA edc counter registers were added in gfx edc counters array. When querying gfx error counter in that array, there is no way to differentiate sdma instance number for different asic and then results to NULL pointer access when trying to read sdma register base address for instances greater than 2 on Vega20. In addition, this also results to wrong gfx error counters since it actually added sdma edc counters. Therefore, sdma edc counter registers should be separated from gfx edc counter regsiter array and only get initialized when driver tries to enable sdma ras. Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c7
2 files changed, 8 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a5492e375f29..89c04cfcfe12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4021,14 +4021,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
{ SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1},
- { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1},
};
static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
@@ -4092,7 +4084,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
int sgpr_work_group_size = 5;
int gpr_reg_size = compute_dim_x / 16 + 6;
- int sec_ded_counter_reg_size = adev->sdma.num_instances + 34;
/* only support when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
@@ -4232,7 +4223,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* read back registers to clear the counters */
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < sec_ded_counter_reg_size; i++) {
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
gfx_v9_0_select_se_sh(adev, j, 0x0, k);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1e0767e88d19..ec9d7873ed42 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1801,6 +1801,13 @@ static int sdma_v4_0_late_init(void *handle)
struct ras_ih_if ih_info = {
.cb = sdma_v4_0_process_ras_data_cb,
};
+ int i;
+
+ /* read back edc counter registers to clear the counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
+ }
return adev->sdma.funcs->ras_late_init(adev, &ih_info);
}