From bd68fb94b3d4601b45a7e4558b9de0eebdb85ec1 Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 3 Jan 2020 11:55:42 +0800 Subject: drm/amdgpu: resolve bug in UMC 6 error counter query iterate over all error counter registers in SMN space; removed support for error counter access via MMIO Reviewed-by: Guchun Chen Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 35 ---------- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 119 +++++++++++++++++--------------- 2 files changed, 64 insertions(+), 90 deletions(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 3283032a78e5..a615a1eb750b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,38 +21,6 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ -/* implement 64 bits REG operations via 32 bits interface */ -#define RREG64_UMC(reg) (RREG32(reg) | \ - ((uint64_t)RREG32((reg) + 1) << 32)) -#define WREG64_UMC(reg, v) \ - do { \ - WREG32((reg), lower_32_bits(v)); \ - WREG32((reg) + 1, upper_32_bits(v)); \ - } while (0) - -/* - * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, - * uint32_t umc_reg_offset, uint32_t channel_index) - */ -#define amdgpu_umc_for_each_channel(func) \ - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \ - uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \ - for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \ - /* enable the index mode to query eror count per channel */ \ - adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \ - for (channel_inst = 0; \ - channel_inst < adev->umc.channel_inst_num; \ - channel_inst++) { \ - /* calc the register offset according to channel instance */ \ - umc_reg_offset = adev->umc.channel_offs * channel_inst; \ - /* get channel index of interleaved memory */ \ - channel_index = 
adev->umc.channel_idx_tbl[ \ - umc_inst * adev->umc.channel_inst_num + channel_inst]; \ - (func)(adev, err_data, umc_reg_offset, channel_index); \ - } \ - } \ - adev->umc.funcs->disable_umc_index_mode(adev); - struct amdgpu_umc_funcs { void (*err_cnt_init)(struct amdgpu_device *adev); int (*ras_late_init)(struct amdgpu_device *adev); @@ -60,9 +28,6 @@ struct amdgpu_umc_funcs { void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); - void (*enable_umc_index_mode)(struct amdgpu_device *adev, - uint32_t umc_instance); - void (*disable_umc_index_mode)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 23178399667c..25e9e8b7d5fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -32,11 +32,13 @@ #define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 +#define UMC_6_INST_DIST 0x40000 + /* * (addr / 256) * 8192, the higher 26 bits in ErrorAddr * is the index of 8KB block */ -#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) /* channel index is the index of 256B block */ #define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) /* offset in 256B block */ @@ -50,41 +52,11 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; -static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, - uint32_t umc_instance) -{ - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_MODE_EN, 1); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE, umc_instance); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - 
RSMU_UMC_INDEX_WREN, 1 << umc_instance); - WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - rsmu_umc_index); -} - -static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) -{ - WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_MODE_EN, 0); -} - -static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) +static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) { - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - return REG_GET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE); + return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; } static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, @@ -174,25 +146,36 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } -static void umc_v6_1_query_error_count(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint32_t umc_reg_offset, - uint32_t channel_index) -{ - umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); - umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, - &(err_data->ue_count)); -} - static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { + for (ch_inst = 0; ch_inst < adev->umc.channel_inst_num; ch_inst++) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, + 
umc_reg_offset, + &(err_data->ue_count)); + } + } } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset, + uint32_t channel_index, + uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page; @@ -244,7 +227,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; err_rec->cu = 0; err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); + err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; } @@ -257,12 +240,30 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { + for (ch_inst = 0; ch_inst < adev->umc.channel_inst_num; ch_inst++) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } + } + } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt_addr; @@ -301,9 +302,19 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) { - void *ras_error_status = NULL; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; 
umc_inst++) { + for (ch_inst = 0; ch_inst < adev->umc.channel_inst_num; ch_inst++) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); - amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); + umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); + } + } } const struct amdgpu_umc_funcs umc_v6_1_funcs = { @@ -311,6 +322,4 @@ const struct amdgpu_umc_funcs umc_v6_1_funcs = { .ras_late_init = amdgpu_umc_ras_late_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, - .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, - .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, }; -- cgit v1.2.3