From be0aec23bf4624fd55650629fe8df20483487049 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Mon, 7 Mar 2016 14:02:18 +0100 Subject: x86/mce/AMD, EDAC: Enable error decoding of Scalable MCA errors For Scalable MCA enabled processors, errors are listed per IP block. And since it is not required for an IP to map to a particular bank, we need to use HWID and McaType values from the MCx_IPID register to figure out which IP a given bank represents. We also have a new bit (TCC) in the MCx_STATUS register to indicate Task context is corrupt. Add logic here to decode errors from all known IP blocks for Fam17h Model 00-0fh and to print TCC errors. [ Minor fixups. ] Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1457021458-2522-3-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 59 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 80ba0a8d6d06..9c467fe00551 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -42,6 +42,18 @@ /* AMD-specific bits */ #define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ + +/* + * McaX field if set indicates a given bank supports MCA extensions: + * - Deferred error interrupt type is specifiable by bank. + * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, + * But should not be used to determine MSR numbers. + * - TCC bit is present in MCx_STATUS. + */ +#define MCI_CONFIG_MCAX 0x1 +#define MCI_IPID_MCATYPE 0xFFFF0000 +#define MCI_IPID_HWID 0xFFF /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is @@ -93,7 +105,9 @@ /* AMD Scalable MCA */ #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 +#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) /* * This structure contains all data related to the MCE log. Also @@ -291,4 +305,49 @@ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); +/* + * Enumerate new IP types and HWID values in AMD processors which support + * Scalable MCA. + */ +#ifdef CONFIG_X86_MCE_AMD +enum amd_ip_types { + SMCA_F17H_CORE = 0, /* Core errors */ + SMCA_DF, /* Data Fabric */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_AMD_IP_TYPES +}; + +struct amd_hwid { + const char *name; + unsigned int hwid; +}; + +extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; + +enum amd_core_mca_blocks { + SMCA_LS = 0, /* Load Store */ + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 cache */ + SMCA_DE, /* Decoder unit */ + RES, /* Reserved */ + SMCA_EX, /* Execution unit */ + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 cache */ + N_CORE_MCA_BLOCKS +}; + +extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; + +enum amd_df_mca_blocks { + SMCA_CS = 0, /* Coherent Slave */ + SMCA_PIE, /* Power management, Interrupts, etc */ + N_DF_BLOCKS +}; + +extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +#endif + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 88de27bd5797..ee487a93ebe7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -71,6 +71,35 @@ static const char * const th_names[] = { "execution_unit", }; +/* Define HWID to IP type mappings for Scalable MCA */ +struct amd_hwid amd_hwids[] = { + [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, + [SMCA_DF] = { "data_fabric", 0x2E }, + [SMCA_UMC] = { "umc", 0x96 }, + [SMCA_PB] = { "param_block", 0x5 }, + [SMCA_PSP] = { "psp", 0xFF }, + [SMCA_SMU] = { "smu", 0x1 }, +}; +EXPORT_SYMBOL_GPL(amd_hwids); + +const char * const amd_core_mcablock_names[] = { + [SMCA_LS] = "load_store", + [SMCA_IF] = "insn_fetch", + [SMCA_L2_CACHE] = "l2_cache", + [SMCA_DE] = "decode_unit", + [RES] = "", + [SMCA_EX] = "execution_unit", + [SMCA_FP] = "floating_point", + [SMCA_L3_CACHE] = "l3_cache", +}; +EXPORT_SYMBOL_GPL(amd_core_mcablock_names); + +const char * const amd_df_mcablock_names[] = { + [SMCA_CS] = "coherent_slave", + [SMCA_PIE] = "pie", +}; +EXPORT_SYMBOL_GPL(amd_df_mcablock_names); + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ -- cgit v1.2.3