Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c  56
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c            9
2 files changed, 65 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8bb433043a7f..e16f3f201e06 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -186,12 +186,68 @@ static int error_context(struct mce *m)
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}
+/*
+ * See AMD Error Scope Hierarchy table in a newer BKDG. For example
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
+ */
+static int mce_severity_amd(struct mce *m, enum context ctx)
+{
+ /* Processor Context Corrupt, no need to fumble too much, die! */
+ if (m->status & MCI_STATUS_PCC)
+ return MCE_PANIC_SEVERITY;
+
+ if (m->status & MCI_STATUS_UC) {
+
+ /*
+ * On older systems where overflow_recov flag is not present, we
+ * should simply panic if an error overflow occurs. If
+ * overflow_recov flag is present and set, then software can try
+ * to at least kill process to prolong system operation.
+ */
+ if (mce_flags.overflow_recov) {
+ /* software can try to contain */
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
+ if (ctx == IN_KERNEL)
+ return MCE_PANIC_SEVERITY;
+
+ /* kill current process */
+ return MCE_AR_SEVERITY;
+ } else {
+ /* at least one error was not logged */
+ if (m->status & MCI_STATUS_OVER)
+ return MCE_PANIC_SEVERITY;
+ }
+
+ /*
+ * For any other case, return MCE_UC_SEVERITY so that we log the
+ * error and exit #MC handler.
+ */
+ return MCE_UC_SEVERITY;
+ }
+
+ /*
+ * deferred error: poll handler catches these and adds to mce_ring so
+ * memory-failure can take recovery actions.
+ */
+ if (m->status & MCI_STATUS_DEFERRED)
+ return MCE_DEFERRED_SEVERITY;
+
+ /*
+ * corrected error: poll handler catches these and passes responsibility
+ * of decoding the error to EDAC
+ */
+ return MCE_KEEP_SEVERITY;
+}
+
int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m);
struct severity *s;
+ if (m->cpuvendor == X86_VENDOR_AMD)
+ return mce_severity_amd(m, ctx);
+
for (s = severities;; s++) {
if ((m->status & s->mask) != s->result)
continue;
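
For reference, the grading added above can be read as a small decision table. The sketch below is not part of the patch; it restates mce_severity_amd() as a standalone function over just the bits it consults, with overflow_recov standing in for mce_flags.overflow_recov and in_kernel for the IN_KERNEL context.

/* Illustrative sketch only -- mirrors the hunk above, not kernel code. */
static int amd_grade(bool pcc, bool uc, bool over, bool deferred,
		     bool ripv, bool in_kernel, bool overflow_recov)
{
	if (pcc)				/* processor context corrupt */
		return MCE_PANIC_SEVERITY;

	if (uc) {
		if (overflow_recov) {
			if (!ripv && in_kernel)	/* cannot return to the interrupted kernel code */
				return MCE_PANIC_SEVERITY;
			return MCE_AR_SEVERITY;	/* try to contain: kill the current process */
		}
		if (over)			/* no overflow recovery and an error was lost */
			return MCE_PANIC_SEVERITY;
		return MCE_UC_SEVERITY;		/* log the error and exit the #MC handler */
	}

	if (deferred)				/* picked up by the poll handler for memory-failure */
		return MCE_DEFERRED_SEVERITY;

	return MCE_KEEP_SEVERITY;		/* corrected error; left to EDAC to decode */
}
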
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8548b714a16b..1189f1150a19 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count);
struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
struct mca_config mca_cfg __read_mostly = {
.bootlog = -1,
@@ -1535,6 +1536,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
mce_banks[0].ctl = 0;
/*
+ * overflow_recov is supported for F15h Models 00h-0fh
+ * even though we don't have a CPUID bit for it.
+ */
+ if (c->x86 == 0x15 && c->x86_model <= 0xf)
+ mce_flags.overflow_recov = 1;
+
+ /*
* Turn off MC4_MISC thresholding banks on those models since
* they're not supported there.
*/
@@ -1633,6 +1641,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_AMD:
mce_amd_feature_init(c);
+ mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
break;
default:
break;
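
The vendor-init hunk above keys the new flag off CPUID leaf 0x80000007, EBX bit 0. As a rough, self-contained illustration of that same check, here is a minimal userspace sketch using the compiler-provided <cpuid.h> helper; it is not part of the patch, and the bit position is simply the mask the hunk itself uses.

/* Userspace illustration of the cpuid_ebx(0x80000007) & 0x1 check above. */
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool overflow_recov_supported(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if the requested leaf is not available */
	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
		return false;

	return ebx & 0x1;	/* same mask the kernel hunk stores in mce_flags.overflow_recov */
}

int main(void)
{
	printf("MCA overflow recovery supported: %s\n",
	       overflow_recov_supported() ? "yes" : "no");
	return 0;
}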