From a47473939db20e3961b200eb00acf5fcf084d755 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2012 14:53:11 +0200 Subject: perf/x86: Make hardware event translations available in sysfs Add support to display hardware events translations available through the sysfs. Add 'events' group attribute under the sysfs x86 PMU record with attribute/file for each hardware event. This patch adds only backbone for PMUs to display config under 'events' directory. The specific PMU support itself will come in next patches, however this is how the sysfs group will look like: # ls /sys/devices/cpu/events/ branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions ref-cycles stalled-cycles-backend stalled-cycles-frontend The file - hw event ID mapping is: file hw event ID --------------------------------------------------------------- cpu-cycles PERF_COUNT_HW_CPU_CYCLES instructions PERF_COUNT_HW_INSTRUCTIONS cache-references PERF_COUNT_HW_CACHE_REFERENCES cache-misses PERF_COUNT_HW_CACHE_MISSES branch-instructions PERF_COUNT_HW_BRANCH_INSTRUCTIONS branch-misses PERF_COUNT_HW_BRANCH_MISSES bus-cycles PERF_COUNT_HW_BUS_CYCLES stalled-cycles-frontend PERF_COUNT_HW_STALLED_CYCLES_FRONTEND stalled-cycles-backend PERF_COUNT_HW_STALLED_CYCLES_BACKEND ref-cycles PERF_COUNT_HW_REF_CPU_CYCLES Each file in the 'events' directory contains the term translation for the symbolic hw event for the currently running cpu model. # cat /sys/devices/cpu/events/stalled-cycles-backend event=0xb1,umask=0x01,inv,cmask=0x01 Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Stephane Eranian Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1349873598-12583-2-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 60 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event.h | 2 ++ 2 files changed, 62 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a3374e61a93..9fa4c45ecad9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1316,6 +1316,62 @@ static struct attribute_group x86_pmu_format_group = { .attrs = NULL, }; +struct perf_pmu_events_attr { + struct device_attribute attr; + u64 id; +}; + +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = \ + container_of(attr, struct perf_pmu_events_attr, attr); + + u64 config = x86_pmu.event_map(pmu_attr->id); + return x86_pmu.events_sysfs_show(page, config); +} + +#define EVENT_VAR(_id) event_attr_##_id +#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + +#define EVENT_ATTR(_name, _id) \ +static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = PERF_COUNT_HW_##_id, \ +}; + +EVENT_ATTR(cpu-cycles, CPU_CYCLES ); +EVENT_ATTR(instructions, INSTRUCTIONS ); +EVENT_ATTR(cache-references, CACHE_REFERENCES ); +EVENT_ATTR(cache-misses, CACHE_MISSES ); +EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS ); +EVENT_ATTR(branch-misses, BRANCH_MISSES ); +EVENT_ATTR(bus-cycles, BUS_CYCLES ); +EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND ); +EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND ); +EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); + +static 
struct attribute *empty_attrs; + +struct attribute *events_attr[] = { + EVENT_PTR(CPU_CYCLES), + EVENT_PTR(INSTRUCTIONS), + EVENT_PTR(CACHE_REFERENCES), + EVENT_PTR(CACHE_MISSES), + EVENT_PTR(BRANCH_INSTRUCTIONS), + EVENT_PTR(BRANCH_MISSES), + EVENT_PTR(BUS_CYCLES), + EVENT_PTR(STALLED_CYCLES_FRONTEND), + EVENT_PTR(STALLED_CYCLES_BACKEND), + EVENT_PTR(REF_CPU_CYCLES), + NULL, +}; + +static struct attribute_group x86_pmu_events_group = { + .name = "events", + .attrs = events_attr, +}; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1362,6 +1418,9 @@ static int __init init_hw_perf_events(void) x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; + if (!x86_pmu.events_sysfs_show) + x86_pmu_events_group.attrs = &empty_attrs; + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -1651,6 +1710,7 @@ static struct attribute_group x86_pmu_attr_group = { static const struct attribute_group *x86_pmu_attr_groups[] = { &x86_pmu_attr_group, &x86_pmu_format_group, + &x86_pmu_events_group, NULL, }; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 271d25700297..6f75b6a7f37c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -354,6 +354,8 @@ struct x86_pmu { int attr_rdpmc; struct attribute **format_attrs; + ssize_t (*events_sysfs_show)(char *page, u64 config); + /* * CPU Hotplug hooks */ -- cgit v1.2.3 From 8300daa26755c9a194776778bd822acf1fa2dbf6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2012 14:53:12 +0200 Subject: perf/x86: Filter out undefined events from sysfs events attribute The sysfs events group attribute currently shows all hw events, including also undefined ones. This patch filters out all undefined events out of the sysfs events group attribute, so they don't even show up. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1349873598-12583-3-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9fa4c45ecad9..39737a678a86 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1321,6 +1321,26 @@ struct perf_pmu_events_attr { u64 id; }; +/* + * Remove all undefined events (x86_pmu.event_map(id) == 0) + * out of events_attr attributes. + */ +static void __init filter_events(struct attribute **attrs) +{ + int i, j; + + for (i = 0; attrs[i]; i++) { + if (x86_pmu.event_map(i)) + continue; + + for (j = i; attrs[j]; j++) + attrs[j] = attrs[j + 1]; + + /* Check the shifted attr. */ + i--; + } +} + ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { @@ -1420,6 +1440,8 @@ static int __init init_hw_perf_events(void) if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; + else + filter_events(x86_pmu_events_group.attrs); pr_info("... version: %d\n", x86_pmu.version); pr_info("... 
bit width: %d\n", x86_pmu.cntval_bits); -- cgit v1.2.3 From 43c032febde48aabcf6d59f47cdcb7b5debbdc63 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2012 14:53:13 +0200 Subject: perf/x86: Add hardware events translations for Intel cpus Add support for Intel processors to display 'events' sysfs directory (/sys/devices/cpu/events/) with hw event translations: # ls /sys/devices/cpu/events/ branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions ref-cycles stalled-cycles-backend stalled-cycles-frontend Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Stephane Eranian Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1349873598-12583-4-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 40 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event.h | 2 ++ arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ 3 files changed, 44 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 39737a678a86..8a1fa23452d6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1392,6 +1392,46 @@ static struct attribute_group x86_pmu_events_group = { .attrs = events_attr, }; +ssize_t x86_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); + u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; + u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; + bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE); + bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL); + bool any = (config & ARCH_PERFMON_EVENTSEL_ANY); + bool inv = (config & ARCH_PERFMON_EVENTSEL_INV); + ssize_t ret; + + /* + * We have whole page size to spend and just little data + * to write, so we can safely use sprintf. 
+ */ + ret = sprintf(page, "event=0x%02llx", event); + + if (umask) + ret += sprintf(page + ret, ",umask=0x%02llx", umask); + + if (edge) + ret += sprintf(page + ret, ",edge"); + + if (pc) + ret += sprintf(page + ret, ",pc"); + + if (any) + ret += sprintf(page + ret, ",any"); + + if (inv) + ret += sprintf(page + ret, ",inv"); + + if (cmask) + ret += sprintf(page + ret, ",cmask=0x%02llx", cmask); + + ret += sprintf(page + ret, "\n"); + + return ret; +} + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6f75b6a7f37c..f8aa2f6677f2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -538,6 +538,8 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +ssize_t x86_event_sysfs_show(char *page, u64 config); + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 324bb523d9d9..6106d3b44aa1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1628,6 +1628,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_constraints = intel_core_event_constraints, .guest_get_msrs = core_guest_get_msrs, .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = x86_event_sysfs_show, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1766,6 +1767,7 @@ static __initconst const struct x86_pmu intel_pmu = { .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, + .events_sysfs_show = x86_event_sysfs_show, .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, -- cgit v1.2.3 From 0bf79d44133de42af01a70a1700b8bb4b6d3fb92 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2012 14:53:14 +0200 Subject: perf/x86: Add hardware events translations for AMD cpus Add support for AMD processors to display 'events' sysfs directory (/sys/devices/cpu/events/) with hw event translations: # ls /sys/devices/cpu/events/ branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions ref-cycles stalled-cycles-backend stalled-cycles-frontend Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Stephane Eranian Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1349873598-12583-5-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +-- arch/x86/kernel/cpu/perf_event.h | 2 +- arch/x86/kernel/cpu/perf_event_amd.c | 9 +++++++++ arch/x86/kernel/cpu/perf_event_intel.c | 11 +++++++++-- 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8a1fa23452d6..0a55ab2ff849 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1392,9 +1392,8 @@ static struct attribute_group x86_pmu_events_group = { .attrs = events_attr, }; -ssize_t x86_event_sysfs_show(char *page, u64 config) +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) { - u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; bool edge = (config & 
ARCH_PERFMON_EVENTSEL_EDGE); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index f8aa2f6677f2..21419b9178b4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -538,7 +538,7 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } -ssize_t x86_event_sysfs_show(char *page, u64 config); +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 4528ae7b6ec4..c93bc4e813a0 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -568,6 +568,14 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev } } +static ssize_t amd_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | + (config & AMD64_EVENTSEL_EVENT) >> 24; + + return x86_event_sysfs_show(page, config, event); +} + static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, @@ -591,6 +599,7 @@ static __initconst const struct x86_pmu amd_pmu = { .put_event_constraints = amd_put_event_constraints, .format_attrs = amd_format_attr, + .events_sysfs_show = amd_event_sysfs_show, .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6106d3b44aa1..93b9e1181f83 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = { NULL, }; +ssize_t intel_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); + + return x86_event_sysfs_show(page, config, event); +} + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -1628,7 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_constraints = intel_core_event_constraints, .guest_get_msrs = core_guest_get_msrs, .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = x86_event_sysfs_show, + .events_sysfs_show = intel_event_sysfs_show, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1767,7 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = { .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, - .events_sysfs_show = x86_event_sysfs_show, + .events_sysfs_show = intel_event_sysfs_show, .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, -- cgit v1.2.3 From 20550a434583c78f8ff9a2819639e2bacbe58574 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2012 14:53:15 +0200 Subject: perf/x86: Add hardware events translations for Intel P6 cpus Add support for Intel P6 processors to display 'events' sysfs directory (/sys/devices/cpu/events/) with hw event translations: # ls /sys/devices/cpu/events/ branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions ref-cycles stalled-cycles-backend stalled-cycles-frontend Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Vince Weaver Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1349873598-12583-6-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- 
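(An illustrative aside, not part of the patch series itself: once a PMU sets .events_sysfs_show, each translation is an ordinary sysfs file that userspace can read. A minimal sketch in C -- the path and file name come from the 'events' directory listing earlier in this series; the printed value is only an example:)

#include <stdio.h>

int main(void)
{
	char buf[128];
	FILE *f = fopen("/sys/devices/cpu/events/cpu-cycles", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* One line per file, e.g. "event=0x3c" on Intel CPUs. */
	if (fgets(buf, sizeof(buf), f))
		printf("cpu-cycles -> %s", buf);
	fclose(f);
	return 0;
}

The files use the same term syntax that the PMU's format attributes define, so a string read this way can be fed back on the perf command line, e.g. as cpu/event=0x3c/.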
arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_p6.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 21419b9178b4..115c1ea97746 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -539,6 +539,7 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) } ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); +ssize_t intel_event_sysfs_show(char *page, u64 config); #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index e4dd0f7a0453..900b76b5d6ef 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -134,6 +134,8 @@ static __initconst const struct x86_pmu p6_pmu = { .event_constraints = p6_event_constraints, .format_attrs = intel_p6_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + }; __init int p6_pmu_init(void) -- cgit v1.2.3 From d203f0b82481abc048e134ee4d0ea3efbee77bb1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 15 Oct 2012 18:03:57 +0200 Subject: x86, MCA: Convert dont_log_ce, banks and tolerant Move those MCA configuration variables into struct mca_config and adjust the places they're used accordingly. Signed-off-by: Borislav Petkov Acked-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 97 ++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 44 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 46cbf8689692..10f4d256d9e8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -66,20 +66,10 @@ atomic_t mce_entry; DEFINE_PER_CPU(unsigned, mce_exception_count); -/* - * Tolerant levels: - * 0: always panic on uncorrected errors, log corrected errors - * 1: panic or SIGBUS on uncorrected errors, log corrected errors - * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors - * 3: never panic or SIGBUS, log all errors (for testing only) - */ -static int tolerant __read_mostly = 1; -static int banks __read_mostly; static int rip_msr __read_mostly; static int mce_bootlog __read_mostly = -1; static int monarch_timeout __read_mostly = -1; static int mce_panic_timeout __read_mostly; -static int mce_dont_log_ce __read_mostly; int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; @@ -87,6 +77,17 @@ int mce_bios_cmci_threshold __read_mostly; struct mce_bank *mce_banks __read_mostly; +struct mca_config mca_cfg __read_mostly = { + /* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corr. 
errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ + .tolerant = 1 +}; + /* User mode helper program triggered by machine check event */ static unsigned long mce_need_notify; static char mce_helper[128]; @@ -599,7 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -631,7 +632,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) + if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) mce_log(&m); /* @@ -658,14 +659,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, { int i, ret = 0; - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); if (m->status & MCI_STATUS_VAL) { __set_bit(i, validp); if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) + if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY) ret = 1; } return ret; @@ -700,7 +701,7 @@ static int mce_timed_out(u64 *t) goto out; if ((s64)*t < SPINUNIT) { /* CHECKME: Make panic default for 1 too? */ - if (tolerant < 1) + if (mca_cfg.tolerant < 1) mce_panic("Timeout synchronizing machine check over CPUs", NULL, NULL); cpu_missing = 1; @@ -750,7 +751,8 @@ static void mce_reign(void) * Grade the severity of the errors of all the CPUs. */ for_each_possible_cpu(cpu) { - int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, + int severity = mce_severity(&per_cpu(mces_seen, cpu), + mca_cfg.tolerant, &nmsg); if (severity > global_worst) { msg = nmsg; @@ -764,7 +766,7 @@ static void mce_reign(void) * This dumps all the mces in the log buffer and stops the * other CPUs. */ - if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) + if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) mce_panic("Fatal Machine check", m, msg); /* @@ -777,7 +779,7 @@ static void mce_reign(void) * No machine check event found. Must be some external * source or one CPU is hung. Panic. */ - if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) + if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3) mce_panic("Machine check from unknown source", NULL, NULL); /* @@ -946,7 +948,7 @@ static void mce_clear_state(unsigned long *toclear) { int i; - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { if (test_bit(i, toclear)) mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); } @@ -1022,7 +1024,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) int order; /* * If no_way_out gets set, there is no safe way to recover from this - * MCE. If tolerant is cranked up, we'll try anyway. + * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. */ int no_way_out = 0; /* @@ -1038,7 +1040,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) this_cpu_inc(mce_exception_count); - if (!banks) + if (!mca_cfg.banks) goto out; mce_gather_info(&m, regs); @@ -1065,7 +1067,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * because the first one to see it will clear it. 
*/ order = mce_start(&no_way_out); - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1093,7 +1095,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); - severity = mce_severity(&m, tolerant, NULL); + severity = mce_severity(&m, mca_cfg.tolerant, NULL); /* * When machine check was for corrected handler don't touch, @@ -1117,7 +1119,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * When the ring overflows we just ignore the AO error. * RED-PEN add some logging mechanism when * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for tolerant == 0 + * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0 */ if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) mce_ring_add(m.addr >> PAGE_SHIFT); @@ -1149,7 +1151,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * issues we try to recover, or limit damage to the current * process. */ - if (tolerant < 3) { + if (mca_cfg.tolerant < 3) { if (no_way_out) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { @@ -1377,11 +1379,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; + u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < banks; i++) { + + for (i = 0; i < num_banks; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1401,7 +1405,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!banks) + if (!mca_cfg.banks) pr_info("CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { @@ -1411,8 +1415,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void) } /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; + WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); + mca_cfg.banks = b; + if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); @@ -1448,7 +1453,7 @@ static void __mcheck_cpu_init_generic(void) if (cap & MCG_CTL_P) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; if (!b->init) @@ -1489,6 +1494,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) /* Add per CPU specific workarounds here */ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { + struct mca_config *cfg = &mca_cfg; + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("unknown CPU type - not enabling MCE support\n"); return -EOPNOTSUPP; @@ -1496,7 +1503,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) { + if (c->x86 == 15 && cfg->banks > 4) { /* * disable GART TBL walk error reporting, which * trips off incorrectly with the IOMMU & 3ware @@ -1515,7 +1522,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. 
*/ - if (c->x86 == 6 && banks > 0) + if (c->x86 == 6 && cfg->banks > 0) mce_banks[0].ctl = 0; /* @@ -1566,7 +1573,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. */ - if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) + if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) mce_banks[0].init = 0; /* @@ -1951,6 +1958,8 @@ static struct miscdevice mce_chrdev_device = { */ static int __init mcheck_enable(char *str) { + struct mca_config *cfg = &mca_cfg; + if (*str == 0) { enable_p5_mce(); return 1; @@ -1962,7 +1971,7 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "no_cmci")) mce_cmci_disabled = 1; else if (!strcmp(str, "dont_log_ce")) - mce_dont_log_ce = 1; + cfg->dont_log_ce = true; else if (!strcmp(str, "ignore_ce")) mce_ignore_ce = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) @@ -1970,7 +1979,7 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "bios_cmci_threshold")) mce_bios_cmci_threshold = 1; else if (isdigit(str[0])) { - get_option(&str, &tolerant); + get_option(&str, &(cfg->tolerant)); if (*str == ',') { ++str; get_option(&str, &monarch_timeout); @@ -2002,7 +2011,7 @@ static int mce_disable_error_reporting(void) { int i; - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2190,9 +2199,9 @@ static ssize_t store_int_with_restart(struct device *s, } static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); -static DEVICE_INT_ATTR(tolerant, 0644, tolerant); +static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant); static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); -static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); +static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce); static struct dev_ext_attribute dev_attr_check_interval = { __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), @@ -2253,7 +2262,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) if (err) goto error; } - for (j = 0; j < banks; j++) { + for (j = 0; j < mca_cfg.banks; j++) { err = device_create_file(dev, &mce_banks[j].attr); if (err) goto error2; @@ -2285,7 +2294,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) for (i = 0; mce_device_attrs[i]; i++) device_remove_file(dev, mce_device_attrs[i]); - for (i = 0; i < banks; i++) + for (i = 0; i < mca_cfg.banks; i++) device_remove_file(dev, &mce_banks[i].attr); device_unregister(dev); @@ -2304,7 +2313,7 @@ static void __cpuinit mce_disable_cpu(void *h) if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2322,7 +2331,7 @@ static void __cpuinit mce_reenable_cpu(void *h) if (!(action & CPU_TASKS_FROZEN)) cmci_reenable(); - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2375,7 +2384,7 @@ static __init void mce_init_banks(void) { int i; - for (i = 0; i < banks; i++) { + for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; struct device_attribute *a = &b->attr; -- cgit v1.2.3 From 84c2559dee2d69606f1fd4ce6563e79e7611a7b7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 15 Oct 2012 19:59:18 +0200 Subject: x86, MCA: Convert rip_msr, mce_bootlog, monarch_timeout Move above configuration variables into struct mca_config and adjust usage places accordingly. 
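The shape of the conversion is the same throughout this series: loose __read_mostly file-scope variables become fields of the single mca_cfg object, so every user references one well-known name. As a rough sketch of the struct after this patch (field types are inferred from the diffs; the real definition sits in a header outside this excerpt):

struct mca_config {
	bool	dont_log_ce;		/* was: mce_dont_log_ce */
	int	tolerant;		/* was: tolerant = 1 */
	u32	rip_msr;		/* was: rip_msr */
	int	bootlog;		/* was: mce_bootlog = -1 */
	int	monarch_timeout;	/* was: monarch_timeout = -1 */
	u8	banks;			/* was: banks */
};

extern struct mca_config mca_cfg;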
Signed-off-by: Borislav Petkov Acked-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 51 +++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 10f4d256d9e8..aa11019eeb0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -66,9 +66,6 @@ atomic_t mce_entry; DEFINE_PER_CPU(unsigned, mce_exception_count); -static int rip_msr __read_mostly; -static int mce_bootlog __read_mostly = -1; -static int monarch_timeout __read_mostly = -1; static int mce_panic_timeout __read_mostly; int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; @@ -78,6 +75,7 @@ int mce_bios_cmci_threshold __read_mostly; struct mce_bank *mce_banks __read_mostly; struct mca_config mca_cfg __read_mostly = { + .bootlog = -1, /* * Tolerant levels: * 0: always panic on uncorrected errors, log corrected errors @@ -85,7 +83,8 @@ struct mca_config mca_cfg __read_mostly = { * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors * 3: never panic or SIGBUS, log all errors (for testing only) */ - .tolerant = 1 + .tolerant = 1, + .monarch_timeout = -1 }; /* User mode helper program triggered by machine check event */ @@ -373,7 +372,7 @@ static int msr_to_offset(u32 msr) { unsigned bank = __this_cpu_read(injectm.bank); - if (msr == rip_msr) + if (msr == mca_cfg.rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) return offsetof(struct mce, status); @@ -452,8 +451,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) m->cs |= 3; } /* Use accurate RIP reporting if available. */ - if (rip_msr) - m->ip = mce_rdmsrl(rip_msr); + if (mca_cfg.rip_msr) + m->ip = mce_rdmsrl(mca_cfg.rip_msr); } } @@ -697,7 +696,7 @@ static int mce_timed_out(u64 *t) rmb(); if (atomic_read(&mce_paniced)) wait_for_panic(); - if (!monarch_timeout) + if (!mca_cfg.monarch_timeout) goto out; if ((s64)*t < SPINUNIT) { /* CHECKME: Make panic default for 1 too? */ @@ -803,7 +802,7 @@ static int mce_start(int *no_way_out) { int order; int cpus = num_online_cpus(); - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; + u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; if (!timeout) return -1; @@ -867,7 +866,7 @@ static int mce_start(int *no_way_out) static int mce_end(int order) { int ret = -1; - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; + u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; if (!timeout) goto reset; @@ -1427,7 +1426,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) /* Use accurate RIP reporting if available. */ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) - rip_msr = MSR_IA32_MCG_EIP; + mca_cfg.rip_msr = MSR_IA32_MCG_EIP; if (cap & MCG_SER_P) mce_ser = 1; @@ -1437,15 +1436,19 @@ static int __cpuinit __mcheck_cpu_cap_init(void) static void __mcheck_cpu_init_generic(void) { + enum mcp_flags m_fl = 0; mce_banks_t all_banks; u64 cap; int i; + if (!mca_cfg.bootlog) + m_fl = MCP_DONTLOG; + /* * Log the machine checks left over from the previous reset. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC|(!mce_bootlog ? 
MCP_DONTLOG : 0), &all_banks); + machine_check_poll(MCP_UC | m_fl, &all_banks); set_in_cr4(X86_CR4_MCE); @@ -1511,12 +1514,12 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) */ clear_bit(10, (unsigned long *)&mce_banks[4].ctl); } - if (c->x86 <= 17 && mce_bootlog < 0) { + if (c->x86 <= 17 && cfg->bootlog < 0) { /* * Lots of broken BIOS around that don't clear them * by default and leave crap in there. Don't log: */ - mce_bootlog = 0; + cfg->bootlog = 0; } /* * Various K7s with broken bank 0 around. Always disable @@ -1581,22 +1584,22 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * synchronization with a one second timeout. */ if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - monarch_timeout < 0) - monarch_timeout = USEC_PER_SEC; + cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; /* * There are also broken BIOSes on some Pentium M and * earlier systems: */ - if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) - mce_bootlog = 0; + if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) + cfg->bootlog = 0; if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; } - if (monarch_timeout < 0) - monarch_timeout = 0; - if (mce_bootlog != 0) + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = 0; + if (cfg->bootlog != 0) mce_panic_timeout = 30; return 0; @@ -1975,14 +1978,14 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "ignore_ce")) mce_ignore_ce = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) - mce_bootlog = (str[0] == 'b'); + cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) mce_bios_cmci_threshold = 1; else if (isdigit(str[0])) { get_option(&str, &(cfg->tolerant)); if (*str == ',') { ++str; - get_option(&str, &monarch_timeout); + get_option(&str, &(cfg->monarch_timeout)); } } else { pr_info("mce argument %s ignored. Please use /sys\n", str); @@ -2200,7 +2203,7 @@ static ssize_t store_int_with_restart(struct device *s, static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant); -static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); +static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout); static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce); static struct dev_ext_attribute dev_attr_check_interval = { -- cgit v1.2.3 From 7af19e4afdafa4adb5fffc569d5bb1c5e568ba98 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 15 Oct 2012 20:25:17 +0200 Subject: x86, MCA: Convert the next three variables batch Move them into the mca_config struct and adjust code touching them accordingly. 
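The boot-parameter parser shows the effect of the conversion in one place; abridged here from the mcheck_enable() hunk below (not a verbatim copy):

static int __init mcheck_enable(char *str)
{
	struct mca_config *cfg = &mca_cfg;

	if (!strcmp(str, "no_cmci"))
		cfg->cmci_disabled = true;	/* was: mce_cmci_disabled = 1 */
	else if (!strcmp(str, "ignore_ce"))
		cfg->ignore_ce = true;		/* was: mce_ignore_ce = 1 */
	else if (isdigit(str[0]))
		get_option(&str, &cfg->tolerant);

	return 1;
}
__setup("mce", mcheck_enable);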
Signed-off-by: Borislav Petkov Acked-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 35 ++++++++++++++++------------------ arch/x86/kernel/cpu/mcheck/mce_intel.c | 2 +- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index aa11019eeb0e..8c7a90d89852 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -66,9 +66,6 @@ atomic_t mce_entry; DEFINE_PER_CPU(unsigned, mce_exception_count); -static int mce_panic_timeout __read_mostly; -int mce_cmci_disabled __read_mostly; -int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; int mce_bios_cmci_threshold __read_mostly; @@ -302,7 +299,7 @@ static void wait_for_panic(void) while (timeout-- > 0) udelay(1); if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; + panic_timeout = mca_cfg.panic_timeout; panic("Panicing machine check CPU died"); } @@ -360,7 +357,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) pr_emerg(HW_ERR "Machine check: %s\n", exp); if (!fake_panic) { if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; + panic_timeout = mca_cfg.panic_timeout; panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); @@ -1600,7 +1597,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; if (cfg->bootlog != 0) - mce_panic_timeout = 30; + cfg->panic_timeout = 30; return 0; } @@ -1645,7 +1642,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) __this_cpu_write(mce_next_interval, iv); - if (mce_ignore_ce || !iv) + if (mca_cfg.ignore_ce || !iv) return; t->expires = round_jiffies(jiffies + iv); @@ -1972,11 +1969,11 @@ static int __init mcheck_enable(char *str) if (!strcmp(str, "off")) mce_disabled = 1; else if (!strcmp(str, "no_cmci")) - mce_cmci_disabled = 1; + cfg->cmci_disabled = true; else if (!strcmp(str, "dont_log_ce")) cfg->dont_log_ce = true; else if (!strcmp(str, "ignore_ce")) - mce_ignore_ce = 1; + cfg->ignore_ce = true; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) @@ -2154,15 +2151,15 @@ static ssize_t set_ignore_ce(struct device *s, if (strict_strtoull(buf, 0, &new) < 0) return -EINVAL; - if (mce_ignore_ce ^ !!new) { + if (mca_cfg.ignore_ce ^ !!new) { if (new) { /* disable ce features */ mce_timer_delete_all(); on_each_cpu(mce_disable_cmci, NULL, 1); - mce_ignore_ce = 1; + mca_cfg.ignore_ce = true; } else { /* enable ce features */ - mce_ignore_ce = 0; + mca_cfg.ignore_ce = false; on_each_cpu(mce_enable_ce, (void *)1, 1); } } @@ -2178,14 +2175,14 @@ static ssize_t set_cmci_disabled(struct device *s, if (strict_strtoull(buf, 0, &new) < 0) return -EINVAL; - if (mce_cmci_disabled ^ !!new) { + if (mca_cfg.cmci_disabled ^ !!new) { if (new) { /* disable cmci */ on_each_cpu(mce_disable_cmci, NULL, 1); - mce_cmci_disabled = 1; + mca_cfg.cmci_disabled = true; } else { /* enable cmci */ - mce_cmci_disabled = 0; + mca_cfg.cmci_disabled = false; on_each_cpu(mce_enable_ce, NULL, 1); } } @@ -2212,13 +2209,13 @@ static struct dev_ext_attribute dev_attr_check_interval = { }; static struct dev_ext_attribute dev_attr_ignore_ce = { - __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), - &mce_ignore_ce + __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce), + &mca_cfg.ignore_ce }; static struct dev_ext_attribute dev_attr_cmci_disabled = { - __ATTR(cmci_disabled, 
0644, device_show_int, set_cmci_disabled), - &mce_cmci_disabled + __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled), + &mca_cfg.cmci_disabled }; static struct device_attribute *mce_device_attrs[] = { diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 5f88abf07e9c..1acd8ecba1c3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -53,7 +53,7 @@ static int cmci_supported(int *banks) { u64 cap; - if (mce_cmci_disabled || mce_ignore_ce) + if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) return 0; /* -- cgit v1.2.3 From 1462594bf2866c1dc80066ed6f49f4331c551901 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 17 Oct 2012 12:05:33 +0200 Subject: x86, MCA: Finish mca_config conversion mce_ser, mce_bios_cmci_threshold and mce_disabled are the last three bools which need conversion. Move them to the mca_config struct and adjust usage sites accordingly. Signed-off-by: Borislav Petkov Acked-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 -- arch/x86/kernel/cpu/mcheck/mce-severity.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce.c | 36 ++++++++++++++----------------- arch/x86/kernel/cpu/mcheck/mce_intel.c | 6 +++--- 4 files changed, 21 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 6a05c1d327a9..5b7d4fa5d3b7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -24,8 +24,6 @@ struct mce_bank { int mce_severity(struct mce *a, int tolerant, char **msg); struct dentry *mce_get_debugfs_dir(void); -extern int mce_ser; - extern struct mce_bank *mce_banks; #ifdef CONFIG_X86_MCE_INTEL diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 13017626f9a8..beb1f1689e52 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg) continue; if ((m->mcgstatus & s->mcgmask) != s->mcgres) continue; - if (s->ser == SER_REQUIRED && !mce_ser) + if (s->ser == SER_REQUIRED && !mca_cfg.ser) continue; - if (s->ser == NO_SER && mce_ser) + if (s->ser == NO_SER && mca_cfg.ser) continue; if (s->context && ctx != s->context) continue; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8c7a90d89852..80dbda84f1c3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -58,18 +58,13 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); #define CREATE_TRACE_POINTS #include -int mce_disabled __read_mostly; - #define SPINUNIT 100 /* 100ns */ atomic_t mce_entry; DEFINE_PER_CPU(unsigned, mce_exception_count); -int mce_ser __read_mostly; -int mce_bios_cmci_threshold __read_mostly; - -struct mce_bank *mce_banks __read_mostly; +struct mce_bank *mce_banks __read_mostly; struct mca_config mca_cfg __read_mostly = { .bootlog = -1, @@ -510,7 +505,7 @@ static int mce_ring_add(unsigned long pfn) int mce_available(struct cpuinfo_x86 *c) { - if (mce_disabled) + if (mca_cfg.disabled) return 0; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -562,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i) /* * Mask the reported address by the reported granularity. 
*/ - if (mce_ser && (m->status & MCI_STATUS_MISCV)) { + if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) { u8 shift = MCI_MISC_ADDR_LSB(m->misc); m->addr >>= shift; m->addr <<= shift; @@ -617,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * TBD do the same check for MCI_STATUS_EN here? */ if (!(flags & MCP_UC) && - (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) + (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; mce_read_aux(&m, i); @@ -1009,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi) */ void do_machine_check(struct pt_regs *regs, long error_code) { + struct mca_config *cfg = &mca_cfg; struct mce m, *final; int i; int worst = 0; @@ -1036,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) this_cpu_inc(mce_exception_count); - if (!mca_cfg.banks) + if (!cfg->banks) goto out; mce_gather_info(&m, regs); @@ -1063,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * because the first one to see it will clear it. */ order = mce_start(&no_way_out); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1082,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * Non uncorrected or non signaled errors are handled by * machine_check_poll. Leave them alone, unless this panics. */ - if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && + if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && !no_way_out) continue; @@ -1091,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); - severity = mce_severity(&m, mca_cfg.tolerant, NULL); + severity = mce_severity(&m, cfg->tolerant, NULL); /* * When machine check was for corrected handler don't touch, @@ -1147,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) * issues we try to recover, or limit damage to the current * process. 
*/ - if (mca_cfg.tolerant < 3) { + if (cfg->tolerant < 3) { if (no_way_out) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { @@ -1426,7 +1422,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) mca_cfg.rip_msr = MSR_IA32_MCG_EIP; if (cap & MCG_SER_P) - mce_ser = 1; + mca_cfg.ser = true; return 0; } @@ -1675,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = */ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { - if (mce_disabled) + if (mca_cfg.disabled) return; if (__mcheck_cpu_ancient_init(c)) @@ -1685,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) return; if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { - mce_disabled = 1; + mca_cfg.disabled = true; return; } @@ -1967,7 +1963,7 @@ static int __init mcheck_enable(char *str) if (*str == '=') str++; if (!strcmp(str, "off")) - mce_disabled = 1; + cfg->disabled = true; else if (!strcmp(str, "no_cmci")) cfg->cmci_disabled = true; else if (!strcmp(str, "dont_log_ce")) @@ -1977,7 +1973,7 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) - mce_bios_cmci_threshold = 1; + cfg->bios_cmci_threshold = true; else if (isdigit(str[0])) { get_option(&str, &(cfg->tolerant)); if (*str == ',') { @@ -2435,7 +2431,7 @@ device_initcall_sync(mcheck_init_device); */ static int __init mcheck_disable(char *str) { - mce_disabled = 1; + mca_cfg.disabled = true; return 1; } __setup("nomce", mcheck_disable); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1acd8ecba1c3..79b2b6b6e613 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -200,7 +200,7 @@ static void cmci_discover(int banks) continue; } - if (!mce_bios_cmci_threshold) { + if (!mca_cfg.bios_cmci_threshold) { val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; val |= CMCI_THRESHOLD; } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { @@ -227,7 +227,7 @@ static void cmci_discover(int banks) * set the thresholds properly or does not work with * this boot option. Note down now and report later. */ - if (mce_bios_cmci_threshold && bios_zero_thresh && + if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) bios_wrong_thresh = 1; } else { @@ -235,7 +235,7 @@ static void cmci_discover(int banks) } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (mce_bios_cmci_threshold && bios_wrong_thresh) { + if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { pr_info_once( "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); pr_info_once( -- cgit v1.2.3 From e6d41e8c697e07832efa4a85bf23438bc4c4e1b2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 29 Oct 2012 18:40:08 +0100 Subject: x86, AMD: Change Boris' email address Move to private email and put in maintained status. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1351532410-4887-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 698b6ec12e0f..1ac581f38dfa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -6,7 +6,7 @@ * * Written by Jacob Shin - AMD, Inc. 
* - * Support: borislav.petkov@amd.com + * Maintained by: Borislav Petkov * * April 2006 * - added support for AMD Family 0x10 processors -- cgit v1.2.3 From 95d18aa2b6c05351181934b3bc34ce038cc7b637 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Mon, 29 Oct 2012 21:48:17 +0100 Subject: perf/x86: Fix sparse warnings FYI, there are new sparse warnings: arch/x86/kernel/cpu/perf_event.c:1356:18: sparse: symbol 'events_attr' was not declared. Should it be static? This patch makes it static and also adds the static keyword to fix arch/x86/kernel/cpu/perf_event.c:1344:9: warning: symbol 'events_sysfs_show' was not declared. Signed-off-by: Peter Huewe Cc: Peter Zijlstra Cc: Yuanhan Liu Cc: fengguang.wu@intel.com Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-lerdpXlnruh0yvWs2owwuizl@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a55ab2ff849..4428fd178bce 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1341,7 +1341,7 @@ static void __init filter_events(struct attribute **attrs) } } -ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, +static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ @@ -1373,7 +1373,7 @@ EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); static struct attribute *empty_attrs; -struct attribute *events_attr[] = { +static struct attribute *events_attr[] = { EVENT_PTR(CPU_CYCLES), EVENT_PTR(INSTRUCTIONS), EVENT_PTR(CACHE_REFERENCES), -- cgit v1.2.3 From 85b97637bb40a9f486459dd254598759af9c3d50 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Mon, 29 Oct 2012 11:01:50 +0800 Subject: x86/mce: Do not change worker's running cpu in cmci_rediscover(). cmci_rediscover() used set_cpus_allowed_ptr() to change the current process's running cpu, and migrate itself to the dest cpu. But worker processes are not allowed to be migrated. If current is a worker, the worker will be migrated to another cpu, but the corresponding worker_pool is still on the original cpu. In this case, the following BUG_ON in try_to_wake_up_local() will be triggered: BUG_ON(rq != this_rq()); This will cause the kernel panic. The call trace is like the following: [ 6155.451107] ------------[ cut here ]------------ [ 6155.452019] kernel BUG at kernel/sched/core.c:1654! ...... [ 6155.452019] RIP: 0010:[] [] try_to_wake_up_local+0x115/0x130 ...... [ 6155.452019] Call Trace: [ 6155.452019] [] __schedule+0x764/0x880 [ 6155.452019] [] schedule+0x29/0x70 [ 6155.452019] [] schedule_timeout+0x235/0x2d0 [ 6155.452019] [] ? mark_held_locks+0x8d/0x140 [ 6155.452019] [] ? __lock_release+0x133/0x1a0 [ 6155.452019] [] ? _raw_spin_unlock_irq+0x30/0x50 [ 6155.452019] [] ? trace_hardirqs_on_caller+0x105/0x190 [ 6155.452019] [] wait_for_common+0x12b/0x180 [ 6155.452019] [] ? try_to_wake_up+0x2f0/0x2f0 [ 6155.452019] [] wait_for_completion+0x1d/0x20 [ 6155.452019] [] stop_one_cpu+0x8a/0xc0 [ 6155.452019] [] ? __migrate_task+0x1a0/0x1a0 [ 6155.452019] [] ? 
complete+0x28/0x60 [ 6155.452019] [] set_cpus_allowed_ptr+0x128/0x130 [ 6155.452019] [] cmci_rediscover+0xf5/0x140 [ 6155.452019] [] mce_cpu_callback+0x18d/0x19d [ 6155.452019] [] notifier_call_chain+0x67/0x150 [ 6155.452019] [] __raw_notifier_call_chain+0xe/0x10 [ 6155.452019] [] __cpu_notify+0x20/0x40 [ 6155.452019] [] cpu_notify_nofail+0x15/0x30 [ 6155.452019] [] _cpu_down+0x262/0x2e0 [ 6155.452019] [] cpu_down+0x36/0x50 [ 6155.452019] [] acpi_processor_remove+0x50/0x11e [ 6155.452019] [] acpi_device_remove+0x90/0xb2 [ 6155.452019] [] __device_release_driver+0x7c/0xf0 [ 6155.452019] [] device_release_driver+0x2f/0x50 [ 6155.452019] [] acpi_bus_remove+0x32/0x6d [ 6155.452019] [] acpi_bus_trim+0x87/0xee [ 6155.452019] [] acpi_bus_hot_remove_device+0x88/0x16b [ 6155.452019] [] acpi_os_execute_deferred+0x27/0x34 [ 6155.452019] [] process_one_work+0x219/0x680 [ 6155.452019] [] ? process_one_work+0x1b8/0x680 [ 6155.452019] [] ? acpi_os_wait_events_complete+0x23/0x23 [ 6155.452019] [] worker_thread+0x12e/0x320 [ 6155.452019] [] ? manage_workers+0x110/0x110 [ 6155.452019] [] kthread+0xc6/0xd0 [ 6155.452019] [] kernel_thread_helper+0x4/0x10 [ 6155.452019] [] ? retint_restore_args+0x13/0x13 [ 6155.452019] [] ? __init_kthread_worker+0x70/0x70 [ 6155.452019] [] ? gs_change+0x13/0x13 This patch removes the set_cpus_allowed_ptr() call, and put the cmci rediscover jobs onto all the other cpus using system_wq. This could bring some delay for the jobs. Signed-off-by: Tang Chen Signed-off-by: Miao Xie Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 5f88abf07e9c..4f9a3cbfc4a3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -285,34 +285,39 @@ void cmci_clear(void) raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } +static long cmci_rediscover_work_func(void *arg) +{ + int banks; + + /* Recheck banks in case CPUs don't all have the same */ + if (cmci_supported(&banks)) + cmci_discover(banks); + + return 0; +} + /* * After a CPU went down cycle through all the others and rediscover * Must run in process context. */ void cmci_rediscover(int dying) { - int banks; - int cpu; - cpumask_var_t old; + int cpu, banks; if (!cmci_supported(&banks)) return; - if (!alloc_cpumask_var(&old, GFP_KERNEL)) - return; - cpumask_copy(old, ¤t->cpus_allowed); for_each_online_cpu(cpu) { if (cpu == dying) continue; - if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) + + if (cpu == smp_processor_id()) { + cmci_rediscover_work_func(NULL); continue; - /* Recheck banks in case CPUs don't all have the same */ - if (cmci_supported(&banks)) - cmci_discover(banks); - } + } - set_cpus_allowed_ptr(current, old); - free_cpumask_var(old); + work_on_cpu(cpu, cmci_rediscover_work_func, NULL); + } } /* -- cgit v1.2.3 From 2bbf0a1427c377350f001fbc6260995334739ad7 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 31 Oct 2012 17:20:50 +0100 Subject: x86, amd: Disable way access filter on Piledriver CPUs The Way Access Filter in recent AMD CPUs may hurt the performance of some workloads, caused by aliasing issues in the L1 cache. This patch disables it on the affected CPUs. 
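The fix itself is a small, guarded read-modify-write of MSR 0xC001_1021; condensed here from the init_amd() hunk in the diff below:

if ((c->x86 == 0x15) &&
    (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
	u64 val;

	/* Bits 1-4 of the MSR disable the Way Access Filter. */
	if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
		val |= 0x1E;
		wrmsrl_safe(0xc0011021, val);
	}
}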
The issue is similar to that one of last year: http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html This new patch does not replace the old one, we just need another quirk for newer CPUs. The performance penalty without the patch depends on the circumstances, but is a bit less than the last year's 3%. The workloads affected would be those that access code from the same physical page under different virtual addresses, so different processes using the same libraries with ASLR or multiple instances of PIE-binaries. The code needs to be accessed simultaneously from both cores of the same compute unit. More details can be found here: http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf CPUs affected are anything with the core known as Piledriver. That includes the new parts of the AMD A-Series (aka Trinity) and the just released new CPUs of the FX-Series (aka Vishera). The model numbering is a bit odd here: FX CPUs have model 2, A-Series has model 10h, with possible extensions to 1Fh. Hence the range of model ids. Signed-off-by: Andre Przywara Link: http://lkml.kernel.org/r/1351700450-9277-1-git-send-email-osp@andrep.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2c0d12..1b7d1656a042 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -631,6 +631,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* + * The way access filter has a performance penalty on some workloads. + * Disable it on the affected CPUs. + */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { + u64 val; + + if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { + val |= 0x1E; + wrmsrl_safe(0xc0011021, val); + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ -- cgit v1.2.3 From 193f3fcb3ab769bab4a2b9fa181eef3e5699a352 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 19 Oct 2012 10:58:13 +0200 Subject: x86: Add cpu_has_topoext Introduce cpu_has_topoext to check for AMD's CPUID topology extensions support. It indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX See AMD's CPUID Specification, Publication # 25481 (as of Rev. 2.34 September 2010) Signed-off-by: Andreas Herrmann Link: http://lkml.kernel.org/r/20121019085813.GD26718@alberich Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2c0d12..64e9ad4e49a0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) int cpu = smp_processor_id(); /* get information required for multi-node processors */ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) { + if (cpu_has_topoext) { u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); -- cgit v1.2.3 From 04a1541828ea223169eb44a336bfad8ec0dfb46a Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 19 Oct 2012 10:59:33 +0200 Subject: x86, cacheinfo: Determine number of cache leafs using CPUID 0x8000001d on AMD CPUID 0x8000001d works quite similar to Intels' CPUID function 4. Use it to determine number of cache leafs. Signed-off-by: Andreas Herrmann Link: http://lkml.kernel.org/r/20121019085933.GE26718@alberich Signed-off-by: H. 
From 04a1541828ea223169eb44a336bfad8ec0dfb46a Mon Sep 17 00:00:00 2001
From: Andreas Herrmann
Date: Fri, 19 Oct 2012 10:59:33 +0200
Subject: x86, cacheinfo: Determine number of cache leafs using CPUID 0x8000001d on AMD

CPUID 0x8000001d works quite similarly to Intel's CPUID function 4.
Use it to determine the number of cache leaves.

Signed-off-by: Andreas Herrmann
Link: http://lkml.kernel.org/r/20121019085933.GE26718@alberich
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/amd.c             |  7 +------
 arch/x86/kernel/cpu/intel_cacheinfo.c | 28 +++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 64e9ad4e49a0..a8538e6d2ff2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -643,12 +643,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
-	if (c->extended_cpuid_level >= 0x80000006) {
-		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
-		else
-			num_cache_leaves = 3;
-	}
+	init_amd_cacheinfo(c);
 
 	if (c->x86 >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_K8);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 93c5451bdd52..8ce7a83252f9 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -557,21 +557,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 	return 0;
 }
 
-static int __cpuinit find_num_cache_leaves(void)
+static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
 {
-	unsigned int		eax, ebx, ecx, edx;
+	unsigned int		eax, ebx, ecx, edx, op;
 	union _cpuid4_leaf_eax	cache_eax;
 	int 			i = -1;
 
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		op = 0x8000001d;
+	else
+		op = 4;
+
 	do {
 		++i;
-		/* Do cpuid(4) loop to find out num_cache_leaves */
-		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+		/* Do cpuid(op) loop to find out num_cache_leaves */
+		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 		cache_eax.full = eax;
 	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 	return i;
 }
 
+void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
+{
+
+	if (cpu_has_topoext) {
+		num_cache_leaves = find_num_cache_leaves(c);
+	} else if (c->extended_cpuid_level >= 0x80000006) {
+		if (cpuid_edx(0x80000006) & 0xf000)
+			num_cache_leaves = 4;
+		else
+			num_cache_leaves = 3;
+	}
+}
+
 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
 	/* Cache sizes */
@@ -588,7 +606,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 	if (is_initialized == 0) {
 		/* Init num_cache_leaves from boot CPU */
-		num_cache_leaves = find_num_cache_leaves();
+		num_cache_leaves = find_num_cache_leaves(c);
 		is_initialized++;
 	}
--
cgit v1.2.3
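
[ Editor's note: the counting loop above can be reproduced from user
space. A sketch, assuming an AMD CPU with topology extensions (Intel
would use leaf 4 instead of 0x8000001d); in both encodings EAX[4:0] is
the cache type, 0 meaning "no more caches", and EAX[7:5] is the level: ]

#include <cpuid.h>
#include <stdio.h>

#define CACHE_TYPE_NULL 0

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i = 0;
	const unsigned int op = 0x8000001d;	/* assumed: AMD with TOPOEXT */

	for (;;) {
		/* subleaf i describes the i-th cache */
		__cpuid_count(op, i, eax, ebx, ecx, edx);
		if ((eax & 0x1f) == CACHE_TYPE_NULL)
			break;
		printf("leaf %u: level %u, type %u\n",
		       i, (eax >> 5) & 0x7, eax & 0x1f);
		i++;
	}
	printf("number of cache leaves: %u\n", i);
	return 0;
}
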
From 2e8458dfe4202df75543402c7343b8f94de4101e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann
Date: Fri, 19 Oct 2012 11:00:49 +0200
Subject: x86, cacheinfo: Make use of CPUID 0x8000001d for cache information on AMD

Rely on CPUID 0x8000001d for cache information when AMD CPUID topology
extensions are available.

Signed-off-by: Andreas Herrmann
Link: http://lkml.kernel.org/r/20121019090049.GF26718@alberich
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 8ce7a83252f9..cd2e1ccce591 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -538,7 +538,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 	unsigned		edx;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		amd_cpuid4(index, &eax, &ebx, &ecx);
+		if (cpu_has_topoext)
+			cpuid_count(0x8000001d, index, &eax.full,
+				    &ebx.full, &ecx.full, &edx);
+		else
+			amd_cpuid4(index, &eax, &ebx, &ecx);
 		amd_init_l3_cache(this_leaf, index);
 	} else {
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
--
cgit v1.2.3
From 27d3a8a26ada7660116fdd6830096008c063ee96 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann
Date: Fri, 19 Oct 2012 11:02:09 +0200
Subject: x86, cacheinfo: Base cache sharing info on CPUID 0x8000001d on AMD

The patch is based on a patch submitted by Hans Rosenfeld.
See http://marc.info/?l=linux-kernel&m=133908777200931

Note that CPUID Fn8000_001D_EAX differs slightly from Intel's CPUID
function 4. Bits 14-25 contain NumSharingCache, the number of cores
sharing this cache; software adds one to the field value to get the
actual count. The corresponding bits on Intel are defined as the
"maximum number of threads sharing this cache" (with a "plus 1"
encoding). Thus a different method has to be used to determine which
cores are sharing a cache level.

Signed-off-by: Andreas Herrmann
Link: http://lkml.kernel.org/r/20121019090209.GG26718@alberich
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 41 +++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 14 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index cd2e1ccce591..fe9edec6698a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -750,37 +750,50 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 {
 	struct _cpuid4_info *this_leaf;
-	int ret, i, sibling;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	int i, sibling;
 
-	ret = 0;
-	if (index == 3) {
-		ret = 1;
-		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+	if (cpu_has_topoext) {
+		unsigned int apicid, nshared, first, last;
+
+		if (!per_cpu(ici_cpuid4_info, cpu))
+			return 0;
+
+		this_leaf = CPUID4_INFO_IDX(cpu, index);
+		nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+		apicid = cpu_data(cpu).apicid;
+		first = apicid - (apicid % nshared);
+		last = first + nshared - 1;
+
+		for_each_online_cpu(i) {
+			apicid = cpu_data(i).apicid;
+			if ((apicid < first) || (apicid > last))
+				continue;
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
 			this_leaf = CPUID4_INFO_IDX(i, index);
-			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
-				if (!cpu_online(sibling))
+
+			for_each_online_cpu(sibling) {
+				apicid = cpu_data(sibling).apicid;
+				if ((apicid < first) || (apicid > last))
 					continue;
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
-		ret = 1;
-		for_each_cpu(i, cpu_sibling_mask(cpu)) {
+	} else if (index == 3) {
+		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
 			this_leaf = CPUID4_INFO_IDX(i, index);
-			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 				if (!cpu_online(sibling))
 					continue;
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-	}
+	} else
+		return 0;
 
-	return ret;
+	return 1;
 }
 
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
--
cgit v1.2.3
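
[ Editor's note: a worked example of the APIC-ID arithmetic above, with
made-up numbers. It relies on the property that the APIC IDs of cores
sharing a cache form a contiguous block aligned to nshared, where
nshared comes from CPUID Fn8000_001D EAX[25:14] plus one: ]

#include <stdio.h>

int main(void)
{
	unsigned int apicid = 21;	/* hypothetical CPU's APIC ID */
	unsigned int nshared = 2;	/* e.g. an L2 shared by one compute unit */
	unsigned int first, last;

	/* Round down to the start of the aligned sharing block. */
	first = apicid - (apicid % nshared);	/* 20 */
	last = first + nshared - 1;		/* 21 */

	printf("cache shared by APIC IDs %u..%u\n", first, last);
	return 0;
}
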
From 27fd185f3d9e83c64b7a596881d7751a638c9683 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Tue, 13 Nov 2012 11:32:47 -0800
Subject: x86, hotplug: During CPU0 online, enable x2apic, set_numa_node.

Previously these functions were not run on the BSP (CPU 0, the boot
processor), since boot processor init was executed only before this
functionality was initialized.

Signed-off-by: Fenghua Yu
Link: http://lkml.kernel.org/r/1352835171-3958-11-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/common.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e71..ca165ac6793b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1237,7 +1237,7 @@ void __cpuinit cpu_init(void)
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-	if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
+	if (this_cpu_read(numa_node) == 0 &&
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
@@ -1269,8 +1269,7 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	x86_configure_nx();
-	if (cpu != 0)
-		enable_x2apic();
+	enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
--
cgit v1.2.3


From 30242aa6023b71325c6b8addac06faf544a85fd0 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Tue, 13 Nov 2012 11:32:48 -0800
Subject: x86, hotplug: The first online processor saves the MTRR state

Ask the first online CPU to save the MTRR state instead of asking the
BSP: the BSP could be offline when mtrr_save_state() is called.

Signed-off-by: Fenghua Yu
Link: http://lkml.kernel.org/r/1352835171-3958-12-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/mtrr/main.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6b96110bb0c3..e4c1a4184531 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
 }
 
 /**
- * Save current fixed-range MTRR state of the BSP
+ * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
  */
 void mtrr_save_state(void)
 {
-	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
+	int first_cpu;
+
+	get_online_cpus();
+	first_cpu = cpumask_first(cpu_online_mask);
+	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+	put_online_cpus();
 }
 
 void set_mtrr_aps_delayed_init(void)
--
cgit v1.2.3
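
[ Editor's note: the hotplug-safe pattern the MTRR patch adopts, shown
as a standalone sketch with a hypothetical callback. The
get_online_cpus()/put_online_cpus() pair prevents the chosen CPU from
going offline between cpumask_first() and the cross-call: ]

#include <linux/cpu.h>
#include <linux/smp.h>

/* Hypothetical callback; runs on the target CPU, in interrupt context. */
static void sample_fn(void *info)
{
	pr_info("running on CPU %d\n", smp_processor_id());
}

static void run_on_first_online_cpu(void)
{
	int cpu;

	get_online_cpus();		/* block CPU hotplug ... */
	cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(cpu, sample_fn, NULL, 1);	/* wait=1 */
	put_online_cpus();		/* ... so 'cpu' stays online throughout */
}
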
From 18c26c27ae0abe82253cb2e2363df465dbbb657e Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Mon, 22 Oct 2012 22:53:20 -0400
Subject: death to idle_regs()

Signed-off-by: Al Viro
---
 arch/x86/kernel/cpu/common.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e71..6a6432cf89de 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
-/* Make sure %fs and %gs are initialized properly in idle threads */
-struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
-{
-	memset(regs, 0, sizeof(struct pt_regs));
-	regs->fs = __KERNEL_PERCPU;
-	regs->gs = __KERNEL_STACK_CANARY;
-
-	return regs;
-}
 #endif	/* CONFIG_X86_64 */
 
 /*
--
cgit v1.2.3


From e5bb8ad862a97a0facc83f3b81731de919fec6ad Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Wed, 28 Nov 2012 11:50:26 -0800
Subject: x86, 386 removal: Remove CONFIG_BSWAP

All 486+ CPUs support BSWAP, so remove the fallback 386 support code.

Signed-off-by: H. Peter Anvin
Link: http://lkml.kernel.org/r/1354132230-21854-5-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/cpu/bugs.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0e910da16c5..0cd07ccdf380 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -136,24 +136,15 @@ static void __init check_popad(void)
 /*
  * Check whether we are able to run this kernel safely on SMP.
  *
- * - In order to run on a i386, we need to be compiled for i386
- *   (for due to lack of "invlpg" and working WP on a i386)
+ * - i386 is no longer supported.
  * - In order to run on anything without a TSC, we need to be
  *   compiled for a i486.
  */
 
 static void __init check_config(void)
 {
-/*
- * We'd better not be a i386 if we're configured to use some
- * i486+ only features! (WP works in supervisor mode and the
- * new "invlpg" and "bswap" instructions)
- */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
-    defined(CONFIG_X86_BSWAP)
-	if (boot_cpu_data.x86 == 3)
+	if (boot_cpu_data.x86 < 4)
 		panic("Kernel requires i486+ for 'invlpg' and other features");
-#endif
 }
--
cgit v1.2.3


From 094ab1db7cb7833cd4c820acd868fc26acf3f08e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Wed, 28 Nov 2012 11:50:27 -0800
Subject: x86, 386 removal: Remove CONFIG_INVLPG

All 486+ CPUs support INVLPG, so remove the fallback 386 support code.

Signed-off-by: H. Peter Anvin
Link: http://lkml.kernel.org/r/1354132230-21854-6-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/cpu/amd.c   | 3 ---
 arch/x86/kernel/cpu/intel.c | 4 ----
 2 files changed, 7 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b7d1656a042..a025d8cc4577 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -753,9 +753,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 
 static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_invlpg)
-		return;
-
 	tlb_flushall_shift = 5;
 
 	if (c->x86 <= 0x11)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 198e019a531a..fcaabd0432c5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
 
 static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_invlpg) {
-		tlb_flushall_shift = -1;
-		return;
-	}
 	switch ((c->x86 << 8) + c->x86_model) {
 	case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
 	case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
--
cgit v1.2.3
From e3228cf4544355f73437a2b9c6916be9cbafc201 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Wed, 28 Nov 2012 11:50:29 -0800
Subject: x86, 386 removal: Remove CONFIG_X86_POPAD_OK

The check_popad() routine tested for a 386-specific bug, and never
actually did anything useful with it anyway other than print a
message.

Signed-off-by: H. Peter Anvin
Link: http://lkml.kernel.org/r/1354132230-21854-8-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/cpu/bugs.c | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0cd07ccdf380..92dfec986a48 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -106,33 +106,6 @@ static void __init check_hlt(void)
 	pr_cont("OK\n");
 }
 
-/*
- * Most 386 processors have a bug where a POPAD can lock the
- * machine even from user space.
- */
-
-static void __init check_popad(void)
-{
-#ifndef CONFIG_X86_POPAD_OK
-	int res, inp = (int) &res;
-
-	pr_info("Checking for popad bug... ");
-	__asm__ __volatile__(
-		"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
-		: "=&a" (res)
-		: "d" (inp)
-		: "ecx", "edi");
-	/*
-	 * If this fails, it means that any user program may lock the
-	 * CPU hard. Too bad.
-	 */
-	if (res != 12345678)
-		pr_cont("Buggy\n");
-	else
-		pr_cont("OK\n");
-#endif
-}
-
 /*
  * Check whether we are able to run this kernel safely on SMP.
  *
@@ -157,7 +130,6 @@ void __init check_bugs(void)
 #endif
 	check_config();
 	check_hlt();
-	check_popad();
 	init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 
 	alternative_instructions();
--
cgit v1.2.3


From 4319eb4fc7196536c751f9e8a273759b3298c10b Mon Sep 17 00:00:00 2001
From: Olaf Hering
Date: Thu, 6 Dec 2012 21:16:11 +0100
Subject: x86 mtrr: fix comment typo in mtrr_bp_init

Signed-off-by: Olaf Hering
Signed-off-by: Jiri Kosina
---
 arch/x86/kernel/cpu/mtrr/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6b96110bb0c3..b5a0fd891d73 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -606,7 +606,7 @@ void __init mtrr_bp_init(void)
 
 			/*
 			 * This is an AMD specific MSR, but we assume(hope?) that
-			 * Intel will implement it to when they extend the address
+			 * Intel will implement it too when they extend the address
 			 * bus of the Xeon.
 			 */
 			if (cpuid_eax(0x80000000) >= 0x80000008) {
--
cgit v1.2.3


From bc3eba60682750dd7b45ea616d65c926fc3b8be8 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Mon, 17 Dec 2012 11:26:31 -0800
Subject: x86, 386 removal: Remove support for IRQ 13 FPU error reporting

Remove support for FPU error reporting via IRQ 13, as opposed to
exception 16 (#MF). One last remnant of i386 gone.

Signed-off-by: H. Peter Anvin
Cc: Alan Cox
---
 arch/x86/kernel/cpu/proc.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index fbd895562292..3286a92e662a 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -26,11 +26,6 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 #ifdef CONFIG_X86_32
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
-	/*
-	 * We use exception 16 if we have hardware math and we've either seen
-	 * it or the CPU claims it is internal
-	 */
-	int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
 	seq_printf(m,
 		   "fdiv_bug\t: %s\n"
 		   "hlt_bug\t\t: %s\n"
@@ -45,7 +40,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   c->f00f_bug ? "yes" : "no",
 		   c->coma_bug ? "yes" : "no",
 		   c->hard_math ? "yes" : "no",
-		   fpu_exception ? "yes" : "no",
+		   c->hard_math ? "yes" : "no",
 		   c->cpuid_level,
 		   c->wp_works_ok ? "yes" : "no");
 }
--
cgit v1.2.3
Peter Anvin" Cc: Peter Zijlstra Cc: Daniel Drake Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 3cf3d97cce3a..b43200dbfe7e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2500,7 +2500,7 @@ static bool pcidrv_registered; /* * add a pci uncore device */ -static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) +static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) { struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; @@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) kfree(box); } -static int __devinit uncore_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int uncore_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) { struct intel_uncore_type *type; -- cgit v1.2.3 From a706d965dcfdff73bf2bad1c300f8119900714c7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 28 Dec 2012 19:56:07 -0700 Subject: perf x86: revert 20b279 - require exclude_guest to use PEBS - kernel side This patch is brought to you by the letter 'H'. Commit 20b279 breaks compatiblity with older perf binaries when run with precise modifier (:p or :pp) by requiring the exclude_guest attribute to be set. Older binaries default exclude_guest to 0 (ie., wanting guest-based samples) unless host only profiling is requested (:H modifier). The workaround for older binaries is to add H to the modifier list (e.g., -e cycles:ppH - toggles exclude_guest to 1). This was deemed unacceptable by Linus: https://lkml.org/lkml/2012/12/12/570 Between family in town and the fresh snow in Breckenridge there is no time left to be working on the proper fix for this over the holidays. In the New Year I have more pressing problems to resolve -- like some memory leaks in perf which are proving to be elusive -- although the aforementioned snow is probably why they are proving to be elusive. Either way I do not have any spare time to work on this and from the time I have managed to spend on it the solution is more difficult than just moving to a new exclude_guest flag (does not work) or flipping the logic to include_guest (which is not as trivial as one would think). So, two options: silently force exclude_guest on as suggested by Gleb which means no impact to older perf binaries or revert the original patch which caused the breakage. This patch does the latter -- reverts the original patch that introduced the regression. The problem can be revisited in the future as time allows. Signed-off-by: David Ahern Cc: Avi Kivity Cc: Gleb Natapov Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Link: http://lkml.kernel.org/r/1356749767-17322-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4428fd178bce..6774c17a5576 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event) /* BTS is currently only allowed for user-mode. 
From a706d965dcfdff73bf2bad1c300f8119900714c7 Mon Sep 17 00:00:00 2001
From: David Ahern
Date: Fri, 28 Dec 2012 19:56:07 -0700
Subject: perf x86: revert 20b279 - require exclude_guest to use PEBS - kernel side

This patch is brought to you by the letter 'H'.

Commit 20b279 breaks compatibility with older perf binaries when run
with the precise modifier (:p or :pp) by requiring the exclude_guest
attribute to be set. Older binaries default exclude_guest to 0 (ie,
wanting guest-based samples) unless host-only profiling is requested
(:H modifier). The workaround for older binaries is to add H to the
modifier list (e.g., -e cycles:ppH - toggles exclude_guest to 1). This
was deemed unacceptable by Linus:

https://lkml.org/lkml/2012/12/12/570

Between family in town and the fresh snow in Breckenridge there is no
time left to be working on the proper fix for this over the holidays.
In the New Year I have more pressing problems to resolve -- like some
memory leaks in perf which are proving to be elusive -- although the
aforementioned snow is probably why they are proving to be elusive.

Either way I do not have any spare time to work on this, and from the
time I have managed to spend on it the solution is more difficult than
just moving to a new exclude_guest flag (does not work) or flipping
the logic to include_guest (which is not as trivial as one would
think).

So, two options: silently force exclude_guest on as suggested by Gleb,
which means no impact to older perf binaries, or revert the original
patch which caused the breakage. This patch does the latter -- reverts
the original patch that introduced the regression. The problem can be
revisited in the future as time allows.

Signed-off-by: David Ahern
Cc: Avi Kivity
Cc: Gleb Natapov
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Robert Richter
Link: http://lkml.kernel.org/r/1356749767-17322-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 arch/x86/kernel/cpu/perf_event.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4428fd178bce..6774c17a5576 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
 		/* BTS is currently only allowed for user-mode. */
 		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
-
-		if (!attr->exclude_guest)
-			return -EOPNOTSUPP;
 	}
 
 	hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip) {
 		int precise = 0;
 
-		if (!event->attr.exclude_guest)
-			return -EOPNOTSUPP;
-
 		/* Support for constant skid */
 		if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
 			precise++;
--
cgit v1.2.3
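
[ Editor's note: what the reverted check rejected, sketched as a
user-space perf_event_open() call: precise_ip requests PEBS while
exclude_guest stays 0, which is what older perf binaries pass. With
commit 20b279 applied this open fails with EOPNOTSUPP; after the
revert it succeeds again. ]

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.precise_ip = 2;		/* ":pp" - PEBS, constant skid */
	attr.exclude_guest = 0;		/* default for older perf tools */

	/* measure the current process on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* EOPNOTSUPP before the revert */
	else
		puts("PEBS event opened");
	return 0;
}
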