summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/core.c23
-rw-r--r--arch/x86/events/intel/core.c201
-rw-r--r--arch/x86/events/intel/cstate.c47
-rw-r--r--arch/x86/events/intel/lbr.c124
-rw-r--r--arch/x86/events/intel/rapl.c32
-rw-r--r--arch/x86/events/intel/uncore.c88
-rw-r--r--arch/x86/events/intel/uncore.h5
-rw-r--r--arch/x86/events/intel/uncore_snb.c67
-rw-r--r--arch/x86/events/intel/uncore_snbep.c96
-rw-r--r--arch/x86/events/msr.c63
-rw-r--r--arch/x86/events/perf_event.h12
-rw-r--r--arch/x86/include/asm/topology.h9
-rw-r--r--arch/x86/kernel/smpboot.c25
13 files changed, 608 insertions, 184 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 91eac39625be..dfebbde2a4cc 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9b4f9d3ce465..0974ba11e954 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -16,6 +16,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
@@ -185,7 +186,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -204,10 +205,8 @@ struct event_constraint intel_skl_event_constraints[] = {
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
@@ -243,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all in slots, which is a free slot in a 4 wide
+ * pipeline. Some events are already reported in slots, for cycle
+ * events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
@@ -280,7 +316,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -1361,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3290,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu)
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
- case 45: /* SNB-EP */
+ case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
@@ -3331,6 +3390,13 @@ static void intel_snb_check_microcode(void)
}
}
+static bool is_lbr_from(unsigned long msr)
+{
+ unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
+
+ return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
+}
+
/*
* Under certain circumstances, access certain MSR may cause #GP.
* The function tests if the input MSR can be safely accessed.
@@ -3351,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask)
* Only change the bits which can be updated by wrmsrl.
*/
val_tmp = val_old ^ mask;
+
+ if (is_lbr_from(msr))
+ val_tmp = lbr_from_signext_quirk_wr(val_tmp);
+
if (wrmsrl_safe(msr, val_tmp) ||
rdmsrl_safe(msr, &val_new))
return false;
+ /*
+ * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
+ * should equal rdmsrl()'s even with the quirk.
+ */
if (val_new != val_tmp)
return false;
+ if (is_lbr_from(msr))
+ val_old = lbr_from_signext_quirk_wr(val_old);
+
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
@@ -3466,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
@@ -3537,15 +3621,15 @@ __init int intel_pmu_init(void)
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
+ case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
break;
- case 15: /* 65nm Core2 "Merom" */
+ case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington (MP) */
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3556,9 +3640,9 @@ __init int intel_pmu_init(void)
pr_cont("Core2 events, ");
break;
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3586,11 +3670,11 @@ __init int intel_pmu_init(void)
pr_cont("Nehalem events, ");
break;
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
+ case INTEL_FAM6_ATOM_PINEVIEW:
+ case INTEL_FAM6_ATOM_LINCROFT:
+ case INTEL_FAM6_ATOM_PENWELL:
+ case INTEL_FAM6_ATOM_CLOVERVIEW:
+ case INTEL_FAM6_ATOM_CEDARVIEW:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3602,9 +3686,9 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3616,11 +3700,12 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
- case 92: /* 14nm Atom "Goldmont" */
- case 95: /* 14nm Atom "Goldmont Denverton" */
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3643,9 +3728,9 @@ __init int intel_pmu_init(void)
pr_cont("Goldmont events, ");
break;
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3672,8 +3757,8 @@ __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3686,7 +3771,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3708,8 +3793,8 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3725,7 +3810,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3743,10 +3828,10 @@ __init int intel_pmu_init(void)
break;
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3770,10 +3855,10 @@ __init int intel_pmu_init(void)
pr_cont("Haswell events, ");
break;
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3806,7 +3891,7 @@ __init int intel_pmu_init(void)
pr_cont("Broadwell events, ");
break;
- case 87: /* Knights Landing Xeon Phi */
+ case INTEL_FAM6_XEON_PHI_KNL:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -3824,16 +3909,22 @@ __init int intel_pmu_init(void)
pr_cont("Knights Landing events, ");
break;
- case 142: /* 14nm Kabylake Mobile */
- case 158: /* 14nm Kabylake Desktop */
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- case 85: /* 14nm Skylake Server */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3914,6 +4005,8 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
+ if (x86_pmu.lbr_nr)
+ pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/*
* Access extra MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support offcore event
@@ -3946,16 +4039,14 @@ __init int intel_pmu_init(void)
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9ba4e4136a15..4c7638b91fa5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,6 +89,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = {
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
- X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
- X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
- X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
- X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
- X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
- X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
- X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
- X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
- X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
- X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
- X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
- X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
- X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
- X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
- X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
- X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 9e2b40cdb05f..707d358e0dff 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -77,9 +77,11 @@ static enum {
LBR_IND_JMP |\
LBR_FAR)
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
-#define LBR_FROM_FLAG_ABORT (1ULL << 61)
+#define LBR_FROM_FLAG_MISPRED BIT_ULL(63)
+#define LBR_FROM_FLAG_IN_TX BIT_ULL(62)
+#define LBR_FROM_FLAG_ABORT BIT_ULL(61)
+
+#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/*
* x86control flow change classification
@@ -235,6 +237,97 @@ enum {
LBR_VALID,
};
+/*
+ * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
+ * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
+ * TSX is not supported they have no consistent behavior:
+ *
+ * - For wrmsr(), bits 61:62 are considered part of the sign extension.
+ * - For HW updates (branch captures) bits 61:62 are always OFF and are not
+ * part of the sign extension.
+ *
+ * Therefore, if:
+ *
+ * 1) LBR has TSX format
+ * 2) CPU has no TSX support enabled
+ *
+ * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
+ * value from rdmsr() must be converted to have a 61 bits sign extension,
+ * ignoring the TSX flags.
+ */
+static inline bool lbr_from_signext_quirk_needed(void)
+{
+ int lbr_format = x86_pmu.intel_cap.lbr_format;
+ bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
+ boot_cpu_has(X86_FEATURE_RTM);
+
+ return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+}
+
+DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+
+/* If quirk is enabled, ensure sign extension is 63 bits: */
+inline u64 lbr_from_signext_quirk_wr(u64 val)
+{
+ if (static_branch_unlikely(&lbr_from_quirk_key)) {
+ /*
+ * Sign extend into bits 61:62 while preserving bit 63.
+ *
+ * Quirk is enabled when TSX is disabled. Therefore TSX bits
+ * in val are always OFF and must be changed to be sign
+ * extension bits. Since bits 59:60 are guaranteed to be
+ * part of the sign extension bits, we can just copy them
+ * to 61:62.
+ */
+ val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
+ }
+ return val;
+}
+
+/*
+ * If quirk is needed, ensure sign extension is 61 bits:
+ */
+u64 lbr_from_signext_quirk_rd(u64 val)
+{
+ if (static_branch_unlikely(&lbr_from_quirk_key)) {
+ /*
+ * Quirk is on when TSX is not enabled. Therefore TSX
+ * flags must be read as OFF.
+ */
+ val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
+ }
+ return val;
+}
+
+static inline void wrlbr_from(unsigned int idx, u64 val)
+{
+ val = lbr_from_signext_quirk_wr(val);
+ wrmsrl(x86_pmu.lbr_from + idx, val);
+}
+
+static inline void wrlbr_to(unsigned int idx, u64 val)
+{
+ wrmsrl(x86_pmu.lbr_to + idx, val);
+}
+
+static inline u64 rdlbr_from(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(x86_pmu.lbr_from + idx, val);
+
+ return lbr_from_signext_quirk_rd(val);
+}
+
+static inline u64 rdlbr_to(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(x86_pmu.lbr_to + idx, val);
+
+ return val;
+}
+
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
int i;
@@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
tos = task_ctx->tos;
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
- wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
+ wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
+
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
@@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
- int i;
unsigned lbr_idx, mask;
u64 tos;
+ int i;
if (task_ctx->lbr_callstack_users == 0) {
task_ctx->lbr_stack_state = LBR_NONE;
@@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
tos = intel_pmu_lbr_tos();
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
- rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+ task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
@@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
u16 cycles = 0;
int lbr_flags = lbr_desc[lbr_format];
- rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
+ from = rdlbr_from(lbr_idx);
+ to = rdlbr_to(lbr_idx);
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
@@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void)
* SW branch filter usage:
* - compensate for lack of HW filter
*/
- pr_cont("4-deep LBR, ");
}
/* nehalem/westmere */
@@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("16-deep LBR, ");
}
/* sandy bridge */
@@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("16-deep LBR, ");
}
/* haswell */
@@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
- pr_cont("16-deep LBR, ");
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("32-deep LBR, ");
}
/* atom */
@@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void)
* SW branch filter usage:
* - compensate for lack of HW filter
*/
- pr_cont("8-deep LBR, ");
}
/* slm */
@@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
-
- pr_cont("8-deep LBR, ");
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e30eef4f29a6..d0c58b35155f 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,6 +55,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
};
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
- X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
- X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
- X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
- X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
- X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
- X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
- X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
- X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
- X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce74062d981..59b4974c697f 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,5 @@
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
@@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
- */
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
*/
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
@@ -956,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
static void uncore_pci_remove(struct pci_dev *pdev)
{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
@@ -1361,30 +1379,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
};
static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .cpu_init = skl_uncore_cpu_init,
.pci_init = skl_uncore_pci_init,
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9a3580..d6063e438158 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -15,7 +15,11 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
@@ -360,6 +364,7 @@ int bdw_uncore_pci_init(void);
int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+void skl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 96531d2b843f..97a69dbba649 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,4 +1,4 @@
-/* Nehalem/SandBridge/Haswell uncore support */
+/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
/* Uncore IMC PCI IDs */
@@ -9,6 +9,7 @@
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -64,6 +65,10 @@
#define NHM_UNC_PERFEVTSEL0 0x3c0
#define NHM_UNC_UNCORE_PMC0 0x3b0
+/* SKL uncore global control */
+#define SKL_UNC_PERF_GLOBAL_CTL 0xe01
+#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1)
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void)
snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
}
+static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct intel_uncore_ops skl_uncore_msr_ops = {
+ .init_box = skl_uncore_msr_init_box,
+ .exit_box = skl_uncore_msr_exit_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type skl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &skl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
+};
+
+static struct intel_uncore_type *skl_msr_uncores[] = {
+ &skl_uncore_cbox,
+ &snb_uncore_arb,
+ NULL,
+};
+
+void skl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = skl_msr_uncores;
+ if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ snb_uncore_arb.ops = &skl_uncore_msr_ops;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+
{ /* end: all zeroes */ },
};
@@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
+ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */
{ /* end marker */ }
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 874e8bd64d1d..824e54086e07 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = {
*/
static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
+ { /* MC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
},
- { /* MC DClk Channel */
+ { /* MC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+ },
+ { /* MC0 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* MC0 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+ },
+ { /* MC0 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+ },
+ { /* MC1 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+ },
+ { /* MC1 DClk CH 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+ },
+ { /* MC1 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+ },
+ { /* EDC0 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+ },
+ { /* EDC2 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+ },
+ { /* EDC3 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
},
- { /* EDC UClk */
+ { /* EDC4 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+ },
+ { /* EDC5 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+ },
+ { /* EDC6 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+ },
+ { /* EDC7 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+ },
+ { /* EDC0 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* EDC1 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+ },
+ { /* EDC2 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+ },
+ { /* EDC3 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+ },
+ { /* EDC4 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+ },
+ { /* EDC5 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+ },
+ { /* EDC6 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
},
- { /* EDC EClk */
+ { /* EDC7 EClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
},
{ /* M2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 85ef3c2e80e0..50b3a056f96b 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <asm/intel-family.h>
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -34,39 +35,43 @@ static bool test_intel(int idx)
return false;
switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
-
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE2:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
+
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
+
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
+
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
+
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
if (idx == PERF_MSR_SMI)
return true;
break;
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764df815d..8c4a47706296 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \
.event_str = str, \
};
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \
+static struct perf_pmu_events_ht_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str_noht = noht, \
+ .event_str_ht = ht, \
+}
+
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
#ifdef CONFIG_CPU_SUP_AMD
@@ -892,6 +902,8 @@ void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+u64 lbr_from_signext_quirk_wr(u64 val);
+
void intel_pmu_lbr_reset(void);
void intel_pmu_lbr_enable(struct perf_event *event);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd5031b..e346572841a0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
@@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg);
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b923cac..2ed0ec1353f8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu)
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
@@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu)
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void)
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu)
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)