diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/events/core.c | 23 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 201 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 47 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 124 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 32 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 88 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 5 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 67 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 96 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 63 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/topology.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 25 |
13 files changed, 608 insertions, 184 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 91eac39625be..dfebbde2a4cc 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? + pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9b4f9d3ce465..0974ba11e954 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include <asm/cpufeature.h> #include <asm/hardirq.h> +#include <asm/intel-family.h> #include <asm/apic.h> #include "../perf_event.h" @@ -185,7 +186,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -204,10 +205,8 @@ struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -243,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -280,7 +316,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -1361,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3290,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3331,6 +3390,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3351,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ @@ -3466,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3537,15 +3621,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3556,9 +3640,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3586,11 +3670,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3602,9 +3686,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3616,11 +3700,12 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3643,9 +3728,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3672,8 +3757,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3686,7 +3771,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3708,8 +3793,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3725,7 +3810,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3743,10 +3828,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3770,10 +3855,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3806,7 +3891,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3824,16 +3909,22 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3914,6 +4005,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event @@ -3946,16 +4039,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136a15..4c7638b91fa5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb05f..707d358e0dff 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,11 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) + +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* * x86control flow change classification @@ -235,6 +237,97 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Quirk is on when TSX is not enabled. Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_to + idx, val); + + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; @@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - pr_cont("16-deep LBR, "); + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f29a6..d0c58b35155f 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..59b4974c697f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,5 @@ #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. - */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) @@ -956,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; @@ -1361,30 +1379,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..d6063e438158 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff @@ -360,6 +364,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b843f..97a69dbba649 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 874e8bd64d1d..824e54086e07 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e80e0..50b3a056f96b 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include <linux/perf_event.h> +#include <asm/intel-family.h> enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,43 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE2: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..8c4a47706296 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD @@ -892,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) |