summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c61
-rw-r--r--arch/x86/kernel/cpu/perf_event.h56
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c35
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c182
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c2
6 files changed, 312 insertions, 25 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..b31798d5e62e 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -71,6 +71,7 @@
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..5ed7a4c5baf7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
*/
static void __init filter_events(struct attribute **attrs)
{
+ struct device_attribute *d;
+ struct perf_pmu_events_attr *pmu_attr;
int i, j;
for (i = 0; attrs[i]; i++) {
+ d = (struct device_attribute *)attrs[i];
+ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+ /* str trumps id */
+ if (pmu_attr->event_str)
+ continue;
if (x86_pmu.event_map(i))
continue;
@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs)
}
}
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);
-
u64 config = x86_pmu.event_map(pmu_attr->id);
- return x86_pmu.events_sysfs_show(page, config);
-}
-#define EVENT_VAR(_id) event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+ /* string trumps id */
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
-#define EVENT_ATTR(_name, _id) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
- events_sysfs_show)
+ return x86_pmu.events_sysfs_show(page, config);
+}
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0);
+ 0, x86_pmu.num_counters, 0, 0);
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+ if (x86_pmu.event_attrs)
+ x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);
+ if (x86_pmu.cpu_events) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+ if (!WARN_ON(!tmp))
+ x86_pmu_events_group.attrs = tmp;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
+ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -59,7 +60,13 @@ struct event_constraint {
u64 cmask;
int weight;
int overlap;
+ int flags;
};
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
+ .flags = f, \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
/*
* The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
* and its counter masks must be kept at a minimum.
*/
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
/*
* Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define INTEL_PLD_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -260,12 +276,22 @@ struct extra_reg {
.msr = (ms), \
.config_mask = (m), \
.valid_mask = (vm), \
- .idx = EXTRA_REG_##i \
+ .idx = EXTRA_REG_##i, \
}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+ INTEL_UEVENT_EXTRA_REG(c, \
+ MSR_PEBS_LD_LAT_THRESHOLD, \
+ 0xffff, \
+ LDLAT)
+
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
*/
int attr_rdpmc;
struct attribute **format_attrs;
+ struct attribute **event_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
+ struct attribute **cpu_events;
/*
* CPU Hotplug hooks
@@ -421,6 +449,23 @@ do { \
#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2
+#define EVENT_VAR(_id) event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id) \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = PERF_COUNT_HW_##_id, \
+ .event_str = NULL, \
+};
+
+#define EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
int knc_pmu_init(void);
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
+
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dab7580c47ae..e84c4ba44b59 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+ EVENT_PTR(mem_ld_nhm),
+ NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+ EVENT_PTR(mem_ld_snb),
+ EVENT_PTR(mem_st_snb),
+ NULL,
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -1392,8 +1410,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code)
+ if ((event->hw.config & c->cmask) == c->code) {
+ /* hw.flags zeroed at initialization */
+ event->hw.flags |= c->flags;
return c;
+ }
}
}
@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
+ event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event);
}
@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_cmask.attr,
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+ &format_attr_ldlat.attr, /* PEBS load latency */
NULL,
};
@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
+ x86_pmu.cpu_events = nhm_events_attrs;
+
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.cpu_events = nhm_events_attrs;
+
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+ x86_pmu.cpu_events = snb_events_attrs;
+
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+ x86_pmu.cpu_events = snb_events_attrs;
+
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..36dc13d1ad02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,130 @@ struct pebs_record_32 {
*/
+union intel_x86_pebs_dse {
+ u64 val;
+ struct {
+ unsigned int ld_dse:4;
+ unsigned int ld_stlb_miss:1;
+ unsigned int ld_locked:1;
+ unsigned int ld_reserved:26;
+ };
+ struct {
+ unsigned int st_l1d_hit:1;
+ unsigned int st_reserved1:3;
+ unsigned int st_stlb_miss:1;
+ unsigned int st_locked:1;
+ unsigned int st_reserved2:26;
+ };
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+ P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
+ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+ OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
+ OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
+ OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
+ OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
+ OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+ OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
+ OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
+ OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+ OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+ OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
+ OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 precise_store_data(u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+ dse.val = status;
+
+ /*
+ * bit 4: TLB access
+ * 1 = stored missed 2nd level TLB
+ *
+ * so it either hit the walker or the OS
+ * otherwise hit 2nd level TLB
+ */
+ if (dse.st_stlb_miss)
+ val |= P(TLB, MISS);
+ else
+ val |= P(TLB, HIT);
+
+ /*
+ * bit 0: hit L1 data cache
+ * if not set, then all we know is that
+ * it missed L1D
+ */
+ if (dse.st_l1d_hit)
+ val |= P(LVL, HIT);
+ else
+ val |= P(LVL, MISS);
+
+ /*
+ * bit 5: Locked prefix
+ */
+ if (dse.st_locked)
+ val |= P(LOCK, LOCKED);
+
+ return val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val;
+ int model = boot_cpu_data.x86_model;
+ int fam = boot_cpu_data.x86;
+
+ dse.val = status;
+
+ /*
+ * use the mapping table for bit 0-3
+ */
+ val = pebs_data_source[dse.ld_dse];
+
+ /*
+ * Nehalem models do not support TLB, Lock infos
+ */
+ if (fam == 0x6 && (model == 26 || model == 30
+ || model == 31 || model == 46)) {
+ val |= P(TLB, NA) | P(LOCK, NA);
+ return val;
+ }
+ /*
+ * bit 4: TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (dse.ld_stlb_miss)
+ val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /*
+ * bit 5: locked prefix
+ */
+ if (dse.ld_locked)
+ val |= P(LOCK, LOCKED);
+
+ return val;
+}
+
struct pebs_record_core {
u64 flags, ip;
u64 ax, bx, cx, dx;
@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
- INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
};
struct event_constraint intel_westmere_pebs_event_constraints[] = {
- INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+ INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -430,8 +556,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code)
+ if ((event->hw.config & c->cmask) == c->code) {
+ event->hw.flags |= c->flags;
return c;
+ }
}
}
@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled |= 1ULL << 63;
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs, void *__pebs)
{
/*
- * We cast to pebs_record_core since that is a subset of
- * both formats and we don't use the other fields in this
- * routine.
+ * We cast to pebs_record_nhm to get the load latency data
+ * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct pebs_record_core *pebs = __pebs;
+ struct pebs_record_nhm *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
+ u64 sample_type;
+ int fll, fst;
if (!intel_pmu_save_and_restart(event))
return;
+ fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+ fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+
perf_sample_data_init(&data, 0, event->hw.last_period);
+ data.period = event->hw.last_period;
+ sample_type = event->attr.sample_type;
+
+ /*
+ * if PEBS-LL or PreciseStore
+ */
+ if (fll || fst) {
+ if (sample_type & PERF_SAMPLE_ADDR)
+ data.addr = pebs->dla;
+
+ /*
+ * Use latency for weight (only avail with PEBS-LL)
+ */
+ if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+ data.weight = pebs->lat;
+
+ /*
+ * data.data_src encodes the data source
+ */
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ if (fll)
+ data.data_src.val = load_latency_data(pebs->dse);
+ else
+ data.data_src.val = precise_store_data(pebs->dse);
+ }
+ }
+
/*
* We use the interrupt regs as a base because the PEBS record
* does not contain a full regs set, specifically it seems to
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index b43200dbfe7e..75da9e18b128 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
type->unconstrainted = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
- 0, type->num_counters, 0);
+ 0, type->num_counters, 0, 0);
for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = -1;