summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-21 13:14:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-21 13:14:16 -0700
commit2c856e14dad8cb1b085ae1f30c5e125c6d46019b (patch)
tree0b0067f6b83a536e7edb41d400a50c383c26e686 /arch/arm64
parentd34687ab97731b3a707106f78756342b61f46dbc (diff)
parent357b565d5d52b2dc2a51390eb8f887a9caa8597f (diff)
Merge tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm[64] perf updates from Will Deacon: "I have another mixed bag of ARM-related perf patches here. It's about 25% CPU and 75% interconnect, but with drivers/bus/ languishing without an obvious maintainer or tree, Olof and I agreed to keep all of these PMU patches together. I suspect a whole load of code from drivers/bus/arm-* can be moved under drivers/perf/, so that's on the radar for the future. Summary: - Initial support for ARMv8.1 CPU PMUs - Support for the CPU PMU in Cavium ThunderX - CPU PMU support for systems running 32-bit Linux in secure mode - Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)" * tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (26 commits) drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree arm64: perf: Extend ARMV8_EVTYPE_MASK to include PMCR.LC arm-cci: remove unused variable arm-cci: don't return value from void function arm-cci: make private functions static arm-cci: CoreLink CCI-550 PMU driver arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU arm-cci: CCI-500: Work around PMU counter writes arm-cci: Provide hook for writing to PMU counters arm-cci: Add helper to enable PMU without synchornising counters arm-cci: Add routines to save/restore all counters arm-cci: Get the status of a counter arm-cci: write_counter: Remove redundant check arm-cci: Delay PMU counter writes to pmu::pmu_enable arm-cci: Refactor CCI PMU enable/disable methods arm-cci: Group writes to counter arm-cci: fix handling cpumask_any_but return value arm-cci: simplify sysfs attr handling drivers/perf: arm_pmu: implement CPU_PM notifier arm64: dts: Add Cavium ThunderX specific PMU ...
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/boot/dts/cavium/thunder-88xx.dtsi5
-rw-r--r--arch/arm64/kernel/perf_event.c122
2 files changed, 105 insertions, 22 deletions
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
index 9cb7cf94284a..2eb9b225f0bc 100644
--- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
@@ -360,6 +360,11 @@
<1 10 0xff01>;
};
+ pmu {
+ compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3";
+ interrupts = <1 7 4>;
+ };
+
soc {
compatible = "simple-bus";
#address-cells = <2>;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 1b52269ffa87..767c4f6e1f5b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -88,16 +88,25 @@
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30
+/* ARMv8 implementation defined event types. */
+#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
+#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
+#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42
+#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43
+#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C
+#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D
+#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E
+#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F
+
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2
-/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */
-#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
-#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
-#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42
-#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43
-#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c
-#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d
+/* ARMv8 Cavium ThunderX specific event types. */
+#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9
+#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA
+#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB
+#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC
+#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED
/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
@@ -132,6 +141,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
+static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
+};
+
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -175,16 +196,46 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
@@ -325,7 +376,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
NULL,
};
-
/*
* Perf Events' indices
*/
@@ -356,9 +406,10 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMCR_N_MASK 0x1f
-#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
+#define ARMV8_PMCR_MASK 0x7f /* Mask for writable bits */
/*
* PMOVSR: counters overflow flag status reg
@@ -369,8 +420,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
-#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
+#define ARMV8_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
+#define ARMV8_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
/*
* Event filters for PMUv3
@@ -445,9 +496,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- asm volatile("msr pmccntr_el0, %0" :: "r" (value));
- else if (armv8pmu_select_counter(idx) == idx)
+ else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
+ /*
+ * Set the upper 32bits as this is a 64bit counter but we only
+ * count using the lower 32bits and we want an interrupt when
+ * it overflows.
+ */
+ u64 value64 = 0xffffffff00000000ULL | value;
+
+ asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
+ } else if (armv8pmu_select_counter(idx) == idx)
asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
}
@@ -722,8 +780,11 @@ static void armv8pmu_reset(void *info)
armv8pmu_disable_intens(idx);
}
- /* Initialize & Reset PMNC: C and P bits. */
- armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
+ /*
+ * Initialize & Reset PMNC. Request overflow interrupt for
+ * 64 bit cycle counter but cheat in armv8pmu_write_counter().
+ */
+ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC);
}
static int armv8_pmuv3_map_event(struct perf_event *event)
@@ -747,6 +808,13 @@ static int armv8_a57_map_event(struct perf_event *event)
ARMV8_EVTYPE_EVENT);
}
+static int armv8_thunder_map_event(struct perf_event *event)
+{
+ return armpmu_map_event(event, &armv8_thunder_perf_map,
+ &armv8_thunder_perf_cache_map,
+ ARMV8_EVTYPE_EVENT);
+}
+
static void armv8pmu_read_num_pmnc_events(void *info)
{
int *nb_cnt = info;
@@ -815,11 +883,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
return armv8pmu_probe_num_events(cpu_pmu);
}
+static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv8_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv8_cavium_thunder";
+ cpu_pmu->map_event = armv8_thunder_map_event;
+ cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ return armv8pmu_probe_num_events(cpu_pmu);
+}
+
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
+ {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
{},
};