summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c85
-rw-r--r--kernel/events/internal.h6
-rw-r--r--kernel/events/ring_buffer.c31
3 files changed, 92 insertions, 30 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 03ac9c8b02fb..ce64f3fed5c6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1249,26 +1249,31 @@ unclone_ctx(struct perf_event_context *ctx)
return parent_ctx;
}
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
+ enum pid_type type)
{
+ u32 nr;
/*
* only top level events have the pid namespace they were created in
*/
if (event->parent)
event = event->parent;
- return task_tgid_nr_ns(p, event->ns);
+ nr = __task_pid_nr_ns(p, type, event->ns);
+ /* avoid -1 if it is idle thread or runs in another ns */
+ if (!nr && !pid_alive(p))
+ nr = -1;
+ return nr;
}
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
{
- /*
- * only top level events have the pid namespace they were created in
- */
- if (event->parent)
- event = event->parent;
+ return perf_event_pid_type(event, p, __PIDTYPE_TGID);
+}
- return task_pid_nr_ns(p, event->ns);
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+ return perf_event_pid_type(event, p, PIDTYPE_PID);
}
/*
@@ -1570,6 +1575,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
if (sample_type & PERF_SAMPLE_TRANSACTION)
size += sizeof(data->txn);
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ size += sizeof(data->phys_addr);
+
event->header_size = size;
}
@@ -3211,6 +3219,13 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
return;
perf_ctx_lock(cpuctx, ctx);
+ /*
+ * We must check ctx->nr_events while holding ctx->lock, such
+ * that we serialize against perf_install_in_context().
+ */
+ if (!ctx->nr_events)
+ goto unlock;
+
perf_pmu_disable(ctx->pmu);
/*
* We want to keep the following priority order:
@@ -3224,6 +3239,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
+
+unlock:
perf_ctx_unlock(cpuctx, ctx);
}
@@ -6003,6 +6020,9 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ perf_output_put(handle, data->phys_addr);
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@@ -6018,6 +6038,38 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
+static u64 perf_virt_to_phys(u64 virt)
+{
+ u64 phys_addr = 0;
+ struct page *p = NULL;
+
+ if (!virt)
+ return 0;
+
+ if (virt >= TASK_SIZE) {
+ /* If it's vmalloc()d memory, leave phys_addr as 0 */
+ if (virt_addr_valid((void *)(uintptr_t)virt) &&
+ !(virt >= VMALLOC_START && virt < VMALLOC_END))
+ phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
+ } else {
+ /*
+ * Walking the pages tables for user address.
+ * Interrupts are disabled, so it prevents any tear down
+ * of the page tables.
+ * Try IRQ-safe __get_user_pages_fast first.
+ * If failed, leave phys_addr as 0.
+ */
+ if ((current->mm != NULL) &&
+ (__get_user_pages_fast(virt, 1, 0, &p) == 1))
+ phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+
+ if (p)
+ put_page(p);
+ }
+
+ return phys_addr;
+}
+
void perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
@@ -6136,6 +6188,9 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size;
}
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ data->phys_addr = perf_virt_to_phys(data->addr);
}
static void __always_inline
@@ -7287,6 +7342,11 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_output_end(&handle);
}
+void perf_event_itrace_started(struct perf_event *event)
+{
+ event->attach_state |= PERF_ATTACH_ITRACE;
+}
+
static void perf_log_itrace_start(struct perf_event *event)
{
struct perf_output_handle handle;
@@ -7302,7 +7362,7 @@ static void perf_log_itrace_start(struct perf_event *event)
event = event->parent;
if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
- event->hw.itrace_started)
+ event->attach_state & PERF_ATTACH_ITRACE)
return;
rec.header.type = PERF_RECORD_ITRACE_START;
@@ -9890,6 +9950,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}
+ /* Only privileged users can get physical addresses */
+ if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
+ perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
if (!attr.sample_max_stack)
attr.sample_max_stack = sysctl_perf_event_max_stack;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 486fd78eb8d5..843e97047335 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -38,9 +38,9 @@ struct ring_buffer {
struct user_struct *mmap_user;
/* AUX area */
- local_t aux_head;
+ long aux_head;
local_t aux_nest;
- local_t aux_wakeup;
+ long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
unsigned long aux_pgoff;
int aux_nr_pages;
int aux_overwrite;
@@ -208,7 +208,7 @@ static inline int get_recursion_context(int *recursion)
{
int rctx;
- if (in_nmi())
+ if (unlikely(in_nmi()))
rctx = 3;
else if (in_irq())
rctx = 2;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ee97196bb151..af71a84e12ee 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -367,7 +367,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
goto err_put;
- aux_head = local_read(&rb->aux_head);
+ aux_head = rb->aux_head;
handle->rb = rb;
handle->event = event;
@@ -382,7 +382,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
*/
if (!rb->aux_overwrite) {
aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
- handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark;
+ handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
if (aux_head - aux_tail < perf_aux_size(rb))
handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
@@ -433,12 +433,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
aux_head = handle->head;
- local_set(&rb->aux_head, aux_head);
+ rb->aux_head = aux_head;
} else {
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
- aux_head = local_read(&rb->aux_head);
- local_add(size, &rb->aux_head);
+ aux_head = rb->aux_head;
+ rb->aux_head += size;
}
if (size || handle->aux_flags) {
@@ -450,11 +450,10 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
handle->aux_flags);
}
- aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
-
- if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+ rb->user_page->aux_head = rb->aux_head;
+ if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
wakeup = true;
- local_add(rb->aux_watermark, &rb->aux_wakeup);
+ rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
}
if (wakeup) {
@@ -478,22 +477,20 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
{
struct ring_buffer *rb = handle->rb;
- unsigned long aux_head;
if (size > handle->size)
return -ENOSPC;
- local_add(size, &rb->aux_head);
+ rb->aux_head += size;
- aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
- if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+ rb->user_page->aux_head = rb->aux_head;
+ if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
perf_output_wakeup(handle);
- local_add(rb->aux_watermark, &rb->aux_wakeup);
- handle->wakeup = local_read(&rb->aux_wakeup) +
- rb->aux_watermark;
+ rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
+ handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
}
- handle->head = aux_head;
+ handle->head = rb->aux_head;
handle->size -= size;
return 0;