summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/ring_buffer.c276
1 files changed, 181 insertions, 95 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 00867ff82412..026238c55b0c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -27,6 +27,7 @@
#include <linux/oom.h>
#include <asm/local.h>
+#include <asm/local64.h>
static void update_pages_handler(struct work_struct *work);
@@ -419,6 +420,17 @@ struct rb_event_info {
};
/*
+ * Used for the add_timestamp
+ * NONE
+ * NORMAL - may be for either time extend or absolute
+ * FORCE - force a full time stamp.
+ */
+enum {
+ RB_ADD_STAMP_NONE,
+ RB_ADD_STAMP_NORMAL,
+ RB_ADD_STAMP_FORCE
+};
+/*
* Used for which event context the event is in.
* NMI = 0
* IRQ = 1
@@ -470,7 +482,8 @@ struct ring_buffer_per_cpu {
size_t shortest_full;
unsigned long read;
unsigned long read_bytes;
- u64 write_stamp;
+ local64_t write_stamp;
+ local64_t before_stamp;
u64 read_stamp;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
@@ -2416,16 +2429,13 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
unsigned length = info->length;
u64 delta = info->delta;
- /* Only a commit updates the timestamp */
- if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
- delta = 0;
-
/*
* If we need to add a timestamp, then we
* add it to the start of the reserved space.
*/
if (unlikely(info->add_timestamp)) {
- bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+ bool abs = info->add_timestamp == RB_ADD_STAMP_FORCE ||
+ ring_buffer_time_stamp_abs(cpu_buffer->buffer);
event = rb_add_time_stamp(event, abs ? info->delta : delta, abs);
length -= RB_LEN_TIME_EXTEND;
@@ -2480,6 +2490,39 @@ static inline bool sched_clock_stable(void)
}
#endif
+static __always_inline bool
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
+
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ return cpu_buffer->commit_page->page == (void *)addr &&
+ rb_commit_index(cpu_buffer) == index;
+}
+
+static u64 rb_time_delta(struct ring_buffer_event *event)
+{
+ switch (event->type_len) {
+ case RINGBUF_TYPE_PADDING:
+ return 0;
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ return ring_buffer_event_time_stamp(event);
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ return 0;
+
+ case RINGBUF_TYPE_DATA:
+ return event->time_delta;
+ default:
+ return 0;
+ }
+}
+
static inline int
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
@@ -2488,6 +2531,8 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *bpage;
unsigned long index;
unsigned long addr;
+ u64 write_stamp;
+ u64 delta;
new_index = rb_event_index(event);
old_index = new_index + rb_event_ts_length(event);
@@ -2496,10 +2541,32 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
bpage = READ_ONCE(cpu_buffer->tail_page);
+ delta = rb_time_delta(event);
+
+ write_stamp = local64_read(&cpu_buffer->write_stamp);
+
+ /* Make sure the write stamp is read before testing the location */
+ barrier();
+
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
unsigned long event_length = rb_event_length(event);
+ u64 ret;
+
+ ret = local64_cmpxchg(&cpu_buffer->write_stamp, write_stamp, write_stamp - delta);
+ /* Something came in, can't discard */
+ if (ret != write_stamp)
+ return 0;
+
+ /*
+ * If an event were to come in now, it would see that the
+ * write_stamp and the before_stamp are different, and assume
+ * that this event just added itself before updating
+ * the write stamp. The interrupting event will fix the
+ * write stamp for us, and use the before stamp as its delta.
+ */
+
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
@@ -2551,10 +2618,6 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- /* Only update the write stamp if the page has an event */
- if (rb_page_write(cpu_buffer->commit_page))
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
@@ -2626,54 +2689,10 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
event->time_delta = 1;
}
-static __always_inline bool
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
-static __always_inline void
-rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- u64 delta;
-
- /*
- * The event first in the commit queue updates the
- * time stamp.
- */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * A commit event that is first on a page
- * updates the write timestamp with the page stamp
- */
- if (!rb_event_index(event))
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
- else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = ring_buffer_event_time_stamp(event);
- cpu_buffer->write_stamp += delta;
- } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
- delta = ring_buffer_event_time_stamp(event);
- cpu_buffer->write_stamp = delta;
- } else
- cpu_buffer->write_stamp += event->time_delta;
- }
-}
-
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
local_inc(&cpu_buffer->entries);
- rb_update_write_stamp(cpu_buffer, event);
rb_end_commit(cpu_buffer);
}
@@ -2872,13 +2891,13 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)info->delta,
(unsigned long long)info->ts,
- (unsigned long long)cpu_buffer->write_stamp,
+ (unsigned long long)local64_read(&cpu_buffer->write_stamp),
sched_clock_stable() ? "" :
"If you just came from a suspend/resume,\n"
"please switch to the trace global clock:\n"
" echo global > /sys/kernel/debug/tracing/trace_clock\n"
"or add trace_clock=global to the kernel command line\n");
- info->add_timestamp = 1;
+ info->add_timestamp = RB_ADD_STAMP_NORMAL;
}
static struct ring_buffer_event *
@@ -2887,9 +2906,37 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
{
struct ring_buffer_event *event;
struct buffer_page *tail_page;
- unsigned long tail, write;
+ unsigned long tail, write, w;
+ u64 before, after;
+ bool abs = false;
+
+ /* Don't let the compiler play games with cpu_buffer->tail_page */
+ tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
+
+ /*A*/ w = local_read(&tail_page->write) & RB_WRITE_MASK;
+ barrier();
+ before = local64_read(&cpu_buffer->before_stamp);
+ after = local64_read(&cpu_buffer->write_stamp);
+ barrier();
+ info->ts = rb_time_stamp(cpu_buffer->buffer);
+
+ if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
+ info->delta = info->ts;
+ abs = true;
+ } else {
+ info->delta = info->ts - after;
+ }
+
+ if (unlikely(test_time_stamp(info->delta)))
+ rb_handle_timestamp(cpu_buffer, info);
/*
+ * If interrupting an event time update, we may need an absolute timestamp.
+ * Don't bother if this is the start of a new page (w == 0).
+ */
+ if (unlikely(before != after && w))
+ info->add_timestamp = RB_ADD_STAMP_FORCE;
+ /*
* If the time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
@@ -2897,25 +2944,88 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(info->add_timestamp))
info->length += RB_LEN_TIME_EXTEND;
- /* Don't let the compiler play games with cpu_buffer->tail_page */
- tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
- write = local_add_return(info->length, &tail_page->write);
+ /*B*/ local64_set(&cpu_buffer->before_stamp, info->ts);
+
+ /*C*/ write = local_add_return(info->length, &tail_page->write);
/* set write to only the index of the write */
write &= RB_WRITE_MASK;
+
tail = write - info->length;
+ /* See if we shot pass the end of this buffer page */
+ if (unlikely(write > BUF_PAGE_SIZE)) {
+ if (tail != w) {
+ /* before and after may now different, fix it up*/
+ before = local64_read(&cpu_buffer->before_stamp);
+ after = local64_read(&cpu_buffer->write_stamp);
+ if (before != after)
+ (void)local64_cmpxchg(&cpu_buffer->before_stamp, before, after);
+ }
+ return rb_move_tail(cpu_buffer, tail, info);
+ }
+
+ if (likely(tail == w)) {
+ u64 save_before;
+
+ /* Nothing interrupted us between A and C */
+ /*D*/ local64_set(&cpu_buffer->write_stamp, info->ts);
+ barrier();
+ /*E*/ save_before = local64_read(&cpu_buffer->before_stamp);
+ if (likely(info->add_timestamp != RB_ADD_STAMP_FORCE))
+ /* This did not interrupt any time update */
+ info->delta = info->ts - after;
+ else
+ /* Just use full timestamp for inerrupting event */
+ info->delta = info->ts;
+ barrier();
+ if (unlikely(info->ts != save_before)) {
+ /* SLOW PATH - Interrupted between C and E */
+
+ after = local64_read(&cpu_buffer->write_stamp);
+ /* Write stamp must only go forward */
+ if (save_before > after) {
+ /*
+ * We do not care about the result, only that
+ * it gets updated atomically.
+ */
+ (void)local64_cmpxchg(&cpu_buffer->write_stamp, after, save_before);
+ }
+ }
+ } else {
+ u64 ts;
+ /* SLOW PATH - Interrupted between A and C */
+ after = local64_read(&cpu_buffer->write_stamp);
+ ts = rb_time_stamp(cpu_buffer->buffer);
+ barrier();
+ /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
+ after < ts) {
+ /* Nothing came after this event between C and E */
+ info->delta = ts - after;
+ (void)local64_cmpxchg(&cpu_buffer->write_stamp, after, info->ts);
+ info->ts = ts;
+ } else {
+ /*
+ * Interrupted beween C and E:
+ * Lost the previous events time stamp. Just set the
+ * delta to zero, and this will be the same time as
+ * the event this event interrupted. And the events that
+ * came after this will still be correct (as they would
+ * have built their delta on the previous event.
+ */
+ info->delta = 0;
+ }
+ if (info->add_timestamp == RB_ADD_STAMP_FORCE)
+ info->add_timestamp = RB_ADD_STAMP_NORMAL;
+ }
+
/*
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
+ if (unlikely(!tail && info->add_timestamp != RB_ADD_STAMP_FORCE && !abs))
info->delta = 0;
- /* See if we shot pass the end of this buffer page */
- if (unlikely(write > BUF_PAGE_SIZE))
- return rb_move_tail(cpu_buffer, tail, info);
-
/* We reserved something on the buffer */
event = __rb_page_index(tail_page, tail);
@@ -2944,9 +3054,9 @@ rb_reserve_next_event(struct trace_buffer *buffer,
struct ring_buffer_event *event;
struct rb_event_info info;
int nr_loops = 0;
- u64 diff;
rb_start_commit(cpu_buffer);
+ /* The commit page can not change after this */
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/*
@@ -2965,7 +3075,7 @@ rb_reserve_next_event(struct trace_buffer *buffer,
info.length = rb_calculate_event_length(length);
again:
- info.add_timestamp = 0;
+ info.add_timestamp = RB_ADD_STAMP_NONE;
info.delta = 0;
/*
@@ -2980,22 +3090,6 @@ rb_reserve_next_event(struct trace_buffer *buffer,
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
goto out_fail;
- info.ts = rb_time_stamp(cpu_buffer->buffer);
- diff = info.ts - cpu_buffer->write_stamp;
-
- /* make sure this diff is calculated here */
- barrier();
-
- if (ring_buffer_time_stamp_abs(buffer)) {
- info.delta = info.ts;
- rb_handle_timestamp(cpu_buffer, &info);
- } else /* Did the write stamp get updated already? */
- if (likely(info.ts >= cpu_buffer->write_stamp)) {
- info.delta = diff;
- if (unlikely(test_time_stamp(info.delta)))
- rb_handle_timestamp(cpu_buffer, &info);
- }
-
event = __rb_reserve_next(cpu_buffer, &info);
if (unlikely(PTR_ERR(event) == -EAGAIN)) {
@@ -3004,11 +3098,8 @@ rb_reserve_next_event(struct trace_buffer *buffer,
goto again;
}
- if (!event)
- goto out_fail;
-
- return event;
-
+ if (likely(event))
+ return event;
out_fail:
rb_end_commit(cpu_buffer);
return NULL;
@@ -3154,11 +3245,6 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer,
if (rb_try_to_discard(cpu_buffer, event))
goto out;
- /*
- * The commit is still visible by the reader, so we
- * must still update the timestamp.
- */
- rb_update_write_stamp(cpu_buffer, event);
out:
rb_end_commit(cpu_buffer);
@@ -4475,8 +4561,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->read = 0;
cpu_buffer->read_bytes = 0;
- cpu_buffer->write_stamp = 0;
- cpu_buffer->read_stamp = 0;
+ local64_set(&cpu_buffer->write_stamp, 0);
+ local64_set(&cpu_buffer->before_stamp, 0);
cpu_buffer->lost_events = 0;
cpu_buffer->last_overrun = 0;