From 5f2b0ba4d94b3ac23cbc4b7f675d98eb677a760a Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:23 -0500 Subject: x86, nmi_watchdog: Remove the old nmi_watchdog Now that we have a new nmi_watchdog that is more generic and sits on top of the perf subsystem, we really do not need the old nmi_watchdog any more. In addition, the old nmi_watchdog doesn't really work if you are using the default clocksource, hpet. The old nmi_watchdog code relied on local apic interrupts to determine if the cpu is still alive. With hpet as the clocksource, these interrupts don't increment any more and the old nmi_watchdog triggers false postives. This piece removes the old nmi_watchdog code and stubs out any variables and functions calls. The stubs are the same ones used by the new nmi_watchdog code, so it should be well tested. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 06aab5eee134..0cb3e5c246d0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -16,10 +16,7 @@ */ #ifdef ARCH_HAS_NMI_WATCHDOG #include -extern void touch_nmi_watchdog(void); -extern void acpi_nmi_disable(void); -extern void acpi_nmi_enable(void); -#else +#endif #ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { @@ -30,7 +27,6 @@ extern void touch_nmi_watchdog(void); #endif static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3 From 072b198a4ad48bd722ec6d203d65422a4698eae7 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:24 -0500 Subject: x86, nmi_watchdog: Remove all stub function calls from old nmi_watchdog Now that the bulk of the old nmi_watchdog is gone, remove all the stub variables and hooks associated with it. This touches lots of files mainly because of how the io_apic nmi_watchdog was implemented. Now that the io_apic nmi_watchdog is forever gone, remove all its fingers. Most of this code was not being exercised by virtue of nmi_watchdog != NMI_IO_APIC, so there shouldn't be anything to risky here. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0cb3e5c246d0..1c451e6ecc17 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -25,8 +25,6 @@ static inline void touch_nmi_watchdog(void) #else extern void touch_nmi_watchdog(void); #endif -static inline void acpi_nmi_disable(void) { } -static inline void acpi_nmi_enable(void) { } /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3 From 9c0729dc8062bed96189bd14ac6d4920f3958743 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Fri, 5 Nov 2010 05:59:39 -0400 Subject: x86: Eliminate bp argument from the stack tracing routines The various stack tracing routines take a 'bp' argument in which the caller is supposed to provide the base pointer to use, or 0 if doesn't have one. Since bp is garbage whenever CONFIG_FRAME_POINTER is not defined, this means all callers in principle should either always pass 0, or be conditional on CONFIG_FRAME_POINTER. However, there are only really three use cases for stack tracing: (a) Trace the current task, including IRQ stack if any (b) Trace the current task, but skip IRQ stack (c) Trace some other task In all cases, if CONFIG_FRAME_POINTER is not defined, bp should just be 0. If it _is_ defined, then - in case (a) bp should be gotten directly from the CPU's register, so the caller should pass NULL for regs, - in case (b) the caller should should pass the IRQ registers to dump_trace(), - in case (c) bp should be gotten from the top of the task's stack, so the caller should pass NULL for regs. Hence, the bp argument is not necessary because the combination of task and regs is sufficient to determine an appropriate value for bp. This patch introduces a new inline function stack_frame(task, regs) that computes the desired bp. This function is then called from the two versions of dump_stack(). Signed-off-by: Soren Sandmann Acked-by: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Arjan van de Ven , Cc: Frederic Weisbecker , Cc: Arnaldo Carvalho de Melo , LKML-Reference: > Signed-off-by: Frederic Weisbecker --- include/linux/stacktrace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 51efbef38fb0..25310f1d7f37 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -2,6 +2,7 @@ #define __LINUX_STACKTRACE_H struct task_struct; +struct pt_regs; #ifdef CONFIG_STACKTRACE struct task_struct; @@ -13,7 +14,8 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); +extern void save_stack_trace_regs(struct stack_trace *trace, + struct pt_regs *regs); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); -- cgit v1.2.3 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows them to be used in perf by non privileged users in case of task bound tracing. This is desired for syscalls tracepoint because they don't leak global system informations, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8beabb958f61..312dce7e0d52 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -154,12 +154,14 @@ enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, + TRACE_EVENT_FL_CAP_ANY_BIT, }; enum { TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), + TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), }; struct ftrace_event_call { -- cgit v1.2.3 From 1ed0c5971159974185653170543a764cc061c857 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:46:57 +0100 Subject: tracing: New macro to set up initial event flags value This introduces the new TRACE_EVENT_FLAGS() macro in order to set up initial event flags value. This macro must simply follow the definition of a trace event and take the event name and the flag value as parameters: TRACE_EVENT(my_event, ..... .... ); TRACE_EVENT_FLAGS(my_event, 1) This will set up 1 as the initial my_event->flags value. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/tracepoint.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a4a90b6726ce..5a6074fcd81d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -234,6 +234,8 @@ do_trace: \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -354,4 +356,6 @@ do_trace: \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* ifdef TRACE_EVENT (see note above) */ -- cgit v1.2.3 From 53cf810b1934f08a68e131aeeb16267a778f43df Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:11:42 +0100 Subject: tracing: Allow syscall trace events for non privileged users As for the raw syscalls events, individual syscall events won't leak system wide information on task bound tracing. Allow non privileged users to use them in such workflow. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 8 ++++++++ include/linux/syscalls.h | 6 ++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 312dce7e0d52..725bf6bd39f7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -198,6 +198,14 @@ struct ftrace_event_call { #endif }; +#define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cacc27a0e285..13b9731d30cf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -137,7 +137,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ @@ -152,7 +153,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ -- cgit v1.2.3 From 423478cde453eebdfcfebf4b8d378d8f5d49b853 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:21:26 +0100 Subject: tracing: Remove useless syscall ftrace_event_call declaration It is defined right after, which makes the declaration completely useless. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/syscalls.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 13b9731d30cf..18cd0684fc4e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -143,8 +141,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ -- cgit v1.2.3 From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 725bf6bd39f7..47e3997f7b5c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -225,6 +225,10 @@ enum { FILTER_PTR_STRING, }; +#define EVENT_STORAGE_SIZE 128 +extern struct mutex event_storage_mutex; +extern char event_storage[EVENT_STORAGE_SIZE]; + extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, -- cgit v1.2.3 From 6c7e550f13f8ad82efb6a5653ae628c2543c1768 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Tue, 23 Nov 2010 16:21:43 +0100 Subject: perf: Introduce is_sampling_event() and use it when appropriate. Signed-off-by: Franck Bui-Huu Signed-off-by: Peter Zijlstra LKML-Reference: <1290525705-6265-1-git-send-email-fbuihuu@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index de2c41758e29..cbf04cc1e630 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -969,6 +969,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool is_sampling_event(struct perf_event *event) +{ + return event->attr.sample_period != 0; +} + /* * Return 1 for a software event, 0 for a hardware event */ -- cgit v1.2.3 From 004417a6d468e24399e383645c068b498eed84ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Nov 2010 18:38:29 +0100 Subject: perf, arch: Cleanup perf-pmu init vs lockup-detector The perf hardware pmu got initialized at various points in the boot, some before early_initcall() some after (notably arch_initcall). The problem is that the NMI lockup detector is ran from early_initcall() and expects the hardware pmu to be present. Sanitize this by moving all architecture hardware pmu implementations to initialize at early_initcall() and move the lockup detector to an explicit initcall right after that. Cc: paulus Cc: davem Cc: Michael Cree Cc: Deng-Cheng Zhu Acked-by: Paul Mundt Acked-by: Will Deacon Signed-off-by: Peter Zijlstra LKML-Reference: <1290707759.2145.119.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c79e921a68b..d2e63d1e725c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; +void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) { @@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } +static inline void lockup_detector_init(void) +{ +} #endif #ifdef CONFIG_DETECT_HUNG_TASK -- cgit v1.2.3 From c320c7b7d380e630f595de1236d9d085b035d5b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Oct 2010 12:50:11 -0200 Subject: perf events: Precalculate the header space for PERF_SAMPLE_ fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PERF_SAMPLE_{CALLCHAIN,RAW} have variable lenghts per sample, but the others can be precalculated, reducing a bit the per sample cost. Acked-by: Peter Zijlstra Cc: Frédéric Weisbecker Cc: Ian Munsie Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cbf04cc1e630..adf6d9931643 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -758,6 +758,8 @@ struct perf_event { u64 shadow_ctx_time; struct perf_event_attr attr; + u16 header_size; + u16 read_size; struct hw_perf_event hw; struct perf_event_context *ctx; -- cgit v1.2.3 From 287050d390264402e11bea8b811859e42e8faa29 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Dec 2010 16:46:18 -0500 Subject: tracing: Add TRACE_EVENT_CONDITIONAL() There are instances in the kernel that we only want to trace a tracepoint when a certain condition is set. But we do not want to test for that condition in the core kernel. If we test for that condition before calling the tracepoin, then we will be performing that test even when tracing is not enabled. This is 99.99% of the time. We currently can just filter out on that condition, but that happens after we write to the trace buffer. We just wasted time writing to the ring buffer for an event we never cared about. This patch adds: TRACE_EVENT_CONDITION() and DEFINE_EVENT_CONDITION() These have a new TP_CONDITION() argument that comes right after the TP_ARGS(). This condition can use the parameters of TP_ARGS() in the TRACE_EVENT() to determine if the tracepoint should be traced or not. The TP_CONDITION() will be placed in a if (cond) trace; For example, for the tracepoint sched_wakeup, it is useless to trace a wakeup event where the caller never actually wakes anything up (where success == 0). So adding: TP_CONDITION(success), which uses the "success" parameter of the wakeup tracepoint will have it only trace when we have successfully woken up a task. Acked-by: Mathieu Desnoyers Acked-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5a6074fcd81d..d3e4f87e95c0 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, #define TP_PROTO(args...) args #define TP_ARGS(args...) args +#define TP_CONDITION(args...) args #ifdef CONFIG_TRACEPOINTS @@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". */ -#define __DO_TRACE(tp, proto, args) \ +#define __DO_TRACE(tp, proto, args, cond) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ void *__data; \ \ + if (!(cond)) \ + return; \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ - TP_ARGS(data_args)); \ + TP_ARGS(data_args), \ + TP_CONDITION(cond)); \ } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ @@ -186,7 +190,7 @@ do_trace: \ EXPORT_SYMBOL(__tracepoint_##name) #else /* !CONFIG_TRACEPOINTS */ -#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ static inline int \ @@ -227,13 +231,18 @@ do_trace: \ * "void *__data, proto" as the callback prototype. */ #define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , void *__data, __data) + __DECLARE_TRACE(name, void, , 1, void *__data, __data) #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) + #define TRACE_EVENT_FLAGS(event, flag) #endif /* DECLARE_TRACE */ @@ -349,12 +358,20 @@ do_trace: \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_CONDITION(template, name, proto, \ + args, cond) \ + DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_CONDITION(name, proto, args, cond, \ + struct, assign, print) \ + DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_FLAGS(event, flag) -- cgit v1.2.3 From 6844c09d849aeb00e8ddfe9525e8567a531c22d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Dec 2010 16:36:35 -0200 Subject: perf events: Separate the routines handling the PERF_SAMPLE_ identity fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those will be made available in sample like events like MMAP, EXEC, etc in a followup patch. So precalculate the extra id header space and have a separate routine to fill them up. V2: Thomas noticed that the id header needs to be precalculated at inherit_events too: LKML-Reference: Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Ian Munsie Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Frédéric Weisbecker Cc: Ian Munsie Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian Cc: Thomas Gleixner LKML-Reference: <1291318772-30880-2-git-send-email-acme@infradead.org> Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index adf6d9931643..b9950b1620d8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -759,6 +759,7 @@ struct perf_event { struct perf_event_attr attr; u16 header_size; + u16 id_header_size; u16 read_size; struct hw_perf_event hw; -- cgit v1.2.3 From c980d1091810df13f21aabbce545fd98f545bbf7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 4 Dec 2010 23:02:20 -0200 Subject: perf events: Make sample_type identity fields available in all PERF_RECORD_ events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If perf_event_attr.sample_id_all is set it will add the PERF_SAMPLE_ identity info: TID, TIME, ID, CPU, STREAM_ID As a trailer, so that older perf tools can process new files, just ignoring the extra payload. With this its possible to do further analysis on problems in the event stream, like detecting reordering of MMAP and FORK events, etc. V2: Fixup header size in comm, mmap and task processing, as we have to take into account different sample_types for each matching event, noticed by Thomas Gleixner. Thomas also noticed a problem in v2 where if we didn't had space in the buffer we wouldn't restore the header size. Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Ian Munsie Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Frédéric Weisbecker Cc: Ian Munsie Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian Cc: Thomas Gleixner LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b9950b1620d8..2814ead4adb8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -215,8 +215,9 @@ struct perf_event_attr { */ precise_ip : 2, /* skid constraint */ mmap_data : 1, /* non-exec mmap data */ + sample_id_all : 1, /* sample_type all events */ - __reserved_1 : 46; + __reserved_1 : 45; union { __u32 wakeup_events; /* wakeup every n events */ @@ -327,6 +328,15 @@ struct perf_event_header { enum perf_event_type { /* + * If perf_event_attr.sample_id_all is set then all event types will + * have the sample_type selected fields related to where/when + * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID) + * described in PERF_RECORD_SAMPLE below, it will be stashed just after + * the perf_event_header and the fields already present for the existing + * fields, i.e. at the end of the payload. That way a newer perf.data + * file will be supported by older perf tools, with these new optional + * fields being ignored. + * * The MMAP events record the PROT_EXEC mappings so that we can * correlate userspace IPs to code. They have the following structure: * -- cgit v1.2.3 From cd7ebe2298ff1c3112232878678ce5fe6be8a15b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 3 Dec 2010 18:54:28 +0900 Subject: kprobes: Use text_poke_smp_batch for optimizing Use text_poke_smp_batch() in optimization path for reducing the number of stop_machine() issues. If the number of optimizing probes is more than MAX_OPTIMIZE_PROBES(=256), kprobes optimizes first MAX_OPTIMIZE_PROBES probes and kicks optimizer for remaining probes. Changes in v5: - Use kick_kprobe_optimizer() instead of directly calling schedule_delayed_work(). - Rescheduling optimizer outside of kprobe mutex lock. Changes in v2: - Allocate code buffer and parameters in arch_init_kprobes() instead of using static arraies. - Merge previous max optimization limit patch into this patch. So, this patch introduces upper limit of optimization at once. Signed-off-by: Masami Hiramatsu Cc: Rusty Russell Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jason Baron Cc: Mathieu Desnoyers Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <20101203095428.2961.8994.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e7d1b2e0070d..fe157ba6aa0e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -275,7 +275,7 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); -extern int arch_optimize_kprobe(struct optimized_kprobe *op); +extern void arch_optimize_kprobes(struct list_head *oplist); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern kprobe_opcode_t *get_optinsn_slot(void); extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); -- cgit v1.2.3 From f984ba4eb575e4a27ed28a76d4126d2aa9233c32 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 3 Dec 2010 18:54:34 +0900 Subject: kprobes: Use text_poke_smp_batch for unoptimizing Use text_poke_smp_batch() on unoptimization path for reducing the number of stop_machine() issues. If the number of unoptimizing probes is more than MAX_OPTIMIZE_PROBES(=256), kprobes unoptimizes first MAX_OPTIMIZE_PROBES probes and kicks optimizer for remaining probes. Signed-off-by: Masami Hiramatsu Cc: Rusty Russell Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jason Baron Cc: Mathieu Desnoyers Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <20101203095434.2961.22657.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index fe157ba6aa0e..b78edb58ee66 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -276,6 +276,8 @@ extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); extern void arch_optimize_kprobes(struct list_head *oplist); +extern void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern kprobe_opcode_t *get_optinsn_slot(void); extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); -- cgit v1.2.3 From 96a84c20d635fb1e98ab92f9fc517c4441f5c424 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 29 Nov 2010 17:07:16 -0500 Subject: lockup detector: Compile fixes from removing the old x86 nmi watchdog My patch that removed the old x86 nmi watchdog broke other arches. This change reverts a piece of that patch and puts the change in the correct spot. Signed-off-by: Don Zickus Reviewed-by: Cyrill Gorcunov Cc: fweisbec@gmail.com Cc: yinghai@kernel.org LKML-Reference: <1291068437-5331-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 1c451e6ecc17..17ccf44e7dcb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -16,7 +16,8 @@ */ #ifdef ARCH_HAS_NMI_WATCHDOG #include -#endif +extern void touch_nmi_watchdog(void); +#else #ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { @@ -25,6 +26,7 @@ static inline void touch_nmi_watchdog(void) #else extern void touch_nmi_watchdog(void); #endif +#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3 From 2e80a82a49c4c7eca4e35734380f28298ba5db19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Nov 2010 23:17:36 +0100 Subject: perf: Dynamic pmu types Extend the perf_pmu_register() interface to allow for named and dynamic pmu types. Because we need to support the existing static types we cannot use dynamic types for everything, hence provide a type argument. If we want to enumerate the PMUs they need a name, provide one. Signed-off-by: Peter Zijlstra LKML-Reference: <20101117222056.259707703@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 30e50e2c7f30..21206d27466b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -588,6 +588,9 @@ struct perf_event; struct pmu { struct list_head entry; + char *name; + int type; + int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; int task_ctx_nr; @@ -916,7 +919,7 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -extern int perf_pmu_register(struct pmu *pmu); +extern int perf_pmu_register(struct pmu *pmu, char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); -- cgit v1.2.3 From abe43400579d5de0078c2d3a760e6598e183f871 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Nov 2010 23:17:37 +0100 Subject: perf: Sysfs enumeration Simple sysfs emumeration of the PMUs. Use a "event_source" bus, and add PMU devices using their name. Each PMU device has a type attribute which contrains the value needed for perf_event_attr::type to identify this PMU. This is the minimal stub needed to start using this interface, we'll consider extending the sysfs usage later. Cc: Kay Sievers Cc: Greg KH Signed-off-by: Peter Zijlstra LKML-Reference: <20101117222056.316982569@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 21206d27466b..dda5b0a3ff60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -588,6 +588,7 @@ struct perf_event; struct pmu { struct list_head entry; + struct device *dev; char *name; int type; -- cgit v1.2.3 From 4a7863cc2eb5f9804f1c4e9156619a801cd7f14f Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 22 Dec 2010 14:00:03 -0500 Subject: x86, nmi_watchdog: Remove ARCH_HAS_NMI_WATCHDOG and rely on CONFIG_HARDLOCKUP_DETECTOR The x86 arch has shifted its use of the nmi_watchdog from a local implementation to the global one provide by kernel/watchdog.c. This shift has caused a whole bunch of compile problems under different config options. I attempt to simplify things with the patch below. In order to simplify things, I had to come to terms with the meaning of two terms ARCH_HAS_NMI_WATCHDOG and CONFIG_HARDLOCKUP_DETECTOR. Basically they mean the same thing, the former on a local level and the latter on a global level. With the old x86 nmi watchdog gone, there is no need to rely on defining the ARCH_HAS_NMI_WATCHDOG variable because it doesn't make sense any more. x86 will now use the global implementation. The changes below do a few things. First it changes the few places that relied on ARCH_HAS_NMI_WATCHDOG to use CONFIG_X86_LOCAL_APIC (the former was an alias for the latter anyway, so nothing unusual here). Those pieces of code were relying more on local apic functionality the nmi watchdog functionality, so the change should make sense. Second, I removed the x86 implementation of touch_nmi_watchdog(). It isn't need now, instead x86 will rely on kernel/watchdog.c's implementation. Third, I removed the #define ARCH_HAS_NMI_WATCHDOG itself from x86. And tweaked the include/linux/nmi.h file to tell users to look for an externally defined touch_nmi_watchdog in the case of ARCH_HAS_NMI_WATCHDOG _or_ CONFIG_HARDLOCKUP_DETECTOR. This changes removes some of the ugliness in that file. Finally, I added a Kconfig dependency for CONFIG_HARDLOCKUP_DETECTOR that said you can't have ARCH_HAS_NMI_WATCHDOG _and_ CONFIG_HARDLOCKUP_DETECTOR. You can only have one nmi_watchdog. Tested with ARCH=i386: allnoconfig, defconfig, allyesconfig, (various broken configs) ARCH=x86_64: allnoconfig, defconfig, allyesconfig, (various broken configs) Hopefully, after this patch I won't get any more compile broken emails. :-) v3: changed a couple of 'linux/nmi.h' -> 'asm/nmi.h' to pick-up correct function prototypes when CONFIG_HARDLOCKUP_DETECTOR is not set. Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: fweisbec@gmail.com LKML-Reference: <1293044403-14117-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 17ccf44e7dcb..c536f8545f74 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,18 +14,14 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#ifdef ARCH_HAS_NMI_WATCHDOG +#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include extern void touch_nmi_watchdog(void); #else -#ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); } -#else -extern void touch_nmi_watchdog(void); -#endif #endif /* -- cgit v1.2.3