From c757bea93bea4b77ebd181cc6dca60c15e3b1a2c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 21 Dec 2009 22:35:16 -0500
Subject: tracing: Fix setting tracer specific options

The function __set_tracer_option() takes as its last parameter a
"neg" value. If set it should negate the value of the option.

The trace_options_write() passed the value written to the file
which is what the new value needs to be set as. But since this
is not the negative, it never sets the value.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee61915935d5..d0a4c12d1f1c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	if (!!(topt->flags->val & topt->opt->bit) != val) {
 		mutex_lock(&trace_types_lock);
 		ret = __set_tracer_option(current_trace, topt->flags,
-					  topt->opt, val);
+					  topt->opt, !val);
 		mutex_unlock(&trace_types_lock);
 		if (ret)
 			return ret;
-- 
cgit v1.2.3


From 4440095c8268c1a5e11577097d2be429cec036ca Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 23 Dec 2009 21:00:20 +0100
Subject: SYSCTL: Print binary sysctl warnings (nearly) only once

When printing legacy sysctls print the warning message
for each of them only once.  This way there is a guarantee
the syslog won't be flooded for any sane program.

The original attempt at this made the tables non const and stored
the flag inline.

Linus suggested using a separate hash table for this, this is based on a
code snippet from him.

The hash implies this is not exact and can sometimes not print a
new sysctl due to a hash collision, but in practice this should not
be a problem

I used a FNV32 hash over the binary string with a 32byte bitmap. This
gives relatively little collisions when all the predefined binary sysctls
are hashed:

size 256
bucket
length      number
0:          [25]
1:          [67]
2:          [88]
3:          [47]
4:          [22]
5:          [6]
6:          [1]

The worst case is a single collision of 6 hash values.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 kernel/sysctl_binary.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 112533d5fc08..8f5d16e0707a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1417,6 +1417,35 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
 	return;
 }
 
+#define WARN_ONCE_HASH_BITS 8
+#define WARN_ONCE_HASH_SIZE (1<<WARN_ONCE_HASH_BITS)
+
+static DECLARE_BITMAP(warn_once_bitmap, WARN_ONCE_HASH_SIZE);
+
+#define FNV32_OFFSET 2166136261U
+#define FNV32_PRIME 0x01000193
+
+/*
+ * Print each legacy sysctl (approximately) only once.
+ * To avoid making the tables non-const use a external
+ * hash-table instead.
+ * Worst case hash collision: 6, but very rarely.
+ * NOTE! We don't use the SMP-safe bit tests. We simply
+ * don't care enough.
+ */
+static void warn_on_bintable(const int *name, int nlen)
+{
+	int i;
+	u32 hash = FNV32_OFFSET;
+
+	for (i = 0; i < nlen; i++)
+		hash = (hash ^ name[i]) * FNV32_PRIME;
+	hash %= WARN_ONCE_HASH_SIZE;
+	if (__test_and_set_bit(hash, warn_once_bitmap))
+		return;
+	deprecated_sysctl_warning(name, nlen);
+}
+
 static ssize_t do_sysctl(int __user *args_name, int nlen,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
@@ -1431,7 +1460,7 @@ static ssize_t do_sysctl(int __user *args_name, int nlen,
 		if (get_user(name[i], args_name + i))
 			return -EFAULT;
 
-	deprecated_sysctl_warning(name, nlen);
+	warn_on_bintable(name, nlen);
 
 	return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
 }
-- 
cgit v1.2.3


From c2ef6661ce62e26a8c0978e521fab646128a144b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 21 Dec 2009 13:02:24 +0100
Subject: kprobes: Fix distinct type warning

Every time I see this:

 kernel/kprobes.c: In function 'register_kretprobe':
 kernel/kprobes.c:1038: warning: comparison of distinct pointer types lacks a cast

I'm wondering if something changed in common code and we need to
do something for s390. Apparently that's not the case.
Let's get rid of this annoying warning.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
LKML-Reference: <20091221120224.GA4471@osiris.boeblingen.de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5342a344c43..b7df302a0204 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	/* Pre-allocate memory for max kretprobe instances */
 	if (rp->maxactive <= 0) {
 #ifdef CONFIG_PREEMPT
-		rp->maxactive = max(10, 2 * num_possible_cpus());
+		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
 #else
 		rp->maxactive = num_possible_cpus();
 #endif
-- 
cgit v1.2.3


From 40892367bc893f3abf6f5ca8ac2ed1c98ba26a77 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 21 Dec 2009 12:01:17 -0800
Subject: tracing: Kconfig spelling fixes and cleanups

Fix filename reference (ftrace-implementation.txt ->
ftrace-design.txt).

Fix spelling, punctuation, grammar.

Fix help text indentation and line lengths to reduce need for
horizontal scrolling or larger window sizes.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20091221120117.3fb49cdc.randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 112 +++++++++++++++++++++++++--------------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..6c22d8a2f289 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,17 +12,17 @@ config NOP_TRACER
 config HAVE_FTRACE_NMI_ENTER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_GRAPH_FP_TEST
 	bool
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config HAVE_HW_BRANCH_TRACER
 	bool
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
-	  See Documentation/trace/ftrace-implementation.txt
+	  See Documentation/trace/ftrace-design.txt
 
 config TRACER_MAX_TRACE
 	bool
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
 # This allows those options to appear when no other tracer is selected. But the
 # options do not appear when something else selects it. We need the two options
 # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
-# hidding of the automatic options.
+# hiding of the automatic options.
 
 config TRACING
 	bool
@@ -119,7 +119,7 @@ menuconfig FTRACE
 	bool "Tracers"
 	default y if DEBUG_KERNEL
 	help
-	 Enable the kernel tracing infrastructure.
+	  Enable the kernel tracing infrastructure.
 
 if FTRACE
 
@@ -133,7 +133,7 @@ config FUNCTION_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
 	  by using a compiler feature to insert a small, 5-byte No-Operation
-	  instruction to the beginning of every kernel function, which NOP
+	  instruction at the beginning of every kernel function, which NOP
 	  sequence is then dynamically patched into a tracer call when
 	  tracing is enabled by the administrator. If it's runtime disabled
 	  (the bootup default), then the overhead of the instructions is very
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER
 	  and its entry.
 	  Its first purpose is to trace the duration of functions and
 	  draw a call graph for each thread with some information like
-	  the return value. This is done by setting the current return 
+	  the return value. This is done by setting the current return
 	  address on the current task structure into a stack of calls.
 
 
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the preempt-off timing option can be
 	  used together or separately.)
 
@@ -186,7 +186,7 @@ config PREEMPT_TRACER
 	select TRACER_MAX_TRACE
 	select RING_BUFFER_ALLOW_SWAP
 	help
-	  This option measures the time spent in preemption off critical
+	  This option measures the time spent in preemption-off critical
 	  sections, with microsecond accuracy.
 
 	  The default measurement method is a maximum search, which is
@@ -195,7 +195,7 @@ config PREEMPT_TRACER
 
 	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
-	  (Note that kernel size and overhead increases with this option
+	  (Note that kernel size and overhead increase with this option
 	  enabled. This option and the irqs-off timing option can be
 	  used together or separately.)
 
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS
 	depends on !GENERIC_TRACER
 	select TRACING
 	help
-	  This tracer hooks to various trace points in the kernel
+	  This tracer hooks to various trace points in the kernel,
 	  allowing the user to pick and choose which trace point they
 	  want to trace. It also includes the sched_switch tracer plugin.
 
@@ -265,19 +265,19 @@ choice
 	 The likely/unlikely profiler only looks at the conditions that
 	 are annotated with a likely or unlikely macro.
 
-	 The "all branch" profiler will profile every if statement in the
+	 The "all branch" profiler will profile every if-statement in the
 	 kernel. This profiler will also enable the likely/unlikely
-	 profiler as well.
+	 profiler.
 
-	 Either of the above profilers add a bit of overhead to the system.
-	 If unsure choose "No branch profiling".
+	 Either of the above profilers adds a bit of overhead to the system.
+	 If unsure, choose "No branch profiling".
 
 config BRANCH_PROFILE_NONE
 	bool "No branch profiling"
 	help
-	 No branch profiling. Branch profiling adds a bit of overhead.
-	 Only enable it if you want to analyse the branching behavior.
-	 Otherwise keep it disabled.
+	  No branch profiling. Branch profiling adds a bit of overhead.
+	  Only enable it if you want to analyse the branching behavior.
+	  Otherwise keep it disabled.
 
 config PROFILE_ANNOTATED_BRANCHES
 	bool "Trace likely/unlikely profiler"
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES
 
 	  /sys/kernel/debug/tracing/profile_annotated_branch
 
-	  Note: this will add a significant overhead, only turn this
+	  Note: this will add a significant overhead; only turn this
 	  on if you need to profile the system's use of these macros.
 
 config PROFILE_ALL_BRANCHES
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES
 
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
-	  is to be analyzed
+	  is to be analyzed in much detail.
 endchoice
 
 config TRACING_BRANCHES
@@ -335,7 +335,7 @@ config POWER_TRACER
 	depends on X86
 	select GENERIC_TRACER
 	help
-	  This tracer helps developers to analyze and optimize the kernels
+	  This tracer helps developers to analyze and optimize the kernel's
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER
 	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
-	  buffer giving access to the last N branches for each cpu.
+	  buffer, giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
 	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
-	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
 	  data is then fed to the userspace application in order to analyse
 	  allocation hotspots, internal fragmentation and so on, making it
 	  possible to see how well an allocator performs, as well as debug
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER
 	bool "Trace workqueues"
 	select GENERIC_TRACER
 	help
-	  The workqueue tracer provides some statistical informations
+	  The workqueue tracer provides some statistical information
           about each cpu workqueue thread such as the number of the
           works inserted and executed since their creation. It can help
-          to evaluate the amount of work each of them have to perform.
+          to evaluate the amount of work each of them has to perform.
           For example it can help a developer to decide whether he should
-          choose a per cpu workqueue instead of a singlethreaded one.
+          choose a per-cpu workqueue instead of a singlethreaded one.
 
 config BLK_DEV_IO_TRACE
-	bool "Support for tracing block io actions"
+	bool "Support for tracing block IO actions"
 	depends on SYSFS
 	depends on BLOCK
 	select RELAY
@@ -456,15 +456,15 @@ config KPROBE_EVENT
 	select TRACING
 	default y
 	help
-	  This allows the user to add tracing events (similar to tracepoints) on the fly
-	  via the ftrace interface. See Documentation/trace/kprobetrace.txt
-	  for more details.
+	  This allows the user to add tracing events (similar to tracepoints)
+	  on the fly via the ftrace interface. See
+	  Documentation/trace/kprobetrace.txt for more details.
 
 	  Those events can be inserted wherever kprobes can probe, and record
 	  various register and memory values.
 
-	  This option is also required by perf-probe subcommand of perf tools. If
-	  you want to use perf tools, this option is strongly recommended.
+	  This option is also required by perf-probe subcommand of perf tools.
+	  If you want to use perf tools, this option is strongly recommended.
 
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
 	default y
 	help
-         This option will modify all the calls to ftrace dynamically
-	 (will patch them out of the binary image and replaces them
-	 with a No-Op instruction) as they are called. A table is
-	 created to dynamically enable them again.
+          This option will modify all the calls to ftrace dynamically
+	  (will patch them out of the binary image and replace them
+	  with a No-Op instruction) as they are called. A table is
+	  created to dynamically enable them again.
 
-	 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
-	 has native performance as long as no tracing is active.
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
+	  otherwise has native performance as long as no tracing is active.
 
-	 The changes to the code are done by a kernel thread that
-	 wakes up once a second and checks to see if any ftrace calls
-	 were made. If so, it runs stop_machine (stops all CPUS)
-	 and modifies the code to jump over the call to ftrace.
+	  The changes to the code are done by a kernel thread that
+	  wakes up once a second and checks to see if any ftrace calls
+	  were made. If so, it runs stop_machine (stops all CPUS)
+	  and modifies the code to jump over the call to ftrace.
 
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
 	default n
 	help
-	 This option enables the kernel function profiler. A file is created
-	 in debugfs called function_profile_enabled which defaults to zero.
-	 When a 1 is echoed into this file profiling begins, and when a
-	 zero is entered, profiling stops. A file in the trace_stats
-	 directory called functions, that show the list of functions that
-	 have been hit and their counters.
+	  This option enables the kernel function profiler. A file is created
+	  in debugfs called function_profile_enabled which defaults to zero.
+	  When a 1 is echoed into this file profiling begins, and when a
+	  zero is entered, profiling stops. A "functions" file is created in
+	  the trace_stats directory; this file shows the list of functions that
+	  have been hit and their counters.
 
-	 If in doubt, say N
+	  If in doubt, say N.
 
 config FTRACE_MCOUNT_RECORD
 	def_bool y
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK
 	tristate "Ring buffer benchmark stress tester"
 	depends on RING_BUFFER
 	help
-	  This option creates a test to stress the ring buffer and bench mark it.
-	  It creates its own ring buffer such that it will not interfer with
+	  This option creates a test to stress the ring buffer and benchmark it.
+	  It creates its own ring buffer such that it will not interfere with
 	  any other users of the ring buffer (such as ftrace). It then creates
 	  a producer and consumer that will run for 10 seconds and sleep for
 	  10 seconds. Each interval it will print out the number of events
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK
 	  It does not disable interrupts or raise its priority, so it may be
 	  affected by processes that are running.
 
-	  If unsure, say N
+	  If unsure, say N.
 
 endif # FTRACE
 
-- 
cgit v1.2.3


From 88f7a890d74137ab0d126a5d65679cd620f1a289 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 14:22:22 +0800
Subject: ksym_tracer: Fix to make the tracer work

ksym tracer doesn't work:

 # echo tasklist_lock:rw- > ksym_trace_filter
 -bash: echo: write error: No such device

It's because we pass to perf_event_create_kernel_counter()
a cpu number which is not present.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B3AF19E.1010201@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/hw_breakpoint.c    | 10 +++++++---
 kernel/trace/trace_ksym.c |  1 -
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 366eedf949c0..48fb0bb6992a 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 	if (!cpu_events)
 		return ERR_PTR(-ENOMEM);
 
-	for_each_possible_cpu(cpu) {
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
 		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
 
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			goto fail;
 		}
 	}
+	put_online_cpus();
 
 	return cpu_events;
 
 fail:
-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
 		if (IS_ERR(*pevent))
 			break;
 		unregister_hw_breakpoint(*pevent);
 	}
+	put_online_cpus();
+
 	free_percpu(cpu_events);
-	/* return the error if any */
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index faf37fa4408c..340b6ff193e0 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	entry->attr.bp_addr = addr;
 	entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
 
-	ret = -EAGAIN;
 	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
 					ksym_hbp_handler);
 
-- 
cgit v1.2.3


From 3d13ec2efdb5843ad91e57b60d50b44d922cf063 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 14:23:19 +0800
Subject: ksym_tracer: Fix to allow writing newline to ksym_trace_filter

It used to work, but now doesn't:

 # echo > ksym_filter
 bash: echo: write error: Invalid argument

It's caused by d954fbf0ff6b5fdfb32350e85a2f15d3db976506
("tracing: Fix wrong usage of strstrip in trace_ksyms").

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B3AF1D7.5040400@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_ksym.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 340b6ff193e0..160a8d8b37a2 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -299,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 	 * 2: echo 0 > ksym_trace_filter
 	 * 3: echo "*:---" > ksym_trace_filter
 	 */
-	if (!buf[0] || !strcmp(buf, "0") ||
-	    !strcmp(buf, "*:---")) {
+	if (!input_string[0] || !strcmp(input_string, "0") ||
+	    !strcmp(input_string, "*:---")) {
 		__ksym_trace_reset();
 		ret = 0;
 		goto out;
-- 
cgit v1.2.3


From e6d9491bf8ba6728cc86aeabbc688d20ec0563b5 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 14:23:40 +0800
Subject: ksym_tracer: Fix race when incrementing count

We are under rcu read section but not holding the write lock, so
count++ is not atomic. Use atomic64_t instead.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B3AF1EC.9010608@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_ksym.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 160a8d8b37a2..67d79f709fc5 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -32,6 +32,8 @@
 #include <linux/hw_breakpoint.h>
 #include <asm/hw_breakpoint.h>
 
+#include <asm/atomic.h>
+
 /*
  * For now, let us restrict the no. of symbols traced simultaneously to number
  * of available hardware breakpoint registers.
@@ -44,7 +46,7 @@ struct trace_ksym {
 	struct perf_event	**ksym_hbp;
 	struct perf_event_attr	attr;
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-	unsigned long		counter;
+	atomic64_t		counter;
 #endif
 	struct hlist_node	ksym_hlist;
 };
@@ -69,9 +71,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
-		if ((entry->attr.bp_addr == hbp_hit_addr) &&
-		    (entry->counter <= MAX_UL_INT)) {
-			entry->counter++;
+		if (entry->attr.bp_addr == hbp_hit_addr) {
+			atomic64_inc(&entry->counter);
 			break;
 		}
 	}
@@ -501,7 +502,8 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
 		seq_printf(m, "  %-36s", fn_name);
 	else
 		seq_printf(m, "  %-36s", "<NA>");
-	seq_printf(m, " %15lu\n", entry->counter);
+	seq_printf(m, " %15llu\n",
+		   (unsigned long long)atomic64_read(&entry->counter));
 
 	return 0;
 }
-- 
cgit v1.2.3


From 53ab668064edaeef99c0ee22799483d45f4c81f6 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 14:24:03 +0800
Subject: ksym_tracer: Remove trace_stat

trace_stat is problematic. Don't use it, use seqfile instead.

This fixes a race that reading the stat file is not protected by
any lock, which can lead to use after free.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B3AF203.40200@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_ksym.c | 127 ++++++++++++++++++----------------------------
 1 file changed, 50 insertions(+), 77 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 67d79f709fc5..94103cdcf9d8 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,7 +26,6 @@
 #include <linux/fs.h>
 
 #include "trace_output.h"
-#include "trace_stat.h"
 #include "trace.h"
 
 #include <linux/hw_breakpoint.h>
@@ -444,103 +443,77 @@ struct tracer ksym_tracer __read_mostly =
 	.print_line	= ksym_trace_output
 };
 
-__init static int init_ksym_trace(void)
-{
-	struct dentry *d_tracer;
-	struct dentry *entry;
-
-	d_tracer = tracing_init_dentry();
-	ksym_filter_entry_count = 0;
-
-	entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
-				    NULL, &ksym_tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'ksym_trace_filter' file\n");
-
-	return register_tracer(&ksym_tracer);
-}
-device_initcall(init_ksym_trace);
-
-
 #ifdef CONFIG_PROFILE_KSYM_TRACER
-static int ksym_tracer_stat_headers(struct seq_file *m)
+static int ksym_profile_show(struct seq_file *m, void *v)
 {
+	struct hlist_node *node;
+	struct trace_ksym *entry;
+	int access_type = 0;
+	char fn_name[KSYM_NAME_LEN];
+
 	seq_puts(m, "  Access Type ");
 	seq_puts(m, "  Symbol                                       Counter\n");
 	seq_puts(m, "  ----------- ");
 	seq_puts(m, "  ------                                       -------\n");
-	return 0;
-}
 
-static int ksym_tracer_stat_show(struct seq_file *m, void *v)
-{
-	struct hlist_node *stat = v;
-	struct trace_ksym *entry;
-	int access_type = 0;
-	char fn_name[KSYM_NAME_LEN];
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
 
-	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+		access_type = entry->attr.bp_type;
 
-	access_type = entry->attr.bp_type;
+		switch (access_type) {
+		case HW_BREAKPOINT_R:
+			seq_puts(m, "  R           ");
+			break;
+		case HW_BREAKPOINT_W:
+			seq_puts(m, "  W           ");
+			break;
+		case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+			seq_puts(m, "  RW          ");
+			break;
+		default:
+			seq_puts(m, "  NA          ");
+		}
 
-	switch (access_type) {
-	case HW_BREAKPOINT_R:
-		seq_puts(m, "  R           ");
-		break;
-	case HW_BREAKPOINT_W:
-		seq_puts(m, "  W           ");
-		break;
-	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
-		seq_puts(m, "  RW          ");
-		break;
-	default:
-		seq_puts(m, "  NA          ");
+		if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
+			seq_printf(m, "  %-36s", fn_name);
+		else
+			seq_printf(m, "  %-36s", "<NA>");
+		seq_printf(m, " %15llu\n",
+			   (unsigned long long)atomic64_read(&entry->counter));
 	}
-
-	if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
-		seq_printf(m, "  %-36s", fn_name);
-	else
-		seq_printf(m, "  %-36s", "<NA>");
-	seq_printf(m, " %15llu\n",
-		   (unsigned long long)atomic64_read(&entry->counter));
+	rcu_read_unlock();
 
 	return 0;
 }
 
-static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+static int ksym_profile_open(struct inode *node, struct file *file)
 {
-	return ksym_filter_head.first;
-}
-
-static void *
-ksym_tracer_stat_next(void *v, int idx)
-{
-	struct hlist_node *stat = v;
-
-	return stat->next;
+	return single_open(file, ksym_profile_show, NULL);
 }
 
-static struct tracer_stat ksym_tracer_stats = {
-	.name = "ksym_tracer",
-	.stat_start = ksym_tracer_stat_start,
-	.stat_next = ksym_tracer_stat_next,
-	.stat_headers = ksym_tracer_stat_headers,
-	.stat_show = ksym_tracer_stat_show
+static const struct file_operations ksym_profile_fops = {
+	.open		= ksym_profile_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
 
-__init static int ksym_tracer_stat_init(void)
+__init static int init_ksym_trace(void)
 {
-	int ret;
+	struct dentry *d_tracer;
 
-	ret = register_stat_tracer(&ksym_tracer_stats);
-	if (ret) {
-		printk(KERN_WARNING "Warning: could not register "
-				    "ksym tracer stats\n");
-		return 1;
-	}
+	d_tracer = tracing_init_dentry();
 
-	return 0;
+	trace_create_file("ksym_trace_filter", 0644, d_tracer,
+			  NULL, &ksym_tracing_fops);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	trace_create_file("ksym_profile", 0444, d_tracer,
+			  NULL, &ksym_profile_fops);
+#endif
+
+	return register_tracer(&ksym_tracer);
 }
-fs_initcall(ksym_tracer_stat_init);
-#endif /* CONFIG_PROFILE_KSYM_TRACER */
+device_initcall(init_ksym_trace);
-- 
cgit v1.2.3


From 79b408210885b9f7f0b067b07a09d68f4da3a700 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:19 +0800
Subject: tracing/kprobe: Show sign of fields in trace_kprobe format files

The format files of trace_kprobe do not show the sign of the fields.
The other format files show the field signed type of the fields and
this patch makes the trace_kprobe formats consistent with the others.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D27.5040009@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ecab06547a5..83f1e6ef7063 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1182,10 +1182,11 @@ static int __probe_event_show_format(struct trace_seq *s,
 #undef SHOW_FIELD
 #define SHOW_FIELD(type, item, name)					\
 	do {								\
-		ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"	\
-				"offset:%u;\tsize:%u;\n", name,		\
+		ret = trace_seq_printf(s, "\tfield:" #type " %s;\t"	\
+				"offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
 				(unsigned int)offsetof(typeof(field), item),\
-				(unsigned int)sizeof(type));		\
+				(unsigned int)sizeof(type),		\
+				is_signed_type(type));			\
 		if (!ret)						\
 			return 0;					\
 	} while (0)
-- 
cgit v1.2.3


From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:38 +0800
Subject: tracing: Fix sign fields in ftrace_define_fields_##call()

Add is_signed_type() call to trace_define_field() in ftrace macros.

The code previously just passed in 0 (false), disregarding whether
or not the field was actually a signed type.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_export.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 458e5bfe26d0..d4fa5dc1ee4e 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item), 0, FILTER_OTHER);	\
+				 sizeof(field.item),			\
+				 is_signed_type(type), FILTER_OTHER);	\
 	if (ret)							\
 		return ret;
 
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused,			\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
-				 sizeof(field.container.item), 0,	\
-				 FILTER_OTHER);				\
+				 sizeof(field.container.item),		\
+				 is_signed_type(type), FILTER_OTHER);	\
 	if (ret)							\
 		return ret;
 
-- 
cgit v1.2.3


From 05cbaa2853cdfc255fdd04e65a82bfe9208c4e52 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 30 Dec 2009 16:00:35 +0100
Subject: perf: Fix NULL deref in inheritance code

Liming found a NULL deref when a task has a perf context but no
counters  when it forks.

This can occur in two cases, a race during construction where
the fork hits after installing the context but before the first
counter gets inserted, or more reproducably, a fork after the
last counter is closed (which leaves the context around).

Reported-by: Wang Liming <liming.wang@windriver.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
CC: <stable@kernel.org>
LKML-Reference: <1262185684.7135.222.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 03cc061398d1..58ed1dae5875 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5148,7 +5148,7 @@ int perf_event_init_task(struct task_struct *child)
 					    GFP_KERNEL);
 			if (!child_ctx) {
 				ret = -ENOMEM;
-				goto exit;
+				break;
 			}
 
 			__perf_event_init_context(child_ctx, child);
@@ -5164,7 +5164,7 @@ int perf_event_init_task(struct task_struct *child)
 		}
 	}
 
-	if (inherited_all) {
+	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
 		 * context, or of whatever the parent is a clone of.
@@ -5184,7 +5184,6 @@ int perf_event_init_task(struct task_struct *child)
 		get_ctx(child_ctx->parent_ctx);
 	}
 
-exit:
 	mutex_unlock(&parent_ctx->mutex);
 
 	perf_unpin_context(parent_ctx);
-- 
cgit v1.2.3


From 10b465aaf9536ee5a16652fa0700740183d48ec9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 19 Dec 2009 14:43:01 +0000
Subject: modules: Skip empty sections when exporting section notes

Commit 35dead4 "modules: don't export section names of empty sections
via sysfs" changed the set of sections that have attributes, but did
not change the iteration over these attributes in add_notes_attrs().
This can lead to add_notes_attrs() creating attributes with the wrong
names or with null name pointers.

Introduce a sect_empty() function and use it in both add_sect_attrs()
and add_notes_attrs().

Reported-by: Martin Michlmayr <tbm@cyrius.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Tested-by: Martin Michlmayr <tbm@cyrius.com>
Cc: stable@kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/module.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index e96b8ed1cb6a..f82386bd9ee9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
  * J. Corbet <corbet@lwn.net>
  */
 #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
+
+static inline bool sect_empty(const Elf_Shdr *sect)
+{
+	return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
+}
+
 struct module_sect_attr
 {
 	struct module_attribute mattr;
@@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
 
 	/* Count loaded sections and allocate structures */
 	for (i = 0; i < nsect; i++)
-		if (sechdrs[i].sh_flags & SHF_ALLOC
-		    && sechdrs[i].sh_size)
+		if (!sect_empty(&sechdrs[i]))
 			nloaded++;
 	size[0] = ALIGN(sizeof(*sect_attrs)
 			+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
 	sattr = &sect_attrs->attrs[0];
 	gattr = &sect_attrs->grp.attrs[0];
 	for (i = 0; i < nsect; i++) {
-		if (! (sechdrs[i].sh_flags & SHF_ALLOC))
-			continue;
-		if (!sechdrs[i].sh_size)
+		if (sect_empty(&sechdrs[i]))
 			continue;
 		sattr->address = sechdrs[i].sh_addr;
 		sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
 	/* Count notes sections and allocate structures.  */
 	notes = 0;
 	for (i = 0; i < nsect; i++)
-		if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
+		if (!sect_empty(&sechdrs[i]) &&
 		    (sechdrs[i].sh_type == SHT_NOTE))
 			++notes;
 
@@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
 	notes_attrs->notes = notes;
 	nattr = &notes_attrs->attrs[0];
 	for (loaded = i = 0; i < nsect; ++i) {
-		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+		if (sect_empty(&sechdrs[i]))
 			continue;
 		if (sechdrs[i].sh_type == SHT_NOTE) {
 			nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
-- 
cgit v1.2.3


From 5ded3dc6a3c7549b36a8ac27bbd81b33756a2c29 Mon Sep 17 00:00:00 2001
From: David Sharp <dhsharp@google.com>
Date: Wed, 6 Jan 2010 17:12:07 -0800
Subject: ring-buffer: Wrap a list.next reference with rb_list_head()

This reference at the end of rb_get_reader_page() was causing off-by-one
writes to the prev pointer of the page after the reader page when that
page is the head page, and therefore the reader page has the RB_PAGE_HEAD
flag in its list.next pointer. This eventually results in a GPF in a
subsequent call to rb_set_head_page() (usually from rb_get_reader_page())
when that prev pointer is dereferenced. The dereferenced register would
characteristically have an address that appears shifted left by one byte
(eg, ffxxxxxxxxxxxxyy instead of ffffxxxxxxxxxxxx) due to being written at
an address one byte too high.

Signed-off-by: David Sharp <dhsharp@google.com>
LKML-Reference: <1262826727-9090-1-git-send-email-dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..d5b7308b7e1b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2906,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 *
 	 * Now make the new head point back to the reader page.
 	 */
-	reader->list.next->prev = &cpu_buffer->reader_page->list;
+	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
 	rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
 
 	/* Finally update the reader page to the new head */
-- 
cgit v1.2.3


From 0e1ff5d72a6393f2ef5dbf74f58bb55a12d63834 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 Jan 2010 20:40:44 -0500
Subject: ring-buffer: Add rb_list_head() wrapper around new reader page next
 field

If the very unlikely case happens where the writer moves the head by one
between where the head page is read and where the new reader page
is assigned _and_ the writer then writes and wraps the entire ring buffer
so that the head page is back to what was originally read as the head page,
the page to be swapped will have a corrupted next pointer.

Simple solution is to wrap the assignment of the next pointer with a
rb_list_head().

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d5b7308b7e1b..edefe3b2801b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2869,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	 * Splice the empty reader page into the list around the head.
 	 */
 	reader = rb_set_head_page(cpu_buffer);
-	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
 	/*
-- 
cgit v1.2.3


From 8767ba2796a1c894e6d9524584a26a8224f0543d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Fri, 8 Jan 2010 14:42:38 -0800
Subject: kmod: fix resource leak in call_usermodehelper_pipe()

Fix resource (write-pipe file) leak in call_usermodehelper_pipe().

When call_usermodehelper_exec() fails, write-pipe file is opened and
call_usermodehelper_pipe() just returns an error.  Since it is hard for
caller to determine whether the error occured when opening the pipe or
executing the helper, the caller cannot close the pipe by themselves.

I've found this resoruce leak when testing coredump.  You can check how
the resource leaks as below;

$ echo "|nocommand" > /proc/sys/kernel/core_pattern
$ ulimit -c unlimited
$ while [ 1 ]; do ./segv; done &> /dev/null &
$ cat /proc/meminfo (<- repeat it)

where segv.c is;
//-----
int main () {
        char *p = 0;
        *p = 1;
}
//-----

This patch closes write-pipe file if call_usermodehelper_exec() failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kmod.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 25b103190364..bf0e231d9702 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 		return -ENOMEM;
 
 	ret = call_usermodehelper_stdinpipe(sub_info, filp);
-	if (ret < 0)
-		goto out;
+	if (ret < 0) {
+		call_usermodehelper_freeinfo(sub_info);
+		return ret;
+	}
 
-	return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+	ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+	if (ret < 0)	/* Failed to execute helper, close pipe */
+		filp_close(*filp, NULL);
 
-  out:
-	call_usermodehelper_freeinfo(sub_info);
 	return ret;
 }
 EXPORT_SYMBOL(call_usermodehelper_pipe);
-- 
cgit v1.2.3


From bd4f490a079730aadfaf9a728303ea0135c01945 Mon Sep 17 00:00:00 2001
From: Dave Anderson <anderson@redhat.com>
Date: Fri, 8 Jan 2010 14:42:50 -0800
Subject: cgroups: fix 2.6.32 regression causing BUG_ON() in cgroup_diput()

The LTP cgroup test suite generates a "kernel BUG at kernel/cgroup.c:790!"
here in cgroup_diput():

                 /*
                  * if we're getting rid of the cgroup, refcount should ensure
                  * that there are no pidlists left.
                  */
                 BUG_ON(!list_empty(&cgrp->pidlists));

The cgroup pidlist rework in 2.6.32 generates the BUG_ON, which is caused
when pidlist_array_load() calls cgroup_pidlist_find():

(1) if a matching cgroup_pidlist is found, it down_write's the mutex of the
     pre-existing cgroup_pidlist, and increments its use_count.
(2) if no matching cgroup_pidlist is found, then a new one is allocated, it
     down_write's its mutex, and the use_count is set to 0.
(3) the matching, or new, cgroup_pidlist gets returned back to pidlist_array_load(),
     which increments its use_count -- regardless whether new or pre-existing --
     and up_write's the mutex.

So if a matching list is ever encountered by cgroup_pidlist_find() during
the life of a cgroup directory, it results in an inflated use_count value,
preventing it from ever getting released by cgroup_release_pid_array().
Then if the directory is subsequently removed, cgroup_diput() hits the
BUG_ON() when it finds that the directory's cgroup is still populated with
a pidlist.

The patch simply removes the use_count increment when a matching pidlist
is found by cgroup_pidlist_find(), because it gets bumped by the calling
pidlist_array_load() function while still protected by the list's mutex.

Signed-off-by: Dave Anderson <anderson@redhat.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: Paul Menage <menage@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0249f4be9b5c..1fbcc748044a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
 			/* make sure l doesn't vanish out from under us */
 			down_write(&l->mutex);
 			mutex_unlock(&cgrp->pidlist_mutex);
-			l->use_count++;
 			return l;
 		}
 	}
-- 
cgit v1.2.3


From b45c6e76bc2c72f6426c14bed64fdcbc9bf37cb0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 8 Jan 2010 14:42:52 -0800
Subject: kernel/signal.c: fix kernel information leak with
 print-fatal-signals=1

When print-fatal-signals is enabled it's possible to dump any memory
reachable by the kernel to the log by simply jumping to that address from
user space.

Or crash the system if there's some hardware with read side effects.

The fatal signals handler will dump 16 bytes at the execution address,
which is fully controlled by ring 3.

In addition when something jumps to a unmapped address there will be up to
16 additional useless page faults, which might be potentially slow (and at
least is not very efficient)

Fortunately this option is off by default and only there on i386.

But fix it by checking for kernel addresses and also stopping when there's
a page fault.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index d09692b40376..934ae5e687b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -979,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
 		for (i = 0; i < 16; i++) {
 			unsigned char insn;
 
-			__get_user(insn, (unsigned char *)(regs->ip + i));
+			if (get_user(insn, (unsigned char *)(regs->ip + i)))
+				break;
 			printk("%02x ", insn);
 		}
 	}
-- 
cgit v1.2.3


From 7485d0d3758e8e6491a5c9468114e74dc050785d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 5 Jan 2010 16:32:43 +0900
Subject: futexes: Remove rw parameter from get_futex_key()

Currently, futexes have two problem:

A) The current futex code doesn't handle private file mappings properly.

get_futex_key() uses PageAnon() to distinguish file and
anon, which can cause the following bad scenario:

  1) thread-A call futex(private-mapping, FUTEX_WAIT), it
     sleeps on file mapping object.
  2) thread-B writes a variable and it makes it cow.
  3) thread-B calls futex(private-mapping, FUTEX_WAKE), it
     wakes up blocked thread on the anonymous page. (but it's nothing)

B) Current futex code doesn't handle zero page properly.

Read mode get_user_pages() can return zero page, but current
futex code doesn't handle it at all. Then, zero page makes
infinite loop internally.

The solution is to use write mode get_user_page() always for
page lookup. It prevents the lookup of both file page of private
mappings and zero page.

Performance concerns:

Probaly very little, because glibc always initialize variables
for futex before to call futex(). It means glibc users never see
the overhead of this patch.

Compatibility concerns:

This patch has few compatibility issues. After this patch,
FUTEX_WAIT require writable access to futex variables (read-only
mappings makes EFAULT). But practically it's not a problem,
glibc always initalizes variables for futexes explicitly - nobody
uses read-only mappings.

Reported-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Darren Hart <dvhltc@us.ibm.com>
Cc: <stable@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Ulrich Drepper <drepper@gmail.com>
LKML-Reference: <20100105162633.45A2.A69D9226@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/futex.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 8e3c3ffe1b9a..d9b3a2228f9d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -203,8 +203,6 @@ static void drop_futex_key_refs(union futex_key *key)
  * @uaddr:	virtual address of the futex
  * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key:	address where result is stored.
- * @rw:		mapping needs to be read/write (values: VERIFY_READ,
- * 		VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -216,7 +214,7 @@ static void drop_futex_key_refs(union futex_key *key)
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
 static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
@@ -239,7 +237,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	 *        but access_ok() should be faster than find_vma()
 	 */
 	if (!fshared) {
-		if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
 			return -EFAULT;
 		key->private.mm = mm;
 		key->private.address = address;
@@ -248,7 +246,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	}
 
 again:
-	err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
+	err = get_user_pages_fast(address, 1, 1, &page);
 	if (err < 0)
 		return err;
 
@@ -867,7 +865,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 	if (!bitset)
 		return -EINVAL;
 
-	ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
+	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -913,10 +911,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	int ret, op_ret;
 
 retry:
-	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
+	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+	ret = get_futex_key(uaddr2, fshared, &key2);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -1175,11 +1173,10 @@ retry:
 		pi_state = NULL;
 	}
 
-	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
+	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2,
-			    requeue_pi ? VERIFY_WRITE : VERIFY_READ);
+	ret = get_futex_key(uaddr2, fshared, &key2);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -1738,7 +1735,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
 	 */
 retry:
 	q->key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
+	ret = get_futex_key(uaddr, fshared, &q->key);
 	if (unlikely(ret != 0))
 		return ret;
 
@@ -1904,7 +1901,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	q.requeue_pi_key = NULL;
 retry:
 	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
+	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -2023,7 +2020,7 @@ retry:
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
 		return -EPERM;
 
-	ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
+	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -2215,7 +2212,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 	rt_waiter.task = NULL;
 
 	key2 = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+	ret = get_futex_key(uaddr2, fshared, &key2);
 	if (unlikely(ret != 0))
 		goto out;
 
-- 
cgit v1.2.3


From 751e9983ee276cb150e8812b1d995f6035a63878 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:02 +0800
Subject: ftrace: Fix MATCH_END_ONLY function filter

For '*foo' pattern, we should allow any string ending with
'foo', but ftrace filter incorrectly disallows strings
like bar_foo_foo:

  # echo '*io' > set_ftrace_filter
  # cat set_ftrace_filter | grep 'req_bio_endio'
  # cat available_filter_functions | grep 'req_bio_endio'
  req_bio_endio

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E870E.6060607@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762c8167..1e6640f80454 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
-	char *ptr;
+	int slen;
 
 	switch (type) {
 	case MATCH_FULL:
@@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
 			matched = 1;
 		break;
 	case MATCH_END_ONLY:
-		ptr = strstr(str, regex);
-		if (ptr && (ptr[len] == 0))
+		slen = strlen(str);
+		if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
 			matched = 1;
 		break;
 	}
-- 
cgit v1.2.3


From 285caad415f459f336247932b4db95a571357a02 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:21 +0800
Subject: tracing/filters: Fix MATCH_FRONT_ONLY filter matching

MATCH_FRONT_ONLY actually is a full matching:

  # ./perf record -R -f -a -e lock:lock_acquire \
	--filter 'name ~rcu_*' sleep 1
  # ./perf trace
  (no output)

We should pass the length of the pattern string to strncmp().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8721.5090301@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..11c3973e6552 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -261,7 +261,7 @@ static int regex_match_full(char *str, struct regex *r, int len)
 
 static int regex_match_front(char *str, struct regex *r, int len)
 {
-	if (strncmp(str, r->pattern, len) == 0)
+	if (strncmp(str, r->pattern, r->len) == 0)
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3


From a3291c14ecf0a995e30d993b7f2cae031de98727 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:41 +0800
Subject: tracing/filters: Fix MATCH_END_ONLY filter matching

For '*foo' pattern, we should allow any string ending with
'foo', but event filtering incorrectly disallows strings
like bar_foo_foo:

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8735.6070604@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 11c3973e6552..49e44dd17851 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -275,9 +275,10 @@ static int regex_match_middle(char *str, struct regex *r, int len)
 
 static int regex_match_end(char *str, struct regex *r, int len)
 {
-	char *ptr = strstr(str, r->pattern);
+	int strlen = len - 1;
 
-	if (ptr && (ptr[r->len] == 0))
+	if (strlen >= r->len &&
+	    memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3


From b2af211f284eb1bef19fbb85fc8ef551bb1e7460 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:54:11 +0800
Subject: tracing/filters: Fix MATCH_MIDDLE_ONLY filter matching

The @str might not be NULL-terminated if it's of type
DYN_STRING or STATIC_STRING, so we should use strnstr()
instead of strstr().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8753.2000102@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 49e44dd17851..f364b085397e 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -268,7 +268,7 @@ static int regex_match_front(char *str, struct regex *r, int len)
 
 static int regex_match_middle(char *str, struct regex *r, int len)
 {
-	if (strstr(str, r->pattern))
+	if (strnstr(str, r->pattern, len))
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3


From 16da27a8bc7a0d050686d1b2e9efb53fab9ed226 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:54:27 +0800
Subject: tracing/filters: Fix MATCH_FULL filter matching for PTR_STRING

MATCH_FULL matching for PTR_STRING is not working correctly:

  # echo 'func == vt' > events/bkl/lock_kernel/filter
  # echo 1 > events/bkl/lock_kernel/enable
  ...
  # cat trace
   Xorg-1484  [000]  1973.392586: lock_kernel: ... func=vt_ioctl()
    gpm-1402  [001]  1974.027740: lock_kernel: ... func=vt_ioctl()

We should pass to regex.match(..., len) the length (including '\0')
of the source string instead of the length of the pattern string.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8763.5070707@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f364b085397e..60c2a4efad4a 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
 {
 	char **addr = (char **)(event + pred->offset);
 	int cmp, match;
+	int len = strlen(*addr) + 1;	/* including tailing '\0' */
 
-	cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+	cmp = pred->regex.match(*addr, &pred->regex, len);
 
 	match = cmp ^ pred->not;
 
@@ -782,10 +783,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
 			fn = filter_pred_strloc;
-		else {
+		else
 			fn = filter_pred_pchar;
-			pred->regex.field_len = strlen(pred->regex.pattern);
-		}
 	} else {
 		if (field->is_signed)
 			ret = strict_strtoll(pred->regex.pattern, 0, &val);
-- 
cgit v1.2.3


From d1303dd1d6b220cab375f24fa91a5640e54e169e Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:54:40 +0800
Subject: tracing/filters: Add comment for match callbacks

We should be clear on 2 things:

- the length parameter of a match callback includes
  tailing '\0'.

- the string to be searched might not be NULL-terminated.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8770.7000608@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 60c2a4efad4a..e42af9aad69f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -252,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
 	return 0;
 }
 
-/* Basic regex callbacks */
+/*
+ * regex_match_foo - Basic regex callbacks
+ *
+ * @str: the string to be searched
+ * @r:   the regex structure containing the pattern string
+ * @len: the length of the string to be searched (including '\0')
+ *
+ * Note:
+ * - @str might not be NULL-terminated if it's of type DYN_STRING
+ *   or STATIC_STRING
+ */
+
 static int regex_match_full(char *str, struct regex *r, int len)
 {
 	if (strncmp(str, r->pattern, len) == 0)
-- 
cgit v1.2.3


From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:12 -0800
Subject: kfifo: use void * pointers for user buffers

The pointers to user buffers are currently unsigned char *, which requires
a lot of casting in the caller for any non-char typed buffers.  Use void *
instead.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kfifo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..ab615e695052 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
 #include <linux/log2.h>
 #include <linux/uaccess.h>
 
-static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+static void _kfifo_init(struct kfifo *fifo, void *buffer,
 		unsigned int size)
 {
 	fifo->buffer = buffer;
@@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @size: the size of the internal buffer, this have to be a power of 2.
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
+void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+unsigned int kfifo_in(struct kfifo *fifo, const void *from,
 				unsigned int len)
 {
 	len = min(kfifo_avail(fifo), len);
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 {
 	len = min(kfifo_len(fifo), len);
 
-- 
cgit v1.2.3


From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:15 -0800
Subject: kfifo: sanitize *_user error handling

Right now for kfifo_*_user it's not easily possible to distingush between
a user copy failing and the FIFO not containing enough data.  The problem
is that both conditions are multiplexed into the same return code.

Avoid this by moving the "copy length" into a separate output parameter
and only return 0/-EFAULT in the main return value.

I didn't fully adapt the weird "record" variants, those seem
to be unused anyways and were rather messy (should they be just removed?)

I would appreciate some double checking if I did all the conversions
correctly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kfifo.c | 76 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 27 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index ab615e695052..b50bb622e8b0 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
 	memcpy(to + l, fifo->buffer, len - l);
 }
 
-static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
-	 const void __user *from, unsigned int len, unsigned int off)
+static inline int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off,
+	 unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	if (unlikely(ret)) {
+		*lenout = ret;
+		return -EFAULT;
+	}
+	*lenout = l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	return copy_from_user(fifo->buffer, from + l, len - l);
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	*lenout += ret ? ret : len - l;
+	return ret ? -EFAULT : 0;
 }
 
-static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
-		void __user *to, unsigned int len, unsigned int off)
+static inline int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
 	ret = copy_to_user(to, fifo->buffer + off, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	*lenout = l;
+	if (unlikely(ret)) {
+		*lenout -= ret;
+		return -EFAULT;
+	}
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	return copy_to_user(to + l, fifo->buffer, len - l);
+	len -= l;
+	ret = copy_to_user(to + l, fifo->buffer, len);
+	if (unlikely(ret)) {
+		*lenout += len - ret;
+		return -EFAULT;
+	}
+	*lenout += len;
+	return 0;
 }
 
 unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
 unsigned int __kfifo_from_user_n(struct kfifo *fifo,
 	const void __user *from, unsigned int len, unsigned int recsize)
 {
+	unsigned total;
+
 	if (kfifo_avail(fifo) < len + recsize)
 		return len + 1;
 
-	return __kfifo_from_user_data(fifo, from, len, recsize);
+	__kfifo_from_user_data(fifo, from, len, recsize, &total);
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_from_user_n);
 
@@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
  * @len: the length of the data to be added.
  *
  * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
+ * FIFO depending and returns -EFAULT/0.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+int kfifo_from_user(struct kfifo *fifo,
+        const void __user *from, unsigned int len, unsigned *total)
 {
+	int ret;
 	len = min(kfifo_avail(fifo), len);
-	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	ret = __kfifo_from_user_data(fifo, from, len, 0, total);
+	if (ret)
+		return ret;
 	__kfifo_add_in(fifo, len);
-	return len;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_from_user);
 
@@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
 	void __user *to, unsigned int len, unsigned int reclen,
 	unsigned int recsize)
 {
-	unsigned int ret;
+	unsigned int ret, total;
 
 	if (kfifo_len(fifo) < reclen + recsize)
 		return len;
 
-	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
 
 	if (likely(ret == 0))
 		__kfifo_add_out(fifo, reclen + recsize);
 
-	return ret;
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_to_user_n);
 
@@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
+ @ @lenout: pointer to output variable with copied data
  *
  * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
+ * @to buffer and 0 or -EFAULT.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned *lenout)
 {
+	int ret;
 	len = min(kfifo_len(fifo), len);
-	len -= __kfifo_to_user_data(fifo, to, len, 0);
-	__kfifo_add_out(fifo, len);
-	return len;
+	ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
+	__kfifo_add_out(fifo, *lenout);
+	return ret;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
-- 
cgit v1.2.3


From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:16 -0800
Subject: kfifo: add kfifo_out_peek

In some upcoming code it's useful to peek into a FIFO without permanentely
removing data.  This patch implements a new kfifo_out_peek() to do this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kfifo.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'kernel')

diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index b50bb622e8b0..7384f120be87 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_out);
 
+/**
+ * kfifo_out_peek - copy some data from the FIFO, but do not remove it
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ * @offset: offset into the fifo
+ *
+ * This function copies at most @len bytes at @offset from the FIFO
+ * into the @to buffer and returns the number of copied bytes.
+ * The data is not removed from the FIFO.
+ */
+unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
+			    unsigned offset)
+{
+	len = min(kfifo_len(fifo), len + offset);
+
+	__kfifo_out_data(fifo, to, len, offset);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out_peek);
+
 unsigned int __kfifo_out_generic(struct kfifo *fifo,
 	void *to, unsigned int len, unsigned int recsize,
 	unsigned int *total)
-- 
cgit v1.2.3


From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: document everywhere that size has to be power of two

On my first try using them I missed that the fifos need to be power of
two, resulting in a runtime bug.  Document that requirement everywhere
(and fix one grammar bug)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kfifo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 7384f120be87..32c5c15d750d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer,
  * kfifo_init - initialize a FIFO using a preallocated buffer
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
- * @size: the size of the internal buffer, this have to be a power of 2.
+ * @size: the size of the internal buffer, this has to be a power of 2.
  *
  */
 void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
-- 
cgit v1.2.3


From af2422c42c0ff42b8b93dbb3a5fe65250fb65c40 Mon Sep 17 00:00:00 2001
From: David John <davidjon@xenontk.org>
Date: Fri, 15 Jan 2010 17:01:23 -0800
Subject: smp_call_function_any(): pass the node value to cpumask_of_node()

The change in acpi_cpufreq to use smp_call_function_any causes a warning
when it is called since the function erroneously passes the cpu id to
cpumask_of_node rather than the node that the cpu is on.  Fix this.

cpumask_of_node(3): node > nr_node_ids(1)
Pid: 1, comm: swapper Not tainted 2.6.33-rc3-00097-g2c1f189 #223
Call Trace:
 [<ffffffff81028bb3>] cpumask_of_node+0x23/0x58
 [<ffffffff81061f51>] smp_call_function_any+0x65/0xfa
 [<ffffffff810160d1>] ? do_drv_read+0x0/0x2f
 [<ffffffff81015fba>] get_cur_val+0xb0/0x102
 [<ffffffff81016080>] get_cur_freq_on_cpu+0x74/0xc5
 [<ffffffff810168a7>] acpi_cpufreq_cpu_init+0x417/0x515
 [<ffffffff81562ce9>] ? __down_write+0xb/0xd
 [<ffffffff8148055e>] cpufreq_add_dev+0x278/0x922

Signed-off-by: David John <davidjon@xenontk.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/smp.c b/kernel/smp.c
index de735a6637d0..f10408422444 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -347,7 +347,7 @@ int smp_call_function_any(const struct cpumask *mask,
 		goto call;
 
 	/* Try for same node. */
-	nodemask = cpumask_of_node(cpu);
+	nodemask = cpumask_of_node(cpu_to_node(cpu));
 	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
 	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
 		if (cpu_online(cpu))
-- 
cgit v1.2.3


From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 4 Jan 2010 14:44:56 +0100
Subject: sched: Fix vmark regression on big machines

SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't
enabled, leading to many cache misses on large machines as we traverse
looking for an idle shared cache to wake to.  Change the enabler of
select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the
sibling domain level.

Reported-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1262612696.15495.15.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 42ac3c9f66f6..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			 * If there's an idle sibling in this domain, make that
 			 * the wake_affine target instead of the current cpu.
 			 */
-			if (tmp->flags & SD_PREFER_SIBLING)
+			if (tmp->flags & SD_SHARE_PKG_RESOURCES)
 				target = select_idle_sibling(p, tmp, target);
 
 			if (target >= 0) {
-- 
cgit v1.2.3


From 6d558c3ac9b6508d26fd5cadccce51fc9d726b1c Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Mon, 11 Jan 2010 14:21:25 +0800
Subject: sched: Reassign prev and switch_count when reacquire_kernel_lock()
 fail

Assume A->B schedule is processing, if B have acquired BKL before and it
need reschedule this time. Then on B's context, it will go to
need_resched_nonpreemptible for reschedule. But at this time, prev and
switch_count are related to A. It's wrong and will lead to incorrect
scheduler statistics.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <2674af741001102238w7b0ddcadref00d345e2181d11@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index c535cc4f6428..4508fe7048be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5530,8 +5530,11 @@ need_resched_nonpreemptible:
 
 	post_schedule(rq);
 
-	if (unlikely(reacquire_kernel_lock(current) < 0))
+	if (unlikely(reacquire_kernel_lock(current) < 0)) {
+		prev = rq->curr;
+		switch_count = &prev->nivcsw;
 		goto need_resched_nonpreemptible;
+	}
 
 	preempt_enable_no_resched();
 	if (need_resched())
-- 
cgit v1.2.3


From fe432200abb0d64f409895168d9ad8fbb9d8e6c6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 18 Jan 2010 09:08:26 +0100
Subject: perf: Fix perf_event_do_pending() fallback callsite

Paul questioned the context in which we should call
perf_event_do_pending(). After looking at that I found that it should be
called from IRQ context these days, however the fallback call-site is
placed in softirq context. Ammend this by placing the callback in the IRQ
timer path.

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1263374859.4244.192.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/timer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/timer.c b/kernel/timer.c
index 15533b792397..c61a7949387f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1198,6 +1198,7 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
+	perf_event_do_pending();
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
@@ -1209,8 +1210,6 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_event_do_pending();
-
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
-- 
cgit v1.2.3


From 22e190851f8709c48baf00ed9ce6144cdc54d025 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 18 Jan 2010 09:12:32 +0100
Subject: perf: Honour event state for aux stream data

Anton reported that perf record kept receiving events even after calling
ioctl(PERF_EVENT_IOC_DISABLE). It turns out that FORK,COMM and MMAP
events didn't respect the disabled state and kept flowing in.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Anton Blanchard <anton@samba.org>
LKML-Reference: <1263459187.4244.265.camel@laptop>
CC: stable@kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel')

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 603c0d8b5df1..d27746bd3a06 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3268,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event,
 
 static int perf_event_task_match(struct perf_event *event)
 {
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;
 
@@ -3377,6 +3380,9 @@ static void perf_event_comm_output(struct perf_event *event,
 
 static int perf_event_comm_match(struct perf_event *event)
 {
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;
 
@@ -3494,6 +3500,9 @@ static void perf_event_mmap_output(struct perf_event *event,
 static int perf_event_mmap_match(struct perf_event *event,
 				   struct perf_mmap_event *mmap_event)
 {
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
 		return 0;
 
-- 
cgit v1.2.3