diff options
author | Ravi Bangoria <ravi.bangoria@linux.ibm.com> | 2018-08-20 10:12:47 +0530 |
---|---|---|
committer | Steven Rostedt (VMware) <rostedt@goodmis.org> | 2018-09-24 04:44:53 -0400 |
commit | 1cc33161a83d20b5462b1e93f95d3ce6388079ee (patch) | |
tree | bb9b7abedc85e2a78eb5268cd1dae921e3e6bb3d /kernel/trace | |
parent | d6b183eda466415bb5defcf9afe4cb64734839e8 (diff) |
uprobes: Support SDT markers having reference count (semaphore)
Userspace Statically Defined Tracepoints[1] are dtrace style markers
inside userspace applications. Applications like PostgreSQL, MySQL,
Pthread, Perl, Python, Java, Ruby, Node.js, libvirt, QEMU, glib etc
have these markers embedded in them. These markers are added by developer
at important places in the code. Each marker source expands to a single
nop instruction in the compiled code but there may be additional
overhead for computing the marker arguments which expands to couple of
instructions. In case the overhead is more, execution of it can be
omitted by runtime if() condition when no one is tracing on the marker:
if (reference_counter > 0) {
Execute marker instructions;
}
Default value of reference counter is 0. Tracer has to increment the
reference counter before tracing on a marker and decrement it when
done with the tracing.
Implement the reference counter logic in core uprobe. User will be
able to use it from trace_uprobe as well as from kernel module. New
trace_uprobe definition with reference counter will now be:
<path>:<offset>[(ref_ctr_offset)]
where ref_ctr_offset is an optional field. For kernel module, new
variant of uprobe_register() has been introduced:
uprobe_register_refctr(inode, offset, ref_ctr_offset, consumer)
No new variant for uprobe_unregister() because it's assumed to have
only one reference counter for one uprobe.
[1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
Note: 'reference counter' is called as 'semaphore' in original Dtrace
(or Systemtap, bcc and even in ELF) documentation and code. But the
term 'semaphore' is misleading in this context. This is just a counter
used to hold number of tracers tracing on a marker. This is not really
used for any synchronization. So we are calling it a 'reference counter'
in kernel / perf code.
Link: http://lkml.kernel.org/r/20180820044250.11659-2-ravi.bangoria@linux.ibm.com
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
[Only trace_uprobe.c]
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Song Liu <songliubraving@fb.com>
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 38 |
2 files changed, 36 insertions, 4 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bf6f1d70484d..147be8523560 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4621,7 +4621,7 @@ static const char readme_msg[] = "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n" #endif #ifdef CONFIG_UPROBE_EVENTS - "\t place: <path>:<offset>\n" + " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n" #endif "\t args: <name>=fetcharg[:type]\n" "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e696667da29a..7b85172beab6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -47,6 +47,7 @@ struct trace_uprobe { struct inode *inode; char *filename; unsigned long offset; + unsigned long ref_ctr_offset; unsigned long nhit; struct trace_probe tp; }; @@ -352,10 +353,10 @@ end: static int create_trace_uprobe(int argc, char **argv) { struct trace_uprobe *tu; - char *arg, *event, *group, *filename; + char *arg, *event, *group, *filename, *rctr, *rctr_end; char buf[MAX_EVENT_NAME_LEN]; struct path path; - unsigned long offset; + unsigned long offset, ref_ctr_offset; bool is_delete, is_return; int i, ret; @@ -364,6 +365,7 @@ static int create_trace_uprobe(int argc, char **argv) is_return = false; event = NULL; group = NULL; + ref_ctr_offset = 0; /* argc must be >= 1 */ if (argv[0][0] == '-') @@ -438,6 +440,26 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } + /* Parse reference counter offset if specified. */ + rctr = strchr(arg, '('); + if (rctr) { + rctr_end = strchr(rctr, ')'); + if (rctr > rctr_end || *(rctr_end + 1) != 0) { + ret = -EINVAL; + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + + *rctr++ = '\0'; + *rctr_end = '\0'; + ret = kstrtoul(rctr, 0, &ref_ctr_offset); + if (ret) { + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + } + + /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; @@ -472,6 +494,7 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } tu->offset = offset; + tu->ref_ctr_offset = ref_ctr_offset; tu->path = path; tu->filename = kstrdup(filename, GFP_KERNEL); @@ -590,6 +613,9 @@ static int probes_seq_show(struct seq_file *m, void *v) trace_event_name(&tu->tp.call), tu->filename, (int)(sizeof(void *) * 2), tu->offset); + if (tu->ref_ctr_offset) + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset); + for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); @@ -905,7 +931,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, tu->consumer.filter = filter; tu->inode = d_real_inode(tu->path.dentry); - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + if (tu->ref_ctr_offset) { + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + } else { + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + } + if (ret) goto err_buffer; |