From 9fb48c744ba6a4bf58b666f4e6fdac3008ea1bd4 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:34 -0600 Subject: params: add 3rd arg to option handler callback signature Add a 3rd arg, named "doing", to unknown-options callbacks invoked from parse_args(). The arg is passed as: "Booting kernel" from start_kernel(), initcall_level_names[i] from do_initcall_level(), mod->name from load_module(), via parse_args(), parse_one() parse_args() already has the "name" parameter, which is renamed to "doing" to better reflect current uses 1,2 above. parse_args() passes it to an altered parse_one(), which now passes it down into the unknown option handler callbacks. The mod->name will be needed to handle dyndbg for loadable modules, since params passed by modprobe are not qualified (they do not have a "$modname." prefix), and by the time the unknown-param callback is called, the module name is not otherwise available. Minor tweaks: Add param-name to parse_one's pr_debug(), current message doesnt identify the param being handled, add it. Add a pr_info to print current level and level_name of the initcall, and number of registered initcalls at that level. This adds 7 lines to dmesg output, like: initlevel:6=device, 172 registered initcalls Drop "parameters" from initcall_level_names[], its unhelpful in the pr_info() added above. This array is passed into parse_args() by do_initcall_level(). CC: Rusty Russell Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- kernel/params.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index f37d82631347..b60e2c74b961 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -85,11 +85,13 @@ bool parameq(const char *a, const char *b) static int parse_one(char *param, char *val, + const char *doing, const struct kernel_param *params, unsigned num_params, s16 min_level, s16 max_level, - int (*handle_unknown)(char *param, char *val)) + int (*handle_unknown)(char *param, char *val, + const char *doing)) { unsigned int i; int err; @@ -104,8 +106,8 @@ static int parse_one(char *param, if (!val && params[i].ops->set != param_set_bool && params[i].ops->set != param_set_bint) return -EINVAL; - pr_debug("They are equal! Calling %p\n", - params[i].ops->set); + pr_debug("handling %s with %p\n", param, + params[i].ops->set); mutex_lock(¶m_lock); err = params[i].ops->set(val, ¶ms[i]); mutex_unlock(¶m_lock); @@ -114,11 +116,11 @@ static int parse_one(char *param, } if (handle_unknown) { - pr_debug("Unknown argument: calling %p\n", handle_unknown); - return handle_unknown(param, val); + pr_debug("doing %s: %s='%s'\n", doing, param, val); + return handle_unknown(param, val, doing); } - pr_debug("Unknown argument `%s'\n", param); + pr_debug("Unknown argument '%s'\n", param); return -ENOENT; } @@ -175,28 +177,29 @@ static char *next_arg(char *args, char **param, char **val) } /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ -int parse_args(const char *name, +int parse_args(const char *doing, char *args, const struct kernel_param *params, unsigned num, s16 min_level, s16 max_level, - int (*unknown)(char *param, char *val)) + int (*unknown)(char *param, char *val, const char *doing)) { char *param, *val; - pr_debug("Parsing ARGS: %s\n", args); - /* Chew leading spaces */ args = skip_spaces(args); + if (args && *args) + pr_debug("doing %s, parsing ARGS: '%s'\n", doing, args); + while (*args) { int ret; int irq_was_disabled; args = next_arg(args, ¶m, &val); irq_was_disabled = irqs_disabled(); - ret = parse_one(param, val, params, num, + ret = parse_one(param, val, doing, params, num, min_level, max_level, unknown); if (irq_was_disabled && !irqs_disabled()) { printk(KERN_WARNING "parse_args(): option '%s' enabled " @@ -205,19 +208,19 @@ int parse_args(const char *name, switch (ret) { case -ENOENT: printk(KERN_ERR "%s: Unknown parameter `%s'\n", - name, param); + doing, param); return ret; case -ENOSPC: printk(KERN_ERR "%s: `%s' too large for parameter `%s'\n", - name, val ?: "", param); + doing, val ?: "", param); return ret; case 0: break; default: printk(KERN_ERR "%s: `%s' invalid for parameter `%s'\n", - name, val ?: "", param); + doing, val ?: "", param); return ret; } } -- cgit v1.2.3 From b48420c1d3019ce8d84fb8e58f4ca86b8e3655b8 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:35 -0600 Subject: dynamic_debug: make dynamic-debug work for module initialization This introduces a fake module param $module.dyndbg. Its based upon Thomas Renninger's $module.ddebug boot-time debugging patch from https://lkml.org/lkml/2010/9/15/397 The 'fake' module parameter is provided for all modules, whether or not they need it. It is not explicitly added to each module, but is implemented in callbacks invoked from parse_args. For builtin modules, dynamic_debug_init() now directly calls parse_args(..., &ddebug_dyndbg_boot_params_cb), to process the params undeclared in the modules, just after the ddebug tables are processed. While its slightly weird to reprocess the boot params, parse_args() is already called repeatedly by do_initcall_levels(). More importantly, the dyndbg queries (given in ddebug_query or dyndbg params) cannot be activated until after the ddebug tables are ready, and reusing parse_args is cleaner than doing an ad-hoc parse. This reparse would break options like inc_verbosity, but they probably should be params, like verbosity=3. ddebug_dyndbg_boot_params_cb() handles both bare dyndbg (aka: ddebug_query) and module-prefixed dyndbg params, and ignores all other parameters. For example, the following will enable pr_debug()s in 4 builtin modules, in the order given: dyndbg="module params +p; module aio +p" module.dyndbg=+p pci.dyndbg For loadable modules, parse_args() in load_module() calls ddebug_dyndbg_module_params_cb(). This handles bare dyndbg params as passed from modprobe, and errors on other unknown params. Note that modprobe reads /proc/cmdline, so "modprobe foo" grabs all foo.params, strips the "foo.", and passes these to the kernel. ddebug_dyndbg_module_params_cb() is again called for the unknown params; it handles dyndbg, and errors on others. The "doing" arg added previously contains the module name. For non CONFIG_DYNAMIC_DEBUG builds, the stub function accepts and ignores $module.dyndbg params, other unknowns get -ENOENT. If no param value is given (as in pci.dyndbg example above), "+p" is assumed, which enables all pr_debug callsites in the module. The dyndbg fake parameter is not shown in /sys/module/*/parameters, thus it does not use any resources. Changes to it are made via the control file. Also change pr_info in ddebug_exec_queries to vpr_info, no need to see it all the time. Signed-off-by: Jim Cromie CC: Thomas Renninger CC: Rusty Russell Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 78ac6ec1e425..a4e60973ca73 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2953,7 +2953,7 @@ static struct module *load_module(void __user *umod, /* Module is ready to execute: parsing args may do that. */ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, NULL); + -32768, 32767, &ddebug_dyndbg_module_param_cb); if (err < 0) goto unlink; -- cgit v1.2.3 From 1ef9eaf2bf8901e92bb931875a5621692c8a0b84 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Thu, 3 May 2012 11:57:37 -0600 Subject: params.c: fix Smack complaint about parse_args In commit 9fb48c744: "params: add 3rd arg to option handler callback signature", the if-guard added to the pr_debug was overzealous; no callers pass NULL, and existing code above and below the guard assumes as much. Change the if-guard to match, and silence the Smack complaint. CC: Dan Carpenter Signed-off-by: Jim Cromie Signed-off-by: Greg Kroah-Hartman --- kernel/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index b60e2c74b961..be78c904b564 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -190,7 +190,7 @@ int parse_args(const char *doing, /* Chew leading spaces */ args = skip_spaces(args); - if (args && *args) + if (*args) pr_debug("doing %s, parsing ARGS: '%s'\n", doing, args); while (*args) { -- cgit v1.2.3 From b5f3abf950f16fa615dc621e38eec63b2cc67946 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Thu, 3 May 2012 18:22:44 -0600 Subject: params: replace printk(KERN_...) with pr_(...) I left 1 printk which uses __FILE__, __LINE__ explicitly, which should not be subject to generic preferences expressed via pr_fmt(). + tweaks suggested by Joe Perches: - add doing to irq-enabled warning, like others. It wont happen often.. - change sysfs failure crit, not just err, make it 1 line in logs. - coalese 2 format fragments into 1 >80 char line cc: Joe Perches Signed-off-by: Jim Cromie Signed-off-by: Greg Kroah-Hartman --- kernel/params.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index be78c904b564..ed35345be536 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -201,25 +201,22 @@ int parse_args(const char *doing, irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, min_level, max_level, unknown); - if (irq_was_disabled && !irqs_disabled()) { - printk(KERN_WARNING "parse_args(): option '%s' enabled " - "irq's!\n", param); - } + if (irq_was_disabled && !irqs_disabled()) + pr_warn("%s: option '%s' enabled irq's!\n", + doing, param); + switch (ret) { case -ENOENT: - printk(KERN_ERR "%s: Unknown parameter `%s'\n", - doing, param); + pr_err("%s: Unknown parameter `%s'\n", doing, param); return ret; case -ENOSPC: - printk(KERN_ERR - "%s: `%s' too large for parameter `%s'\n", + pr_err("%s: `%s' too large for parameter `%s'\n", doing, val ?: "", param); return ret; case 0: break; default: - printk(KERN_ERR - "%s: `%s' invalid for parameter `%s'\n", + pr_err("%s: `%s' invalid for parameter `%s'\n", doing, val ?: "", param); return ret; } @@ -266,8 +263,7 @@ STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); int param_set_charp(const char *val, const struct kernel_param *kp) { if (strlen(val) > 1024) { - printk(KERN_ERR "%s: string parameter too long\n", - kp->name); + pr_err("%s: string parameter too long\n", kp->name); return -ENOSPC; } @@ -403,8 +399,7 @@ static int param_array(const char *name, int len; if (*num == max) { - printk(KERN_ERR "%s: can only take %i arguments\n", - name, max); + pr_err("%s: can only take %i arguments\n", name, max); return -EINVAL; } len = strcspn(val, ","); @@ -423,8 +418,7 @@ static int param_array(const char *name, } while (save == ','); if (*num < min) { - printk(KERN_ERR "%s: needs at least %i arguments\n", - name, min); + pr_err("%s: needs at least %i arguments\n", name, min); return -EINVAL; } return 0; @@ -483,7 +477,7 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) const struct kparam_string *kps = kp->str; if (strlen(val)+1 > kps->maxlen) { - printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", + pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); return -ENOSPC; } @@ -753,11 +747,8 @@ static struct module_kobject * __init locate_module_kobject(const char *name) #endif if (err) { kobject_put(&mk->kobj); - printk(KERN_ERR - "Module '%s' failed add to sysfs, error number %d\n", + pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", name, err); - printk(KERN_ERR - "The system will be unstable now.\n"); return NULL; } -- cgit v1.2.3 From 7ff9554bb578ba02166071d2d487b7fc7d860d62 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 3 May 2012 02:29:13 +0200 Subject: printk: convert byte-buffer to variable-length record buffer - Record-based stream instead of the traditional byte stream buffer. All records carry a 64 bit timestamp, the syslog facility and priority in the record header. - Records consume almost the same amount, sometimes less memory than the traditional byte stream buffer (if printk_time is enabled). The record header is 16 bytes long, plus some padding bytes at the end if needed. The byte-stream buffer needed 3 chars for the syslog prefix, 15 char for the timestamp and a newline. - Buffer management is based on message sequence numbers. When records need to be discarded, the reading heads move on to the next full record. Unlike the byte-stream buffer, no old logged lines get truncated or partly overwritten by new ones. Sequence numbers also allow consumers of the log stream to get notified if any message in the stream they are about to read gets discarded during the time of reading. - Better buffered IO support for KERN_CONT continuation lines, when printk() is called multiple times for a single line. The use of KERN_CONT is now mandatory to use continuation; a few places in the kernel need trivial fixes here. The buffering could possibly be extended to per-cpu variables to allow better thread-safety for multiple printk() invocations for a single line. - Full-featured syslog facility value support. Different facilities can tag their messages. All userspace-injected messages enforce a facility value > 0 now, to be able to reliably distinguish them from the kernel-generated messages. Independent subsystems like a baseband processor running its own firmware, or a kernel-related userspace process can use their own unique facility values. Multiple independent log streams can co-exist that way in the same buffer. All share the same global sequence number counter to ensure proper ordering (and interleaving) and to allow the consumers of the log to reliably correlate the events from different facilities. Tested-by: William Douglas Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 1014 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 590 insertions(+), 424 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index b663c2c95d39..74357329550f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -54,8 +54,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) { } -#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) - /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -98,24 +96,6 @@ EXPORT_SYMBOL_GPL(console_drivers); */ static int console_locked, console_suspended; -/* - * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars - * It is also used in interesting ways to provide interlocking in - * console_unlock();. - */ -static DEFINE_RAW_SPINLOCK(logbuf_lock); - -#define LOG_BUF_MASK (log_buf_len-1) -#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) - -/* - * The indices into log_buf are not constrained to log_buf_len - they - * must be masked before subscripting - */ -static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ -static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ -static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ - /* * If exclusive_console is non-NULL then only this console is to be printed to. */ @@ -146,12 +126,176 @@ EXPORT_SYMBOL(console_set_on_cmdline); static int console_may_schedule; #ifdef CONFIG_PRINTK +/* + * The printk log buffer consists of a chain of concatenated variable + * length records. Every record starts with a record header, containing + * the overall length of the record. + * + * The heads to the first and last entry in the buffer, as well as the + * sequence numbers of these both entries are maintained when messages + * are stored.. + * + * If the heads indicate available messages, the length in the header + * tells the start next message. A length == 0 for the next message + * indicates a wrap-around to the beginning of the buffer. + * + * Every record carries the monotonic timestamp in microseconds, as well as + * the standard userspace syslog level and syslog facility. The usual + * kernel messages use LOG_KERN; userspace-injected messages always carry + * a matching syslog facility, by default LOG_USER. The origin of every + * message can be reliably determined that way. + * + * The human readable log message directly follows the message header. The + * length of the message text is stored in the header, the stored message + * is not terminated. + * + */ + +struct log { + u64 ts_nsec; /* timestamp in nanoseconds */ + u16 len; /* length of entire record */ + u16 text_len; /* length of text buffer */ + u16 dict_len; /* length of dictionary buffer */ + u16 level; /* syslog level + facility */ +}; + +/* + * The logbuf_lock protects kmsg buffer, indices, counters. It is also + * used in interesting ways to provide interlocking in console_unlock(); + */ +static DEFINE_RAW_SPINLOCK(logbuf_lock); +/* cpu currently holding logbuf_lock */ +static volatile unsigned int logbuf_cpu = UINT_MAX; + +#define LOG_LINE_MAX 1024 + +/* record buffer */ +#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) static char __log_buf[__LOG_BUF_LEN]; static char *log_buf = __log_buf; -static int log_buf_len = __LOG_BUF_LEN; -static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ -static int saved_console_loglevel = -1; +static u32 log_buf_len = __LOG_BUF_LEN; + +/* index and sequence number of the first record stored in the buffer */ +static u64 log_first_seq; +static u32 log_first_idx; + +/* index and sequence number of the next record to store in the buffer */ +static u64 log_next_seq; +static u32 log_next_idx; + +/* the next printk record to read after the last 'clear' command */ +static u64 clear_seq; +static u32 clear_idx; + +/* the next printk record to read by syslog(READ) or /proc/kmsg */ +static u64 syslog_seq; +static u32 syslog_idx; + +/* human readable text of the record */ +static char *log_text(const struct log *msg) +{ + return (char *)msg + sizeof(struct log); +} + +/* optional key/value pair dictionary attached to the record */ +static char *log_dict(const struct log *msg) +{ + return (char *)msg + sizeof(struct log) + msg->text_len; +} + +/* get record by index; idx must point to valid msg */ +static struct log *log_from_idx(u32 idx) +{ + struct log *msg = (struct log *)(log_buf + idx); + + /* + * A length == 0 record is the end of buffer marker. Wrap around and + * read the message at the start of the buffer. + */ + if (!msg->len) + return (struct log *)log_buf; + return msg; +} + +/* get next record; idx must point to valid msg */ +static u32 log_next(u32 idx) +{ + struct log *msg = (struct log *)(log_buf + idx); + + /* length == 0 indicates the end of the buffer; wrap */ + /* + * A length == 0 record is the end of buffer marker. Wrap around and + * read the message at the start of the buffer as *this* one, and + * return the one after that. + */ + if (!msg->len) { + msg = (struct log *)log_buf; + return msg->len; + } + return idx + msg->len; +} + +#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define LOG_ALIGN 4 +#else +#define LOG_ALIGN 8 +#endif + +/* insert record into the buffer, discard old ones, update heads */ +static void log_store(int facility, int level, + const char *dict, u16 dict_len, + const char *text, u16 text_len) +{ + struct log *msg; + u32 size, pad_len; + + /* number of '\0' padding bytes to next message */ + size = sizeof(struct log) + text_len + dict_len; + pad_len = (-size) & (LOG_ALIGN - 1); + size += pad_len; + + while (log_first_seq < log_next_seq) { + u32 free; + + if (log_next_idx > log_first_idx) + free = max(log_buf_len - log_next_idx, log_first_idx); + else + free = log_first_idx - log_next_idx; + + if (free > size + sizeof(struct log)) + break; + + /* drop old messages until we have enough contiuous space */ + log_first_idx = log_next(log_first_idx); + log_first_seq++; + } + + if (log_next_idx + size + sizeof(struct log) >= log_buf_len) { + /* + * This message + an additional empty header does not fit + * at the end of the buffer. Add an empty header with len == 0 + * to signify a wrap around. + */ + memset(log_buf + log_next_idx, 0, sizeof(struct log)); + log_next_idx = 0; + } + + /* fill message */ + msg = (struct log *)(log_buf + log_next_idx); + memcpy(log_text(msg), text, text_len); + msg->text_len = text_len; + memcpy(log_dict(msg), dict, dict_len); + msg->dict_len = dict_len; + msg->level = (facility << 3) | (level & 7); + msg->ts_nsec = local_clock(); + memset(log_dict(msg) + dict_len, 0, pad_len); + msg->len = sizeof(struct log) + text_len + dict_len + pad_len; + + /* insert message */ + log_next_idx += msg->len; + log_next_seq++; +} #ifdef CONFIG_KEXEC /* @@ -165,9 +309,9 @@ static int saved_console_loglevel = -1; void log_buf_kexec_setup(void) { VMCOREINFO_SYMBOL(log_buf); - VMCOREINFO_SYMBOL(log_end); VMCOREINFO_SYMBOL(log_buf_len); - VMCOREINFO_SYMBOL(logged_chars); + VMCOREINFO_SYMBOL(log_first_idx); + VMCOREINFO_SYMBOL(log_next_idx); } #endif @@ -191,7 +335,6 @@ early_param("log_buf_len", log_buf_len_setup); void __init setup_log_buf(int early) { unsigned long flags; - unsigned start, dest_idx, offset; char *new_log_buf; int free; @@ -219,20 +362,8 @@ void __init setup_log_buf(int early) log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; - free = __LOG_BUF_LEN - log_end; - - offset = start = min(con_start, log_start); - dest_idx = 0; - while (start != log_end) { - unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); - - log_buf[dest_idx] = __log_buf[log_idx_mask]; - start++; - dest_idx++; - } - log_start -= offset; - con_start -= offset; - log_end -= offset; + free = __LOG_BUF_LEN - log_next_idx; + memcpy(log_buf, __log_buf, __LOG_BUF_LEN); raw_spin_unlock_irqrestore(&logbuf_lock, flags); pr_info("log_buf_len: %d\n", log_buf_len); @@ -332,11 +463,165 @@ static int check_syslog_permissions(int type, bool from_file) return 0; } +#if defined(CONFIG_PRINTK_TIME) +static bool printk_time = 1; +#else +static bool printk_time; +#endif +module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); + +static int syslog_print_line(u32 idx, char *text, size_t size) +{ + struct log *msg; + size_t len; + + msg = log_from_idx(idx); + if (!text) { + /* calculate length only */ + len = 3; + + if (msg->level > 9) + len++; + if (msg->level > 99) + len++; + + if (printk_time) + len += 15; + + len += msg->text_len; + len++; + return len; + } + + len = sprintf(text, "<%u>", msg->level); + + if (printk_time) { + unsigned long long t = msg->ts_nsec; + unsigned long rem_ns = do_div(t, 1000000000); + + len += sprintf(text + len, "[%5lu.%06lu] ", + (unsigned long) t, rem_ns / 1000); + } + + if (len + msg->text_len > size) + return -EINVAL; + memcpy(text + len, log_text(msg), msg->text_len); + len += msg->text_len; + text[len++] = '\n'; + return len; +} + +static int syslog_print(char __user *buf, int size) +{ + char *text; + int len; + + text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); + if (!text) + return -ENOMEM; + + raw_spin_lock_irq(&logbuf_lock); + if (syslog_seq < log_first_seq) { + /* messages are gone, move to first one */ + syslog_seq = log_first_seq; + syslog_idx = log_first_idx; + } + len = syslog_print_line(syslog_idx, text, LOG_LINE_MAX); + syslog_idx = log_next(syslog_idx); + syslog_seq++; + raw_spin_unlock_irq(&logbuf_lock); + + if (len > 0 && copy_to_user(buf, text, len)) + len = -EFAULT; + + kfree(text); + return len; +} + +static int syslog_print_all(char __user *buf, int size, bool clear) +{ + char *text; + int len = 0; + + text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); + if (!text) + return -ENOMEM; + + raw_spin_lock_irq(&logbuf_lock); + if (buf) { + u64 next_seq; + u64 seq; + u32 idx; + + if (clear_seq < log_first_seq) { + /* messages are gone, move to first available one */ + clear_seq = log_first_seq; + clear_idx = log_first_idx; + } + + /* + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. + */ + seq = clear_seq; + idx = clear_idx; + while (seq < log_next_seq) { + len += syslog_print_line(idx, NULL, 0); + idx = log_next(idx); + seq++; + } + seq = clear_seq; + idx = clear_idx; + while (len > size && seq < log_next_seq) { + len -= syslog_print_line(idx, NULL, 0); + idx = log_next(idx); + seq++; + } + + /* last message in this dump */ + next_seq = log_next_seq; + + len = 0; + while (len >= 0 && seq < next_seq) { + int textlen; + + textlen = syslog_print_line(idx, text, LOG_LINE_MAX); + if (textlen < 0) { + len = textlen; + break; + } + idx = log_next(idx); + seq++; + + raw_spin_unlock_irq(&logbuf_lock); + if (copy_to_user(buf + len, text, textlen)) + len = -EFAULT; + else + len += textlen; + raw_spin_lock_irq(&logbuf_lock); + + if (seq < log_first_seq) { + /* messages are gone, move to next one */ + seq = log_first_seq; + idx = log_first_idx; + } + } + } + + if (clear) { + clear_seq = log_next_seq; + clear_idx = log_next_idx; + } + raw_spin_unlock_irq(&logbuf_lock); + + kfree(text); + return len; +} + int do_syslog(int type, char __user *buf, int len, bool from_file) { - unsigned i, j, limit, count; - int do_clear = 0; - char c; + bool clear = false; + static int saved_console_loglevel = -1; int error; error = check_syslog_permissions(type, from_file); @@ -364,28 +649,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) goto out; } error = wait_event_interruptible(log_wait, - (log_start - log_end)); + syslog_seq != log_next_seq); if (error) goto out; - i = 0; - raw_spin_lock_irq(&logbuf_lock); - while (!error && (log_start != log_end) && i < len) { - c = LOG_BUF(log_start); - log_start++; - raw_spin_unlock_irq(&logbuf_lock); - error = __put_user(c,buf); - buf++; - i++; - cond_resched(); - raw_spin_lock_irq(&logbuf_lock); - } - raw_spin_unlock_irq(&logbuf_lock); - if (!error) - error = i; + error = syslog_print(buf, len); break; /* Read/clear last kernel messages */ case SYSLOG_ACTION_READ_CLEAR: - do_clear = 1; + clear = true; /* FALL THRU */ /* Read last kernel messages */ case SYSLOG_ACTION_READ_ALL: @@ -399,52 +670,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) error = -EFAULT; goto out; } - count = len; - if (count > log_buf_len) - count = log_buf_len; - raw_spin_lock_irq(&logbuf_lock); - if (count > logged_chars) - count = logged_chars; - if (do_clear) - logged_chars = 0; - limit = log_end; - /* - * __put_user() could sleep, and while we sleep - * printk() could overwrite the messages - * we try to copy to user space. Therefore - * the messages are copied in reverse. - */ - for (i = 0; i < count && !error; i++) { - j = limit-1-i; - if (j + log_buf_len < log_end) - break; - c = LOG_BUF(j); - raw_spin_unlock_irq(&logbuf_lock); - error = __put_user(c,&buf[count-1-i]); - cond_resched(); - raw_spin_lock_irq(&logbuf_lock); - } - raw_spin_unlock_irq(&logbuf_lock); - if (error) - break; - error = i; - if (i != count) { - int offset = count-error; - /* buffer overflow during copy, correct user buffer. */ - for (i = 0; i < error; i++) { - if (__get_user(c,&buf[i+offset]) || - __put_user(c,&buf[i])) { - error = -EFAULT; - break; - } - cond_resched(); - } - } + error = syslog_print_all(buf, len, clear); break; /* Clear ring buffer */ case SYSLOG_ACTION_CLEAR: - logged_chars = 0; - break; + syslog_print_all(NULL, 0, true); /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_OFF: if (saved_console_loglevel == -1) @@ -472,7 +702,33 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - error = log_end - log_start; + raw_spin_lock_irq(&logbuf_lock); + if (syslog_seq < log_first_seq) { + /* messages are gone, move to first one */ + syslog_seq = log_first_seq; + syslog_idx = log_first_idx; + } + if (from_file) { + /* + * Short-cut for poll(/"proc/kmsg") which simply checks + * for pending data, not the size; return the count of + * records, not the length. + */ + error = log_next_idx - syslog_idx; + } else { + u64 seq; + u32 idx; + + error = 0; + seq = syslog_seq; + idx = syslog_idx; + while (seq < log_next_seq) { + error += syslog_print_line(idx, NULL, 0); + idx = log_next(idx); + seq++; + } + } + raw_spin_unlock_irq(&logbuf_lock); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -501,29 +757,11 @@ void kdb_syslog_data(char *syslog_data[4]) { syslog_data[0] = log_buf; syslog_data[1] = log_buf + log_buf_len; - syslog_data[2] = log_buf + log_end - - (logged_chars < log_buf_len ? logged_chars : log_buf_len); - syslog_data[3] = log_buf + log_end; + syslog_data[2] = log_buf + log_first_idx; + syslog_data[3] = log_buf + log_next_idx; } #endif /* CONFIG_KGDB_KDB */ -/* - * Call the console drivers on a range of log_buf - */ -static void __call_console_drivers(unsigned start, unsigned end) -{ - struct console *con; - - for_each_console(con) { - if (exclusive_console && con != exclusive_console) - continue; - if ((con->flags & CON_ENABLED) && con->write && - (cpu_online(smp_processor_id()) || - (con->flags & CON_ANYTIME))) - con->write(con, &LOG_BUF(start), end - start); - } -} - static bool __read_mostly ignore_loglevel; static int __init ignore_loglevel_setup(char *str) @@ -539,143 +777,34 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" "print all kernel messages to the console."); -/* - * Write out chars from start to end - 1 inclusive - */ -static void _call_console_drivers(unsigned start, - unsigned end, int msg_log_level) -{ - trace_console(&LOG_BUF(0), start, end, log_buf_len); - - if ((msg_log_level < console_loglevel || ignore_loglevel) && - console_drivers && start != end) { - if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { - /* wrapped write */ - __call_console_drivers(start & LOG_BUF_MASK, - log_buf_len); - __call_console_drivers(0, end & LOG_BUF_MASK); - } else { - __call_console_drivers(start, end); - } - } -} - -/* - * Parse the syslog header <[0-9]*>. The decimal value represents 32bit, the - * lower 3 bit are the log level, the rest are the log facility. In case - * userspace passes usual userspace syslog messages to /dev/kmsg or - * /dev/ttyprintk, the log prefix might contain the facility. Printk needs - * to extract the correct log level for in-kernel processing, and not mangle - * the original value. - * - * If a prefix is found, the length of the prefix is returned. If 'level' is - * passed, it will be filled in with the log level without a possible facility - * value. If 'special' is passed, the special printk prefix chars are accepted - * and returned. If no valid header is found, 0 is returned and the passed - * variables are not touched. - */ -static size_t log_prefix(const char *p, unsigned int *level, char *special) -{ - unsigned int lev = 0; - char sp = '\0'; - size_t len; - - if (p[0] != '<' || !p[1]) - return 0; - if (p[2] == '>') { - /* usual single digit level number or special char */ - switch (p[1]) { - case '0' ... '7': - lev = p[1] - '0'; - break; - case 'c': /* KERN_CONT */ - case 'd': /* KERN_DEFAULT */ - sp = p[1]; - break; - default: - return 0; - } - len = 3; - } else { - /* multi digit including the level and facility number */ - char *endp = NULL; - - lev = (simple_strtoul(&p[1], &endp, 10) & 7); - if (endp == NULL || endp[0] != '>') - return 0; - len = (endp + 1) - p; - } - - /* do not accept special char if not asked for */ - if (sp && !special) - return 0; - - if (special) { - *special = sp; - /* return special char, do not touch level */ - if (sp) - return len; - } - - if (level) - *level = lev; - return len; -} - /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(unsigned start, unsigned end) +static void call_console_drivers(int level, const char *text, size_t len) { - unsigned cur_index, start_print; - static int msg_level = -1; - - BUG_ON(((int)(start - end)) > 0); - - cur_index = start; - start_print = start; - while (cur_index != end) { - if (msg_level < 0 && ((end - cur_index) > 2)) { - /* strip log prefix */ - cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL); - start_print = cur_index; - } - while (cur_index != end) { - char c = LOG_BUF(cur_index); - - cur_index++; - if (c == '\n') { - if (msg_level < 0) { - /* - * printk() has already given us loglevel tags in - * the buffer. This code is here in case the - * log buffer has wrapped right round and scribbled - * on those tags - */ - msg_level = default_message_loglevel; - } - _call_console_drivers(start_print, cur_index, msg_level); - msg_level = -1; - start_print = cur_index; - break; - } - } - } - _call_console_drivers(start_print, end, msg_level); -} + struct console *con; -static void emit_log_char(char c) -{ - LOG_BUF(log_end) = c; - log_end++; - if (log_end - log_start > log_buf_len) - log_start = log_end - log_buf_len; - if (log_end - con_start > log_buf_len) - con_start = log_end - log_buf_len; - if (logged_chars < log_buf_len) - logged_chars++; + trace_console(text, 0, len, len); + + if (level >= console_loglevel && !ignore_loglevel) + return; + if (!console_drivers) + return; + + for_each_console(con) { + if (exclusive_console && con != exclusive_console) + continue; + if (!(con->flags & CON_ENABLED)) + continue; + if (!con->write) + continue; + if (!cpu_online(smp_processor_id()) && + !(con->flags & CON_ANYTIME)) + continue; + con->write(con, text, len); + } } /* @@ -700,16 +829,6 @@ static void zap_locks(void) sema_init(&console_sem, 1); } -#if defined(CONFIG_PRINTK_TIME) -static bool printk_time = 1; -#else -static bool printk_time = 0; -#endif -module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); - -static bool always_kmsg_dump; -module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); - /* Check if we have any console registered that can be called early in boot. */ static int have_callable_console(void) { @@ -722,51 +841,6 @@ static int have_callable_console(void) return 0; } -/** - * printk - print a kernel message - * @fmt: format string - * - * This is printk(). It can be called from any context. We want it to work. - * - * We try to grab the console_lock. If we succeed, it's easy - we log the output and - * call the console drivers. If we fail to get the semaphore we place the output - * into the log buffer and return. The current holder of the console_sem will - * notice the new output in console_unlock(); and will send it to the - * consoles before releasing the lock. - * - * One effect of this deferred printing is that code which calls printk() and - * then changes console_loglevel may break. This is because console_loglevel - * is inspected when the actual printing occurs. - * - * See also: - * printf(3) - * - * See the vsnprintf() documentation for format string extensions over C99. - */ - -asmlinkage int printk(const char *fmt, ...) -{ - va_list args; - int r; - -#ifdef CONFIG_KGDB_KDB - if (unlikely(kdb_trap_printk)) { - va_start(args, fmt); - r = vkdb_printf(fmt, args); - va_end(args); - return r; - } -#endif - va_start(args, fmt); - r = vprintk(fmt, args); - va_end(args); - - return r; -} - -/* cpu currently holding logbuf_lock */ -static volatile unsigned int printk_cpu = UINT_MAX; - /* * Can we actually use the console at this time on this cpu? * @@ -810,17 +884,12 @@ static int console_trylock_for_printk(unsigned int cpu) retval = 0; } } - printk_cpu = UINT_MAX; + logbuf_cpu = UINT_MAX; if (wake) up(&console_sem); raw_spin_unlock(&logbuf_lock); return retval; } -static const char recursion_bug_msg [] = - KERN_CRIT "BUG: recent printk recursion!\n"; -static int recursion_bug; -static int new_text_line = 1; -static char printk_buf[1024]; int printk_delay_msec __read_mostly; @@ -836,15 +905,22 @@ static inline void printk_delay(void) } } -asmlinkage int vprintk(const char *fmt, va_list args) +asmlinkage int vprintk_emit(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, va_list args) { - int printed_len = 0; - int current_log_level = default_message_loglevel; + static int recursion_bug; + static char buf[LOG_LINE_MAX]; + static size_t buflen; + static int buflevel; + static char textbuf[LOG_LINE_MAX]; + char *text = textbuf; + size_t textlen; unsigned long flags; int this_cpu; - char *p; - size_t plen; - char special; + bool newline = false; + bool cont = false; + int printed_len = 0; boot_delay_msec(); printk_delay(); @@ -856,7 +932,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) /* * Ouch, printk recursed into itself! */ - if (unlikely(printk_cpu == this_cpu)) { + if (unlikely(logbuf_cpu == this_cpu)) { /* * If a crash is occurring during printk() on this CPU, * then try to get the crash message out but make sure @@ -873,97 +949,92 @@ asmlinkage int vprintk(const char *fmt, va_list args) lockdep_off(); raw_spin_lock(&logbuf_lock); - printk_cpu = this_cpu; + logbuf_cpu = this_cpu; if (recursion_bug) { + static const char recursion_msg[] = + "BUG: recent printk recursion!"; + recursion_bug = 0; - strcpy(printk_buf, recursion_bug_msg); - printed_len = strlen(recursion_bug_msg); + printed_len += strlen(recursion_msg); + /* emit KERN_CRIT message */ + log_store(0, 2, NULL, 0, recursion_msg, printed_len); } - /* Emit the output into the temporary buffer */ - printed_len += vscnprintf(printk_buf + printed_len, - sizeof(printk_buf) - printed_len, fmt, args); - p = printk_buf; + /* + * The printf needs to come first; we need the syslog + * prefix which might be passed-in as a parameter. + */ + textlen = vscnprintf(text, sizeof(textbuf), fmt, args); - /* Read log level and handle special printk prefix */ - plen = log_prefix(p, ¤t_log_level, &special); - if (plen) { - p += plen; + /* mark and strip a trailing newline */ + if (textlen && text[textlen-1] == '\n') { + textlen--; + newline = true; + } - switch (special) { - case 'c': /* Strip KERN_CONT, continue line */ - plen = 0; + /* strip syslog prefix and extract log level or flags */ + if (text[0] == '<' && text[1] && text[2] == '>') { + switch (text[1]) { + case '0' ... '7': + if (level == -1) + level = text[1] - '0'; + text += 3; + textlen -= 3; + break; + case 'c': /* KERN_CONT */ + cont = true; + case 'd': /* KERN_DEFAULT */ + text += 3; + textlen -= 3; break; - case 'd': /* Strip KERN_DEFAULT, start new line */ - plen = 0; - default: - if (!new_text_line) { - emit_log_char('\n'); - new_text_line = 1; - } } } - /* - * Copy the output into log_buf. If the caller didn't provide - * the appropriate log prefix, we insert them here - */ - for (; *p; p++) { - if (new_text_line) { - new_text_line = 0; - - if (plen) { - /* Copy original log prefix */ - int i; - - for (i = 0; i < plen; i++) - emit_log_char(printk_buf[i]); - printed_len += plen; - } else { - /* Add log prefix */ - emit_log_char('<'); - emit_log_char(current_log_level + '0'); - emit_log_char('>'); - printed_len += 3; - } + if (buflen && (!cont || dict)) { + /* no continuation; flush existing buffer */ + log_store(facility, buflevel, NULL, 0, buf, buflen); + printed_len += buflen; + buflen = 0; + } - if (printk_time) { - /* Add the current time stamp */ - char tbuf[50], *tp; - unsigned tlen; - unsigned long long t; - unsigned long nanosec_rem; - - t = cpu_clock(printk_cpu); - nanosec_rem = do_div(t, 1000000000); - tlen = sprintf(tbuf, "[%5lu.%06lu] ", - (unsigned long) t, - nanosec_rem / 1000); - - for (tp = tbuf; tp < tbuf + tlen; tp++) - emit_log_char(*tp); - printed_len += tlen; - } + if (buflen == 0) { + /* remember level for first message in the buffer */ + if (level == -1) + buflevel = default_message_loglevel; + else + buflevel = level; + } - if (!*p) - break; - } + if (buflen || !newline) { + /* append to existing buffer, or buffer until next message */ + if (buflen + textlen > sizeof(buf)) + textlen = sizeof(buf) - buflen; + memcpy(buf + buflen, text, textlen); + buflen += textlen; + } - emit_log_char(*p); - if (*p == '\n') - new_text_line = 1; + if (newline) { + /* end of line; flush buffer */ + if (buflen) { + log_store(facility, buflevel, + dict, dictlen, buf, buflen); + printed_len += buflen; + buflen = 0; + } else { + log_store(facility, buflevel, + dict, dictlen, text, textlen); + printed_len += textlen; + } } /* - * Try to acquire and then immediately release the - * console semaphore. The release will do all the - * actual magic (print out buffers, wake up klogd, - * etc). + * Try to acquire and then immediately release the console semaphore. + * The release will print out buffers and wake up /dev/kmsg and syslog() + * users. * - * The console_trylock_for_printk() function - * will release 'logbuf_lock' regardless of whether it - * actually gets the semaphore or not. + * The console_trylock_for_printk() function will release 'logbuf_lock' + * regardless of whether it actually gets the console semaphore or not. */ if (console_trylock_for_printk(this_cpu)) console_unlock(); @@ -974,12 +1045,73 @@ out_restore_irqs: return printed_len; } -EXPORT_SYMBOL(printk); +EXPORT_SYMBOL(vprintk_emit); + +asmlinkage int vprintk(const char *fmt, va_list args) +{ + return vprintk_emit(0, -1, NULL, 0, fmt, args); +} EXPORT_SYMBOL(vprintk); +asmlinkage int printk_emit(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, ...) +{ + va_list args; + int r; + + va_start(args, fmt); + r = vprintk_emit(facility, level, dict, dictlen, fmt, args); + va_end(args); + + return r; +} +EXPORT_SYMBOL(printk_emit); + +/** + * printk - print a kernel message + * @fmt: format string + * + * This is printk(). It can be called from any context. We want it to work. + * + * We try to grab the console_lock. If we succeed, it's easy - we log the + * output and call the console drivers. If we fail to get the semaphore, we + * place the output into the log buffer and return. The current holder of + * the console_sem will notice the new output in console_unlock(); and will + * send it to the consoles before releasing the lock. + * + * One effect of this deferred printing is that code which calls printk() and + * then changes console_loglevel may break. This is because console_loglevel + * is inspected when the actual printing occurs. + * + * See also: + * printf(3) + * + * See the vsnprintf() documentation for format string extensions over C99. + */ +asmlinkage int printk(const char *fmt, ...) +{ + va_list args; + int r; + +#ifdef CONFIG_KGDB_KDB + if (unlikely(kdb_trap_printk)) { + va_start(args, fmt); + r = vkdb_printf(fmt, args); + va_end(args); + return r; + } +#endif + va_start(args, fmt); + r = vprintk_emit(0, -1, NULL, 0, fmt, args); + va_end(args); + + return r; +} +EXPORT_SYMBOL(printk); #else -static void call_console_drivers(unsigned start, unsigned end) +static void call_console_drivers(int level, const char *text, size_t len) { } @@ -1217,7 +1349,7 @@ int is_console_locked(void) } /* - * Delayed printk facility, for scheduler-internal messages: + * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_BUF_SIZE 512 @@ -1253,6 +1385,10 @@ void wake_up_klogd(void) this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); } +/* the next printk record to write to the console */ +static u64 console_seq; +static u32 console_idx; + /** * console_unlock - unlock the console system * @@ -1263,15 +1399,16 @@ void wake_up_klogd(void) * by printk(). If this is the case, console_unlock(); emits * the output prior to releasing the lock. * - * If there is output waiting for klogd, we wake it up. + * If there is output waiting, we wake it /dev/kmsg and syslog() users. * * console_unlock(); may be called from any context. */ void console_unlock(void) { + static u64 seen_seq; unsigned long flags; - unsigned _con_start, _log_end; - unsigned wake_klogd = 0, retry = 0; + bool wake_klogd = false; + bool retry; if (console_suspended) { up(&console_sem); @@ -1281,17 +1418,41 @@ void console_unlock(void) console_may_schedule = 0; again: - for ( ; ; ) { + for (;;) { + struct log *msg; + static char text[LOG_LINE_MAX]; + size_t len; + int level; + raw_spin_lock_irqsave(&logbuf_lock, flags); - wake_klogd |= log_start - log_end; - if (con_start == log_end) - break; /* Nothing to print */ - _con_start = con_start; - _log_end = log_end; - con_start = log_end; /* Flush */ + if (seen_seq != log_next_seq) { + wake_klogd = true; + seen_seq = log_next_seq; + } + + if (console_seq < log_first_seq) { + /* messages are gone, move to first one */ + console_seq = log_first_seq; + console_idx = log_first_idx; + } + + if (console_seq == log_next_seq) + break; + + msg = log_from_idx(console_idx); + level = msg->level & 7; + len = msg->text_len; + if (len+1 >= sizeof(text)) + len = sizeof(text)-1; + memcpy(text, log_text(msg), len); + text[len++] = '\n'; + + console_idx = log_next(console_idx); + console_seq++; raw_spin_unlock(&logbuf_lock); + stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(_con_start, _log_end); + call_console_drivers(level, text, len); start_critical_timings(); local_irq_restore(flags); } @@ -1312,8 +1473,7 @@ again: * flush, no worries. */ raw_spin_lock(&logbuf_lock); - if (con_start != log_end) - retry = 1; + retry = console_seq != log_next_seq; raw_spin_unlock_irqrestore(&logbuf_lock, flags); if (retry && console_trylock()) @@ -1549,7 +1709,8 @@ void register_console(struct console *newcon) * for us. */ raw_spin_lock_irqsave(&logbuf_lock, flags); - con_start = log_start; + console_seq = syslog_seq; + console_idx = syslog_idx; raw_spin_unlock_irqrestore(&logbuf_lock, flags); /* * We're about to replay the log buffer. Only do this to the @@ -1758,6 +1919,9 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) } EXPORT_SYMBOL_GPL(kmsg_dump_unregister); +static bool always_kmsg_dump; +module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); + /** * kmsg_dump - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping @@ -1767,8 +1931,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); */ void kmsg_dump(enum kmsg_dump_reason reason) { - unsigned long end; - unsigned chars; + u64 idx; struct kmsg_dumper *dumper; const char *s1, *s2; unsigned long l1, l2; @@ -1780,24 +1943,27 @@ void kmsg_dump(enum kmsg_dump_reason reason) /* Theoretically, the log could move on after we do this, but there's not a lot we can do about that. The new messages will overwrite the start of what we dump. */ + raw_spin_lock_irqsave(&logbuf_lock, flags); - end = log_end & LOG_BUF_MASK; - chars = logged_chars; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (syslog_seq < log_first_seq) + idx = syslog_idx; + else + idx = log_first_idx; - if (chars > end) { - s1 = log_buf + log_buf_len - chars + end; - l1 = chars - end; + if (idx > log_next_idx) { + s1 = log_buf; + l1 = log_next_idx; - s2 = log_buf; - l2 = end; + s2 = log_buf + idx; + l2 = log_buf_len - idx; } else { s1 = ""; l1 = 0; - s2 = log_buf + end - chars; - l2 = chars; + s2 = log_buf + idx; + l2 = log_next_idx - idx; } + raw_spin_unlock_irqrestore(&logbuf_lock, flags); rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) -- cgit v1.2.3 From e11fea92e13fb91c50bacca799a6131c81929986 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 3 May 2012 02:29:41 +0200 Subject: kmsg: export printk records to the /dev/kmsg interface Support for multiple concurrent readers of /dev/kmsg, with read(), seek(), poll() support. Output of message sequence numbers, to allow userspace log consumers to reliably reconnect and reconstruct their state at any given time. After open("/dev/kmsg"), read() always returns *all* buffered records. If only future messages should be read, SEEK_END can be used. In case records get overwritten while /dev/kmsg is held open, or records get faster overwritten than they are read, the next read() will return -EPIPE and the current reading position gets updated to the next available record. The passed sequence numbers allow the log consumer to calculate the amount of lost messages. [root@mop ~]# cat /dev/kmsg 5,0,0;Linux version 3.4.0-rc1+ (kay@mop) (gcc version 4.7.0 20120315 ... 6,159,423091;ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) 7,160,424069;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored) SUBSYSTEM=acpi DEVICE=+acpi:PNP0A03:00 6,339,5140900;NET: Registered protocol family 10 30,340,5690716;udevd[80]: starting version 181 6,341,6081421;FDC 0 is a S82078B 6,345,6154686;microcode: CPU0 sig=0x623, pf=0x0, revision=0x0 7,346,6156968;sr 1:0:0:0: Attached scsi CD-ROM sr0 SUBSYSTEM=scsi DEVICE=+scsi:1:0:0:0 6,347,6289375;microcode: CPU1 sig=0x623, pf=0x0, revision=0x0 Cc: Karel Zak Tested-by: William Douglas Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 313 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 74357329550f..1ccc6d986cb3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -149,6 +150,48 @@ static int console_may_schedule; * length of the message text is stored in the header, the stored message * is not terminated. * + * Optionally, a message can carry a dictionary of properties (key/value pairs), + * to provide userspace with a machine-readable message context. + * + * Examples for well-defined, commonly used property names are: + * DEVICE=b12:8 device identifier + * b12:8 block dev_t + * c127:3 char dev_t + * n8 netdev ifindex + * +sound:card0 subsystem:devname + * SUBSYSTEM=pci driver-core subsystem name + * + * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value + * follows directly after a '=' character. Every property is terminated by + * a '\0' character. The last property is not terminated. + * + * Example of a message structure: + * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec + * 0008 34 00 record is 52 bytes long + * 000a 0b 00 text is 11 bytes long + * 000c 1f 00 dictionary is 23 bytes long + * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) + * 0010 69 74 27 73 20 61 20 6c "it's a l" + * 69 6e 65 "ine" + * 001b 44 45 56 49 43 "DEVIC" + * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" + * 52 49 56 45 52 3d 62 75 "RIVER=bu" + * 67 "g" + * 0032 00 00 00 padding to next message header + * + * The 'struct log' buffer header must never be directly exported to + * userspace, it is a kernel-private implementation detail that might + * need to be changed in the future, when the requirements change. + * + * /dev/kmsg exports the structured data in the following line format: + * "level,sequnum,timestamp;\n" + * + * The optional key/value pairs are attached as continuation lines starting + * with a space character and terminated by a newline. All possible + * non-prinatable characters are escaped in the "\xff" notation. + * + * Users of the export format should ignore possible additional values + * separated by ',', and find the message after the ';' character. */ struct log { @@ -297,6 +340,276 @@ static void log_store(int facility, int level, log_next_seq++; } +/* /dev/kmsg - userspace message inject/listen interface */ +struct devkmsg_user { + u64 seq; + u32 idx; + struct mutex lock; + char buf[8192]; +}; + +static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + char *buf, *line; + int i; + int level = default_message_loglevel; + int facility = 1; /* LOG_USER */ + size_t len = iov_length(iv, count); + ssize_t ret = len; + + if (len > LOG_LINE_MAX) + return -EINVAL; + buf = kmalloc(len+1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + line = buf; + for (i = 0; i < count; i++) { + if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) + goto out; + line += iv[i].iov_len; + } + + /* + * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace + * the decimal value represents 32bit, the lower 3 bit are the log + * level, the rest are the log facility. + * + * If no prefix or no userspace facility is specified, we + * enforce LOG_USER, to be able to reliably distinguish + * kernel-generated messages from userspace-injected ones. + */ + line = buf; + if (line[0] == '<') { + char *endp = NULL; + + i = simple_strtoul(line+1, &endp, 10); + if (endp && endp[0] == '>') { + level = i & 7; + if (i >> 3) + facility = i >> 3; + endp++; + len -= endp - line; + line = endp; + } + } + line[len] = '\0'; + + printk_emit(facility, level, NULL, 0, "%s", line); +out: + kfree(buf); + return ret; +} + +static ssize_t devkmsg_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct devkmsg_user *user = file->private_data; + struct log *msg; + size_t i; + size_t len; + ssize_t ret; + + if (!user) + return -EBADF; + + mutex_lock(&user->lock); + raw_spin_lock(&logbuf_lock); + while (user->seq == log_next_seq) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + raw_spin_unlock(&logbuf_lock); + goto out; + } + + raw_spin_unlock(&logbuf_lock); + ret = wait_event_interruptible(log_wait, + user->seq != log_next_seq); + if (ret) + goto out; + raw_spin_lock(&logbuf_lock); + } + + if (user->seq < log_first_seq) { + /* our last seen message is gone, return error and reset */ + user->idx = log_first_idx; + user->seq = log_first_seq; + ret = -EPIPE; + raw_spin_unlock(&logbuf_lock); + goto out; + } + + msg = log_from_idx(user->idx); + len = sprintf(user->buf, "%u,%llu,%llu;", + msg->level, user->seq, msg->ts_nsec / 1000); + + /* escape non-printable characters */ + for (i = 0; i < msg->text_len; i++) { + char c = log_text(msg)[i]; + + if (c < ' ' || c >= 128) + len += sprintf(user->buf + len, "\\x%02x", c); + else + user->buf[len++] = c; + } + user->buf[len++] = '\n'; + + if (msg->dict_len) { + bool line = true; + + for (i = 0; i < msg->dict_len; i++) { + char c = log_dict(msg)[i]; + + if (line) { + user->buf[len++] = ' '; + line = false; + } + + if (c == '\0') { + user->buf[len++] = '\n'; + line = true; + continue; + } + + if (c < ' ' || c >= 128) { + len += sprintf(user->buf + len, "\\x%02x", c); + continue; + } + + user->buf[len++] = c; + } + user->buf[len++] = '\n'; + } + + user->idx = log_next(user->idx); + user->seq++; + raw_spin_unlock(&logbuf_lock); + + if (len > count) { + ret = -EINVAL; + goto out; + } + + if (copy_to_user(buf, user->buf, len)) { + ret = -EFAULT; + goto out; + } + ret = len; +out: + mutex_unlock(&user->lock); + return ret; +} + +static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) +{ + struct devkmsg_user *user = file->private_data; + loff_t ret = 0; + + if (!user) + return -EBADF; + if (offset) + return -ESPIPE; + + raw_spin_lock(&logbuf_lock); + switch (whence) { + case SEEK_SET: + /* the first record */ + user->idx = log_first_idx; + user->seq = log_first_seq; + break; + case SEEK_DATA: + /* + * The first record after the last SYSLOG_ACTION_CLEAR, + * like issued by 'dmesg -c'. Reading /dev/kmsg itself + * changes no global state, and does not clear anything. + */ + user->idx = clear_idx; + user->seq = clear_seq; + break; + case SEEK_END: + /* after the last record */ + user->idx = log_next_idx; + user->seq = log_next_seq; + break; + default: + ret = -EINVAL; + } + raw_spin_unlock(&logbuf_lock); + return ret; +} + +static unsigned int devkmsg_poll(struct file *file, poll_table *wait) +{ + struct devkmsg_user *user = file->private_data; + int ret = 0; + + if (!user) + return POLLERR|POLLNVAL; + + poll_wait(file, &log_wait, wait); + + raw_spin_lock(&logbuf_lock); + if (user->seq < log_next_seq) { + /* return error when data has vanished underneath us */ + if (user->seq < log_first_seq) + ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; + ret = POLLIN|POLLRDNORM; + } + raw_spin_unlock(&logbuf_lock); + + return ret; +} + +static int devkmsg_open(struct inode *inode, struct file *file) +{ + struct devkmsg_user *user; + int err; + + /* write-only does not need any file context */ + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + return 0; + + err = security_syslog(SYSLOG_ACTION_READ_ALL); + if (err) + return err; + + user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); + if (!user) + return -ENOMEM; + + mutex_init(&user->lock); + + raw_spin_lock(&logbuf_lock); + user->idx = log_first_idx; + user->seq = log_first_seq; + raw_spin_unlock(&logbuf_lock); + + file->private_data = user; + return 0; +} + +static int devkmsg_release(struct inode *inode, struct file *file) +{ + struct devkmsg_user *user = file->private_data; + + if (!user) + return 0; + + mutex_destroy(&user->lock); + kfree(user); + return 0; +} + +const struct file_operations kmsg_fops = { + .open = devkmsg_open, + .read = devkmsg_read, + .aio_write = devkmsg_writev, + .llseek = devkmsg_llseek, + .poll = devkmsg_poll, + .release = devkmsg_release, +}; + #ifdef CONFIG_KEXEC /* * This appends the listed symbols to /proc/vmcoreinfo -- cgit v1.2.3 From 5fc3249068c1ed87c6fd485f42ced24132405629 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 8 May 2012 13:04:17 +0200 Subject: kmsg: use do_div() to divide 64bit integer On Tue, May 8, 2012 at 10:02 AM, Stephen Rothwell wrote: > kernel/built-in.o: In function `devkmsg_read': > printk.c:(.text+0x27e8): undefined reference to `__udivdi3' > Most probably the "msg->ts_nsec / 1000" since > ts_nsec is a u64 and this is a 32 bit build ... Reported-by: Stephen Rothwell Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 1ccc6d986cb3..96d4cc892255 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -407,6 +407,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, { struct devkmsg_user *user = file->private_data; struct log *msg; + u64 ts_usec; size_t i; size_t len; ssize_t ret; @@ -441,8 +442,10 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, } msg = log_from_idx(user->idx); + ts_usec = msg->ts_nsec; + do_div(ts_usec, 1000); len = sprintf(user->buf, "%u,%llu,%llu;", - msg->level, user->seq, msg->ts_nsec / 1000); + msg->level, user->seq, ts_usec); /* escape non-printable characters */ for (i = 0; i < msg->text_len; i++) { -- cgit v1.2.3 From 7f3a781d6fd81e397c3928c9af33f1fc63232db6 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 9 May 2012 01:37:51 +0200 Subject: printk - fix compilation for CONFIG_PRINTK=n Reported-by: Randy Dunlap Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 96d4cc892255..7b432951f91e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -126,7 +126,6 @@ EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; -#ifdef CONFIG_PRINTK /* * The printk log buffer consists of a chain of concatenated variable * length records. Every record starts with a record header, containing @@ -208,16 +207,9 @@ struct log { */ static DEFINE_RAW_SPINLOCK(logbuf_lock); -/* cpu currently holding logbuf_lock */ -static volatile unsigned int logbuf_cpu = UINT_MAX; - -#define LOG_LINE_MAX 1024 - -/* record buffer */ -#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -static char __log_buf[__LOG_BUF_LEN]; -static char *log_buf = __log_buf; -static u32 log_buf_len = __LOG_BUF_LEN; +/* the next printk record to read by syslog(READ) or /proc/kmsg */ +static u64 syslog_seq; +static u32 syslog_idx; /* index and sequence number of the first record stored in the buffer */ static u64 log_first_seq; @@ -225,15 +217,23 @@ static u32 log_first_idx; /* index and sequence number of the next record to store in the buffer */ static u64 log_next_seq; +#ifdef CONFIG_PRINTK static u32 log_next_idx; /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; static u32 clear_idx; -/* the next printk record to read by syslog(READ) or /proc/kmsg */ -static u64 syslog_seq; -static u32 syslog_idx; +#define LOG_LINE_MAX 1024 + +/* record buffer */ +#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +static char __log_buf[__LOG_BUF_LEN]; +static char *log_buf = __log_buf; +static u32 log_buf_len = __LOG_BUF_LEN; + +/* cpu currently holding logbuf_lock */ +static volatile unsigned int logbuf_cpu = UINT_MAX; /* human readable text of the record */ static char *log_text(const struct log *msg) @@ -1425,13 +1425,16 @@ asmlinkage int printk(const char *fmt, ...) return r; } EXPORT_SYMBOL(printk); + #else -static void call_console_drivers(int level, const char *text, size_t len) -{ -} +#define LOG_LINE_MAX 0 +static struct log *log_from_idx(u32 idx) { return NULL; } +static u32 log_next(u32 idx) { return 0; } +static char *log_text(const struct log *msg) { return NULL; } +static void call_console_drivers(int level, const char *text, size_t len) {} -#endif +#endif /* CONFIG_PRINTK */ static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) @@ -1715,7 +1718,7 @@ static u32 console_idx; * by printk(). If this is the case, console_unlock(); emits * the output prior to releasing the lock. * - * If there is output waiting, we wake it /dev/kmsg and syslog() users. + * If there is output waiting, we wake /dev/kmsg and syslog() users. * * console_unlock(); may be called from any context. */ -- cgit v1.2.3 From 5c5d5ca51abd728c8de3be43ffd6bb00f977bfcd Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 10 May 2012 04:32:53 +0200 Subject: printk() - do not merge continuation lines of different threads This prevents the merging of printk() continuation lines of different threads, in the case they race against each other. It should properly isolate "atomic" single-line printk() users from continuation users, to make sure the single-line users will never be merged with the racy continuation ones. Cc: Linus Torvalds Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Sasha Levin Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 7b432951f91e..301fb0f09fbf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1230,12 +1230,13 @@ asmlinkage int vprintk_emit(int facility, int level, static size_t buflen; static int buflevel; static char textbuf[LOG_LINE_MAX]; + static struct task_struct *cont; char *text = textbuf; size_t textlen; unsigned long flags; int this_cpu; bool newline = false; - bool cont = false; + bool prefix = false; int printed_len = 0; boot_delay_msec(); @@ -1295,20 +1296,16 @@ asmlinkage int vprintk_emit(int facility, int level, case '0' ... '7': if (level == -1) level = text[1] - '0'; - text += 3; - textlen -= 3; - break; - case 'c': /* KERN_CONT */ - cont = true; case 'd': /* KERN_DEFAULT */ + prefix = true; + case 'c': /* KERN_CONT */ text += 3; textlen -= 3; - break; } } - if (buflen && (!cont || dict)) { - /* no continuation; flush existing buffer */ + if (buflen && (prefix || dict || cont != current)) { + /* flush existing buffer */ log_store(facility, buflevel, NULL, 0, buf, buflen); printed_len += buflen; buflen = 0; @@ -1342,6 +1339,10 @@ asmlinkage int vprintk_emit(int facility, int level, dict, dictlen, text, textlen); printed_len += textlen; } + cont = NULL; + } else { + /* remember thread which filled the buffer */ + cont = current; } /* -- cgit v1.2.3 From 649e6ee33f73ba1c4f2492c6de9aff2254b540cb Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 10 May 2012 04:30:45 +0200 Subject: printk() - restore timestamp printing at console output The output of the timestamps got lost with the conversion of the kmsg buffer to records; restore the old behavior. Document, that CONFIG_PRINTK_TIME now only controls the output of the timestamps in the syslog() system call and on the console, and not the recording of the timestamps. Cc: Joe Perches Cc: Linus Torvalds Cc: Sasha Levin Cc: Ingo Molnar Reported-by: Yinghai Lu Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 301fb0f09fbf..572941d7e5f7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -786,6 +786,22 @@ static bool printk_time; #endif module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); +static size_t prepend_timestamp(unsigned long long t, char *buf) +{ + unsigned long rem_ns; + + if (!printk_time) + return 0; + + if (!buf) + return 15; + + rem_ns = do_div(t, 1000000000); + + return sprintf(buf, "[%5lu.%06lu] ", + (unsigned long) t, rem_ns / 1000); +} + static int syslog_print_line(u32 idx, char *text, size_t size) { struct log *msg; @@ -800,9 +816,7 @@ static int syslog_print_line(u32 idx, char *text, size_t size) len++; if (msg->level > 99) len++; - - if (printk_time) - len += 15; + len += prepend_timestamp(0, NULL); len += msg->text_len; len++; @@ -810,15 +824,7 @@ static int syslog_print_line(u32 idx, char *text, size_t size) } len = sprintf(text, "<%u>", msg->level); - - if (printk_time) { - unsigned long long t = msg->ts_nsec; - unsigned long rem_ns = do_div(t, 1000000000); - - len += sprintf(text + len, "[%5lu.%06lu] ", - (unsigned long) t, rem_ns / 1000); - } - + len += prepend_timestamp(msg->ts_nsec, text + len); if (len + msg->text_len > size) return -EINVAL; memcpy(text + len, log_text(msg), msg->text_len); @@ -1741,7 +1747,7 @@ again: for (;;) { struct log *msg; static char text[LOG_LINE_MAX]; - size_t len; + size_t len, l; int level; raw_spin_lock_irqsave(&logbuf_lock, flags); @@ -1761,10 +1767,13 @@ again: msg = log_from_idx(console_idx); level = msg->level & 7; - len = msg->text_len; - if (len+1 >= sizeof(text)) - len = sizeof(text)-1; - memcpy(text, log_text(msg), len); + + len = prepend_timestamp(msg->ts_nsec, text); + l = msg->text_len; + if (len + l + 1 >= sizeof(text)) + l = sizeof(text) - len - 1; + memcpy(text + len, log_text(msg), l); + len += l; text[len++] = '\n'; console_idx = log_next(console_idx); -- cgit v1.2.3 From f8450fca6ecdea38b5a882fdf6cd097e3ec8651c Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 10 May 2012 16:14:33 -0600 Subject: printk: correctly align __log_buf __log_buf must be aligned, because a 64-bit value is written directly to it as part of struct log. Alignment of the log entries is typically handled by log_store(), but this only triggers for subsequent entries, not the very first (or wrapped) entries. Cc: Kay Sievers Signed-off-by: Stephen Warren Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 572941d7e5f7..8b027bdf4606 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -227,8 +227,13 @@ static u32 clear_idx; #define LOG_LINE_MAX 1024 /* record buffer */ +#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define LOG_ALIGN 4 +#else +#define LOG_ALIGN 8 +#endif #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -static char __log_buf[__LOG_BUF_LEN]; +static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -279,12 +284,6 @@ static u32 log_next(u32 idx) return idx + msg->len; } -#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define LOG_ALIGN 4 -#else -#define LOG_ALIGN 8 -#endif - /* insert record into the buffer, discard old ones, update heads */ static void log_store(int facility, int level, const char *dict, u16 dict_len, -- cgit v1.2.3 From 1fce677971e29ceaa7c569741fa9c685a7b1052a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 11 May 2012 16:36:07 -0700 Subject: printk: add stub for prepend_timestamp() Add a stub for prepend_timestamp() when CONFIG_PRINTK is not enabled. Fixes this build error: kernel/printk.c:1770:3: error: implicit declaration of function 'prepend_timestamp' Cc: Kay Sievers Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 8b027bdf4606..c42faf97404a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1439,6 +1439,10 @@ static struct log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } static char *log_text(const struct log *msg) { return NULL; } static void call_console_drivers(int level, const char *text, size_t len) {} +static size_t prepend_timestamp(unsigned long long t, char *buf) +{ + return 0; +} #endif /* CONFIG_PRINTK */ -- cgit v1.2.3 From 3ce9a7c0ac28561567fadedf1a99272e4970f740 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 13 May 2012 23:30:46 +0200 Subject: printk() - restore prefix/timestamp printing for multi-newline strings Calls like: printk("\n *** DEADLOCK ***\n\n"); will print 3 properly indented, separated, syslog + timestamp prefixed lines in the log output. Reported-By: Sasha Levin Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 127 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 76 insertions(+), 51 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index c42faf97404a..915a8be10b5f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -448,7 +448,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, /* escape non-printable characters */ for (i = 0; i < msg->text_len; i++) { - char c = log_text(msg)[i]; + unsigned char c = log_text(msg)[i]; if (c < ' ' || c >= 128) len += sprintf(user->buf + len, "\\x%02x", c); @@ -461,7 +461,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, bool line = true; for (i = 0; i < msg->dict_len; i++) { - char c = log_dict(msg)[i]; + unsigned char c = log_dict(msg)[i]; if (line) { user->buf[len++] = ' '; @@ -785,56 +785,81 @@ static bool printk_time; #endif module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); -static size_t prepend_timestamp(unsigned long long t, char *buf) +static size_t print_prefix(const struct log *msg, bool syslog, char *buf) { - unsigned long rem_ns; + size_t len = 0; - if (!printk_time) - return 0; + if (syslog) { + if (buf) { + len += sprintf(buf, "<%u>", msg->level); + } else { + len += 3; + if (msg->level > 9) + len++; + if (msg->level > 99) + len++; + } + } - if (!buf) - return 15; + if (printk_time) { + if (buf) { + unsigned long long ts = msg->ts_nsec; + unsigned long rem_nsec = do_div(ts, 1000000000); - rem_ns = do_div(t, 1000000000); + len += sprintf(buf + len, "[%5lu.%06lu] ", + (unsigned long) ts, rem_nsec / 1000); + } else { + len += 15; + } + } - return sprintf(buf, "[%5lu.%06lu] ", - (unsigned long) t, rem_ns / 1000); + return len; } -static int syslog_print_line(u32 idx, char *text, size_t size) +static size_t msg_print_text(const struct log *msg, bool syslog, + char *buf, size_t size) { - struct log *msg; - size_t len; + const char *text = log_text(msg); + size_t text_size = msg->text_len; + size_t len = 0; - msg = log_from_idx(idx); - if (!text) { - /* calculate length only */ - len = 3; + do { + const char *next = memchr(text, '\n', text_size); + size_t text_len; - if (msg->level > 9) - len++; - if (msg->level > 99) - len++; - len += prepend_timestamp(0, NULL); + if (next) { + text_len = next - text; + next++; + text_size -= next - text; + } else { + text_len = text_size; + } - len += msg->text_len; - len++; - return len; - } + if (buf) { + if (print_prefix(msg, syslog, NULL) + + text_len + 1>= size - len) + break; + + len += print_prefix(msg, syslog, buf + len); + memcpy(buf + len, text, text_len); + len += text_len; + buf[len++] = '\n'; + } else { + /* SYSLOG_ACTION_* buffer size only calculation */ + len += print_prefix(msg, syslog, NULL); + len += text_len + 1; + } + + text = next; + } while (text); - len = sprintf(text, "<%u>", msg->level); - len += prepend_timestamp(msg->ts_nsec, text + len); - if (len + msg->text_len > size) - return -EINVAL; - memcpy(text + len, log_text(msg), msg->text_len); - len += msg->text_len; - text[len++] = '\n'; return len; } static int syslog_print(char __user *buf, int size) { char *text; + struct log *msg; int len; text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); @@ -847,7 +872,8 @@ static int syslog_print(char __user *buf, int size) syslog_seq = log_first_seq; syslog_idx = log_first_idx; } - len = syslog_print_line(syslog_idx, text, LOG_LINE_MAX); + msg = log_from_idx(syslog_idx); + len = msg_print_text(msg, true, text, LOG_LINE_MAX); syslog_idx = log_next(syslog_idx); syslog_seq++; raw_spin_unlock_irq(&logbuf_lock); @@ -887,14 +913,18 @@ static int syslog_print_all(char __user *buf, int size, bool clear) seq = clear_seq; idx = clear_idx; while (seq < log_next_seq) { - len += syslog_print_line(idx, NULL, 0); + struct log *msg = log_from_idx(idx); + + len += msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; } seq = clear_seq; idx = clear_idx; while (len > size && seq < log_next_seq) { - len -= syslog_print_line(idx, NULL, 0); + struct log *msg = log_from_idx(idx); + + len -= msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; } @@ -904,9 +934,10 @@ static int syslog_print_all(char __user *buf, int size, bool clear) len = 0; while (len >= 0 && seq < next_seq) { + struct log *msg = log_from_idx(idx); int textlen; - textlen = syslog_print_line(idx, text, LOG_LINE_MAX); + textlen = msg_print_text(msg, true, text, LOG_LINE_MAX); if (textlen < 0) { len = textlen; break; @@ -1044,7 +1075,9 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) seq = syslog_seq; idx = syslog_idx; while (seq < log_next_seq) { - error += syslog_print_line(idx, NULL, 0); + struct log *msg = log_from_idx(idx); + + error += msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; } @@ -1439,10 +1472,8 @@ static struct log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } static char *log_text(const struct log *msg) { return NULL; } static void call_console_drivers(int level, const char *text, size_t len) {} -static size_t prepend_timestamp(unsigned long long t, char *buf) -{ - return 0; -} +static size_t msg_print_text(const struct log *msg, bool syslog, + char *buf, size_t size) { return 0; } #endif /* CONFIG_PRINTK */ @@ -1750,7 +1781,7 @@ again: for (;;) { struct log *msg; static char text[LOG_LINE_MAX]; - size_t len, l; + size_t len; int level; raw_spin_lock_irqsave(&logbuf_lock, flags); @@ -1771,13 +1802,7 @@ again: msg = log_from_idx(console_idx); level = msg->level & 7; - len = prepend_timestamp(msg->ts_nsec, text); - l = msg->text_len; - if (len + l + 1 >= sizeof(text)) - l = sizeof(text) - len - 1; - memcpy(text + len, log_text(msg), l); - len += l; - text[len++] = '\n'; + len = msg_print_text(msg, false, text, sizeof(text)); console_idx = log_next(console_idx); console_seq++; -- cgit v1.2.3 From c313af145b9bc4fb8e8e0c83b8cfc10e1b894a50 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 14 May 2012 20:46:27 +0200 Subject: printk() - isolate KERN_CONT users from ordinary complete lines Arrange the continuation printk() buffering to be fully separated from the ordinary full line users. Limit the exposure to races and wrong printk() line merges to users of continuation only. Ordinary full line users racing against continuation users will no longer affect each other. Multiple continuation users from different threads, racing against each other will not wrongly be merged into a single line, but printed as separate lines. Test output of a kernel module which starts two separate threads which race against each other, one of them printing a single full terminated line: printk("(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)\n"); The other one printing the line, every character separate in a continuation loop: printk("(C"); for (i = 0; i < 58; i++) printk(KERN_CONT "C"); printk(KERN_CONT "C)\n"); Behavior of single and non-thread-aware printk() buffer: # modprobe printk-race printk test init (CC(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) CC(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) CC(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) CC(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) C(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC) (CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC) New behavior with separate and thread-aware continuation buffer: # modprobe printk-race printk test init (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) (CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC) (CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC) Cc: Linus Torvalds Cc: Joe Perches Cc: Ted Ts'o Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Sasha Levin Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 105 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 44 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 915a8be10b5f..32462d2b364a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1264,13 +1264,13 @@ asmlinkage int vprintk_emit(int facility, int level, const char *fmt, va_list args) { static int recursion_bug; - static char buf[LOG_LINE_MAX]; - static size_t buflen; - static int buflevel; + static char cont_buf[LOG_LINE_MAX]; + static size_t cont_len; + static int cont_level; + static struct task_struct *cont_task; static char textbuf[LOG_LINE_MAX]; - static struct task_struct *cont; char *text = textbuf; - size_t textlen; + size_t text_len; unsigned long flags; int this_cpu; bool newline = false; @@ -1320,15 +1320,15 @@ asmlinkage int vprintk_emit(int facility, int level, * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. */ - textlen = vscnprintf(text, sizeof(textbuf), fmt, args); + text_len = vscnprintf(text, sizeof(textbuf), fmt, args); /* mark and strip a trailing newline */ - if (textlen && text[textlen-1] == '\n') { - textlen--; + if (text_len && text[text_len-1] == '\n') { + text_len--; newline = true; } - /* strip syslog prefix and extract log level or flags */ + /* strip syslog prefix and extract log level or control flags */ if (text[0] == '<' && text[1] && text[2] == '>') { switch (text[1]) { case '0' ... '7': @@ -1338,49 +1338,67 @@ asmlinkage int vprintk_emit(int facility, int level, prefix = true; case 'c': /* KERN_CONT */ text += 3; - textlen -= 3; + text_len -= 3; } } - if (buflen && (prefix || dict || cont != current)) { - /* flush existing buffer */ - log_store(facility, buflevel, NULL, 0, buf, buflen); - printed_len += buflen; - buflen = 0; - } + if (level == -1) + level = default_message_loglevel; - if (buflen == 0) { - /* remember level for first message in the buffer */ - if (level == -1) - buflevel = default_message_loglevel; - else - buflevel = level; + if (dict) { + prefix = true; + newline = true; } - if (buflen || !newline) { - /* append to existing buffer, or buffer until next message */ - if (buflen + textlen > sizeof(buf)) - textlen = sizeof(buf) - buflen; - memcpy(buf + buflen, text, textlen); - buflen += textlen; - } + if (!newline) { + if (cont_len && (prefix || cont_task != current)) { + /* + * Flush earlier buffer, which is either from a + * different thread, or when we got a new prefix. + */ + log_store(facility, cont_level, NULL, 0, cont_buf, cont_len); + cont_len = 0; + } - if (newline) { - /* end of line; flush buffer */ - if (buflen) { - log_store(facility, buflevel, - dict, dictlen, buf, buflen); - printed_len += buflen; - buflen = 0; - } else { - log_store(facility, buflevel, - dict, dictlen, text, textlen); - printed_len += textlen; + if (!cont_len) { + cont_level = level; + cont_task = current; } - cont = NULL; + + /* buffer or append to earlier buffer from the same thread */ + if (cont_len + text_len > sizeof(cont_buf)) + text_len = sizeof(cont_buf) - cont_len; + memcpy(cont_buf + cont_len, text, text_len); + cont_len += text_len; } else { - /* remember thread which filled the buffer */ - cont = current; + if (cont_len && cont_task == current) { + if (prefix) { + /* + * New prefix from the same thread; flush. We + * either got no earlier newline, or we race + * with an interrupt. + */ + log_store(facility, cont_level, + NULL, 0, cont_buf, cont_len); + cont_len = 0; + } + + /* append to the earlier buffer and flush */ + if (cont_len + text_len > sizeof(cont_buf)) + text_len = sizeof(cont_buf) - cont_len; + memcpy(cont_buf + cont_len, text, text_len); + cont_len += text_len; + log_store(facility, cont_level, + NULL, 0, cont_buf, cont_len); + cont_len = 0; + cont_task = NULL; + printed_len = cont_len; + } else { + /* ordinary single and terminated line */ + log_store(facility, level, + dict, dictlen, text, text_len); + printed_len = text_len; + } } /* @@ -1470,7 +1488,6 @@ EXPORT_SYMBOL(printk); #define LOG_LINE_MAX 0 static struct log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } -static char *log_text(const struct log *msg) { return NULL; } static void call_console_drivers(int level, const char *text, size_t len) {} static size_t msg_print_text(const struct log *msg, bool syslog, char *buf, size_t size) { return 0; } -- cgit v1.2.3