diff options
Diffstat (limited to 'kernel')
48 files changed, 2525 insertions, 607 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 298437bb2c6a..33824f0385b3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -127,7 +127,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz -cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@ +cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_ikh_data.sh $@ $(obj)/kheaders_data.tar.xz: FORCE $(call cmd,genikh) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 327f37c9fdfa..217cec4e22c6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3540,17 +3540,84 @@ static int cpu_stat_show(struct seq_file *seq, void *v) #ifdef CONFIG_PSI static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { - return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_IO); + struct cgroup *cgroup = seq_css(seq)->cgroup; + struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi; + + return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { - return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_MEM); + struct cgroup *cgroup = seq_css(seq)->cgroup; + struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi; + + return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { - return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU); + struct cgroup *cgroup = seq_css(seq)->cgroup; + struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi; + + return psi_show(seq, psi, PSI_CPU); } -#endif + +static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) +{ + struct psi_trigger *new; + struct cgroup *cgrp; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENODEV; + + cgroup_get(cgrp); + cgroup_kn_unlock(of->kn); + + new = psi_trigger_create(&cgrp->psi, buf, nbytes, res); + if (IS_ERR(new)) { + cgroup_put(cgrp); + return PTR_ERR(new); + } + + psi_trigger_replace(&of->priv, new); + + cgroup_put(cgrp); + + return nbytes; +} + +static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + return cgroup_pressure_write(of, buf, nbytes, PSI_IO); +} + +static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + return cgroup_pressure_write(of, buf, nbytes, PSI_MEM); +} + +static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + return cgroup_pressure_write(of, buf, nbytes, PSI_CPU); +} + +static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, + poll_table *pt) +{ + return psi_trigger_poll(&of->priv, of->file, pt); +} + +static void cgroup_pressure_release(struct kernfs_open_file *of) +{ + psi_trigger_replace(&of->priv, NULL); +} +#endif /* CONFIG_PSI */ static int cgroup_freeze_show(struct seq_file *seq, void *v) { @@ -4743,20 +4810,26 @@ static struct cftype cgroup_base_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_io_pressure_show, + .write = cgroup_io_pressure_write, + .poll = cgroup_pressure_poll, + .release = cgroup_pressure_release, }, { .name = "memory.pressure", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_memory_pressure_show, + .write = cgroup_memory_pressure_write, + .poll = cgroup_pressure_poll, + .release = cgroup_pressure_release, }, { .name = "cpu.pressure", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_cpu_pressure_show, + .write = cgroup_cpu_pressure_write, + .poll = cgroup_pressure_poll, + .release = cgroup_pressure_release, }, -#endif +#endif /* CONFIG_PSI */ { } /* terminate */ }; diff --git a/kernel/compat.c b/kernel/compat.c index d8a36c6ad7c9..b5f7063c0db6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -346,8 +346,11 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat) return -EFAULT; switch (_NSIG_WORDS) { case 4: set->sig[3] = v.sig[6] | (((long)v.sig[7]) << 32 ); + /* fall through */ case 3: set->sig[2] = v.sig[4] | (((long)v.sig[5]) << 32 ); + /* fall through */ case 2: set->sig[1] = v.sig[2] | (((long)v.sig[3]) << 32 ); + /* fall through */ case 1: set->sig[0] = v.sig[0] | (((long)v.sig[1]) << 32 ); } #else diff --git a/kernel/exit.c b/kernel/exit.c index 2166c2d92ddc..8361a560cd1d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -422,7 +422,7 @@ retry: * freed task structure. */ if (atomic_read(&mm->mm_users) <= 1) { - mm->owner = NULL; + WRITE_ONCE(mm->owner, NULL); return; } @@ -462,7 +462,7 @@ retry: * most likely racing with swapoff (try_to_unuse()) or /proc or * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ - mm->owner = NULL; + WRITE_ONCE(mm->owner, NULL); return; assign_new_owner: @@ -483,7 +483,7 @@ assign_new_owner: put_task_struct(c); goto retry; } - mm->owner = c; + WRITE_ONCE(mm->owner, c); task_unlock(c); put_task_struct(c); } diff --git a/kernel/fork.c b/kernel/fork.c index 737db1828437..b4cba953040a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -955,6 +955,15 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +static __always_inline void mm_clear_owner(struct mm_struct *mm, + struct task_struct *p) +{ +#ifdef CONFIG_MEMCG + if (mm->owner == p) + WRITE_ONCE(mm->owner, NULL); +#endif +} + static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { #ifdef CONFIG_MEMCG @@ -1343,6 +1352,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, free_pt: /* don't put binfmt in mmput, we haven't got module yet */ mm->binfmt = NULL; + mm_init_owner(mm, NULL); mmput(mm); fail_nomem: @@ -1726,6 +1736,21 @@ static int pidfd_create(struct pid *pid) return fd; } +static void __delayed_free_task(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + free_task(tsk); +} + +static __always_inline void delayed_free_task(struct task_struct *tsk) +{ + if (IS_ENABLED(CONFIG_MEMCG)) + call_rcu(&tsk->rcu, __delayed_free_task); + else + free_task(tsk); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2068,7 +2093,7 @@ static __latent_entropy struct task_struct *copy_process( #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif - clear_all_latency_tracing(p); + clear_tsk_latency_tracing(p); /* ok, now we should be set up.. */ p->pid = pid_nr(pid); @@ -2233,8 +2258,10 @@ bad_fork_cleanup_io: bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: - if (p->mm) + if (p->mm) { + mm_clear_owner(p->mm, p); mmput(p->mm); + } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); @@ -2265,7 +2292,7 @@ bad_fork_cleanup_count: bad_fork_free: p->state = TASK_DEAD; put_task_stack(p); - free_task(p); + delayed_free_task(p); fork_out: spin_lock_irq(¤t->sighand->siglock); hlist_del_init(&delayed.node); diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 1e3823fa799b..f71c1adcff31 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -53,6 +53,7 @@ config GCOV_PROFILE_ALL choice prompt "Specify GCOV format" depends on GCOV_KERNEL + depends on CC_IS_GCC ---help--- The gcov format is usually determined by the GCC version, and the default is chosen according to your GCC version. However, there are @@ -62,7 +63,7 @@ choice config GCOV_FORMAT_3_4 bool "GCC 3.4 format" - depends on CC_IS_GCC && GCC_VERSION < 40700 + depends on GCC_VERSION < 40700 ---help--- Select this option to use the format defined by GCC 3.4. diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index ff06d64df397..d66a74b0f100 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -2,5 +2,6 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' obj-y := base.o fs.o -obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o -obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o +obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_base.o gcc_3_4.o +obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_base.o gcc_4_7.o +obj-$(CONFIG_CC_IS_CLANG) += clang.o diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 9c7c8d5c18f2..0ffe9f194080 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -22,88 +22,8 @@ #include <linux/sched.h> #include "gcov.h" -static int gcov_events_enabled; -static DEFINE_MUTEX(gcov_lock); - -/* - * __gcov_init is called by gcc-generated constructor code for each object - * file compiled with -fprofile-arcs. - */ -void __gcov_init(struct gcov_info *info) -{ - static unsigned int gcov_version; - - mutex_lock(&gcov_lock); - if (gcov_version == 0) { - gcov_version = gcov_info_version(info); - /* - * Printing gcc's version magic may prove useful for debugging - * incompatibility reports. - */ - pr_info("version magic: 0x%x\n", gcov_version); - } - /* - * Add new profiling data structure to list and inform event - * listener. - */ - gcov_info_link(info); - if (gcov_events_enabled) - gcov_event(GCOV_ADD, info); - mutex_unlock(&gcov_lock); -} -EXPORT_SYMBOL(__gcov_init); - -/* - * These functions may be referenced by gcc-generated profiling code but serve - * no function for kernel profiling. - */ -void __gcov_flush(void) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_flush); - -void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_add); - -void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_single); - -void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_delta); - -void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_ior); - -void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_time_profile); - -void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_icall_topn); - -void __gcov_exit(void) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_exit); +int gcov_events_enabled; +DEFINE_MUTEX(gcov_lock); /** * gcov_enable_events - enable event reporting through gcov_event() @@ -144,7 +64,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, /* Remove entries located in module from linked list. */ while ((info = gcov_info_next(info))) { - if (within_module((unsigned long)info, mod)) { + if (gcov_info_within_module(info, mod)) { gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c new file mode 100644 index 000000000000..c94b820a1b62 --- /dev/null +++ b/kernel/gcov/clang.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + * modified from kernel/gcov/gcc_4_7.c + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * LLVM uses profiling data that's deliberately similar to GCC, but has a + * very different way of exporting that data. LLVM calls llvm_gcov_init() once + * per module, and provides a couple of callbacks that we can use to ask for + * more data. + * + * We care about the "writeout" callback, which in turn calls back into + * compiler-rt/this module to dump all the gathered coverage data to disk: + * + * llvm_gcda_start_file() + * llvm_gcda_emit_function() + * llvm_gcda_emit_arcs() + * llvm_gcda_emit_function() + * llvm_gcda_emit_arcs() + * [... repeats for each function ...] + * llvm_gcda_summary_info() + * llvm_gcda_end_file() + * + * This design is much more stateless and unstructured than gcc's, and is + * intended to run at process exit. This forces us to keep some local state + * about which module we're dealing with at the moment. On the other hand, it + * also means we don't depend as much on how LLVM represents profiling data + * internally. + * + * See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more + * details on how this works, particularly GCOVProfiler::emitProfileArcs(), + * GCOVProfiler::insertCounterWriteout(), and + * GCOVProfiler::insertFlush(). + */ + +#define pr_fmt(fmt) "gcov: " fmt + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/printk.h> +#include <linux/ratelimit.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include "gcov.h" + +typedef void (*llvm_gcov_callback)(void); + +struct gcov_info { + struct list_head head; + + const char *filename; + unsigned int version; + u32 checksum; + + struct list_head functions; +}; + +struct gcov_fn_info { + struct list_head head; + + u32 ident; + u32 checksum; + u8 use_extra_checksum; + u32 cfg_checksum; + + u32 num_counters; + u64 *counters; + const char *function_name; +}; + +static struct gcov_info *current_info; + +static LIST_HEAD(clang_gcov_list); + +void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush) +{ + struct gcov_info *info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return; + + INIT_LIST_HEAD(&info->head); + INIT_LIST_HEAD(&info->functions); + + mutex_lock(&gcov_lock); + + list_add_tail(&info->head, &clang_gcov_list); + current_info = info; + writeout(); + current_info = NULL; + if (gcov_events_enabled) + gcov_event(GCOV_ADD, info); + + mutex_unlock(&gcov_lock); +} +EXPORT_SYMBOL(llvm_gcov_init); + +void llvm_gcda_start_file(const char *orig_filename, const char version[4], + u32 checksum) +{ + current_info->filename = orig_filename; + memcpy(¤t_info->version, version, sizeof(current_info->version)); + current_info->checksum = checksum; +} +EXPORT_SYMBOL(llvm_gcda_start_file); + +void llvm_gcda_emit_function(u32 ident, const char *function_name, + u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum) +{ + struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return; + + INIT_LIST_HEAD(&info->head); + info->ident = ident; + info->checksum = func_checksum; + info->use_extra_checksum = use_extra_checksum; + info->cfg_checksum = cfg_checksum; + if (function_name) + info->function_name = kstrdup(function_name, GFP_KERNEL); + + list_add_tail(&info->head, ¤t_info->functions); +} +EXPORT_SYMBOL(llvm_gcda_emit_function); + +void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters) +{ + struct gcov_fn_info *info = list_last_entry(¤t_info->functions, + struct gcov_fn_info, head); + + info->num_counters = num_counters; + info->counters = counters; +} +EXPORT_SYMBOL(llvm_gcda_emit_arcs); + +void llvm_gcda_summary_info(void) +{ +} +EXPORT_SYMBOL(llvm_gcda_summary_info); + +void llvm_gcda_end_file(void) +{ +} +EXPORT_SYMBOL(llvm_gcda_end_file); + +/** + * gcov_info_filename - return info filename + * @info: profiling data set + */ +const char *gcov_info_filename(struct gcov_info *info) +{ + return info->filename; +} + +/** + * gcov_info_version - return info version + * @info: profiling data set + */ +unsigned int gcov_info_version(struct gcov_info *info) +{ + return info->version; +} + +/** + * gcov_info_next - return next profiling data set + * @info: profiling data set + * + * Returns next gcov_info following @info or first gcov_info in the chain if + * @info is %NULL. + */ +struct gcov_info *gcov_info_next(struct gcov_info *info) +{ + if (!info) + return list_first_entry_or_null(&clang_gcov_list, + struct gcov_info, head); + if (list_is_last(&info->head, &clang_gcov_list)) + return NULL; + return list_next_entry(info, head); +} + +/** + * gcov_info_link - link/add profiling data set to the list + * @info: profiling data set + */ +void gcov_info_link(struct gcov_info *info) +{ + list_add_tail(&info->head, &clang_gcov_list); +} + +/** + * gcov_info_unlink - unlink/remove profiling data set from the list + * @prev: previous profiling data set + * @info: profiling data set + */ +void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) +{ + /* Generic code unlinks while iterating. */ + __list_del_entry(&info->head); +} + +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info->filename, mod); +} + +/* Symbolic links to be created for each profiling data file. */ +const struct gcov_link gcov_link[] = { + { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ + { 0, NULL}, +}; + +/** + * gcov_info_reset - reset profiling data to zero + * @info: profiling data set + */ +void gcov_info_reset(struct gcov_info *info) +{ + struct gcov_fn_info *fn; + + list_for_each_entry(fn, &info->functions, head) + memset(fn->counters, 0, + sizeof(fn->counters[0]) * fn->num_counters); +} + +/** + * gcov_info_is_compatible - check if profiling data can be added + * @info1: first profiling data set + * @info2: second profiling data set + * + * Returns non-zero if profiling data can be added, zero otherwise. + */ +int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) +{ + struct gcov_fn_info *fn_ptr1 = list_first_entry_or_null( + &info1->functions, struct gcov_fn_info, head); + struct gcov_fn_info *fn_ptr2 = list_first_entry_or_null( + &info2->functions, struct gcov_fn_info, head); + + if (info1->checksum != info2->checksum) + return false; + if (!fn_ptr1) + return fn_ptr1 == fn_ptr2; + while (!list_is_last(&fn_ptr1->head, &info1->functions) && + !list_is_last(&fn_ptr2->head, &info2->functions)) { + if (fn_ptr1->checksum != fn_ptr2->checksum) + return false; + if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum) + return false; + if (fn_ptr1->use_extra_checksum && + fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) + return false; + fn_ptr1 = list_next_entry(fn_ptr1, head); + fn_ptr2 = list_next_entry(fn_ptr2, head); + } + return list_is_last(&fn_ptr1->head, &info1->functions) && + list_is_last(&fn_ptr2->head, &info2->functions); +} + +/** + * gcov_info_add - add up profiling data + * @dest: profiling data set to which data is added + * @source: profiling data set which is added + * + * Adds profiling counts of @source to @dest. + */ +void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) +{ + struct gcov_fn_info *dfn_ptr; + struct gcov_fn_info *sfn_ptr = list_first_entry_or_null(&src->functions, + struct gcov_fn_info, head); + + list_for_each_entry(dfn_ptr, &dst->functions, head) { + u32 i; + + for (i = 0; i < sfn_ptr->num_counters; i++) + dfn_ptr->counters[i] += sfn_ptr->counters[i]; + } +} + +static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) +{ + size_t cv_size; /* counter values size */ + struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), + GFP_KERNEL); + if (!fn_dup) + return NULL; + INIT_LIST_HEAD(&fn_dup->head); + + fn_dup->function_name = kstrdup(fn->function_name, GFP_KERNEL); + if (!fn_dup->function_name) + goto err_name; + + cv_size = fn->num_counters * sizeof(fn->counters[0]); + fn_dup->counters = vmalloc(cv_size); + if (!fn_dup->counters) + goto err_counters; + memcpy(fn_dup->counters, fn->counters, cv_size); + + return fn_dup; + +err_counters: + kfree(fn_dup->function_name); +err_name: + kfree(fn_dup); + return NULL; +} + +/** + * gcov_info_dup - duplicate profiling data set + * @info: profiling data set to duplicate + * + * Return newly allocated duplicate on success, %NULL on error. + */ +struct gcov_info *gcov_info_dup(struct gcov_info *info) +{ + struct gcov_info *dup; + struct gcov_fn_info *fn; + + dup = kmemdup(info, sizeof(*dup), GFP_KERNEL); + if (!dup) + return NULL; + INIT_LIST_HEAD(&dup->head); + INIT_LIST_HEAD(&dup->functions); + dup->filename = kstrdup(info->filename, GFP_KERNEL); + if (!dup->filename) + goto err; + + list_for_each_entry(fn, &info->functions, head) { + struct gcov_fn_info *fn_dup = gcov_fn_info_dup(fn); + + if (!fn_dup) + goto err; + list_add_tail(&fn_dup->head, &dup->functions); + } + + return dup; + +err: + gcov_info_free(dup); + return NULL; +} + +/** + * gcov_info_free - release memory for profiling data set duplicate + * @info: profiling data set duplicate to free + */ +void gcov_info_free(struct gcov_info *info) +{ + struct gcov_fn_info *fn, *tmp; + + list_for_each_entry_safe(fn, tmp, &info->functions, head) { + kfree(fn->function_name); + vfree(fn->counters); + list_del(&fn->head); + kfree(fn); + } + kfree(info->filename); + kfree(info); +} + +#define ITER_STRIDE PAGE_SIZE + +/** + * struct gcov_iterator - specifies current file position in logical records + * @info: associated profiling data + * @buffer: buffer containing file data + * @size: size of buffer + * @pos: current position in file + */ +struct gcov_iterator { + struct gcov_info *info; + void *buffer; + size_t size; + loff_t pos; +}; + +/** + * store_gcov_u32 - store 32 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't + * store anything. + */ +static size_t store_gcov_u32(void *buffer, size_t off, u32 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + *data = v; + } + + return sizeof(*data); +} + +/** + * store_gcov_u64 - store 64 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. 64 bit numbers are stored as two 32 bit numbers, the low part + * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store + * anything. + */ +static size_t store_gcov_u64(void *buffer, size_t off, u64 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + + data[0] = (v & 0xffffffffUL); + data[1] = (v >> 32); + } + + return sizeof(*data) * 2; +} + +/** + * convert_to_gcda - convert profiling data set to gcda file format + * @buffer: the buffer to store file data or %NULL if no data should be stored + * @info: profiling data set to be converted + * + * Returns the number of bytes that were/would have been stored into the buffer. + */ +static size_t convert_to_gcda(char *buffer, struct gcov_info *info) +{ + struct gcov_fn_info *fi_ptr; + size_t pos = 0; + + /* File header. */ + pos += store_gcov_u32(buffer, pos, GCOV_DATA_MAGIC); + pos += store_gcov_u32(buffer, pos, info->version); + pos += store_gcov_u32(buffer, pos, info->checksum); + + list_for_each_entry(fi_ptr, &info->functions, head) { + u32 i; + u32 len = 2; + + if (fi_ptr->use_extra_checksum) + len++; + + pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION); + pos += store_gcov_u32(buffer, pos, len); + pos += store_gcov_u32(buffer, pos, fi_ptr->ident); + pos += store_gcov_u32(buffer, pos, fi_ptr->checksum); + if (fi_ptr->use_extra_checksum) + pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); + + pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE); + pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2); + for (i = 0; i < fi_ptr->num_counters; i++) + pos += store_gcov_u64(buffer, pos, fi_ptr->counters[i]); + } + + return pos; +} + +/** + * gcov_iter_new - allocate and initialize profiling data iterator + * @info: profiling data set to be iterated + * + * Return file iterator on success, %NULL otherwise. + */ +struct gcov_iterator *gcov_iter_new(struct gcov_info *info) +{ + struct gcov_iterator *iter; + + iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); + if (!iter) + goto err_free; + + iter->info = info; + /* Dry-run to get the actual buffer size. */ + iter->size = convert_to_gcda(NULL, info); + iter->buffer = vmalloc(iter->size); + if (!iter->buffer) + goto err_free; + + convert_to_gcda(iter->buffer, info); + + return iter; + +err_free: + kfree(iter); + return NULL; +} + + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +void gcov_iter_free(struct gcov_iterator *iter) +{ + vfree(iter->buffer); + kfree(iter); +} + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) +{ + return iter->info; +} + +/** + * gcov_iter_start - reset file iterator to starting position + * @iter: file iterator + */ +void gcov_iter_start(struct gcov_iterator *iter) +{ + iter->pos = 0; +} + +/** + * gcov_iter_next - advance file iterator to next logical record + * @iter: file iterator + * + * Return zero if new position is valid, non-zero if iterator has reached end. + */ +int gcov_iter_next(struct gcov_iterator *iter) +{ + if (iter->pos < iter->size) + iter->pos += ITER_STRIDE; + + if (iter->pos >= iter->size) + return -EINVAL; + + return 0; +} + +/** + * gcov_iter_write - write data for current pos to seq_file + * @iter: file iterator + * @seq: seq_file handle + * + * Return zero on success, non-zero otherwise. + */ +int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) +{ + size_t len; + + if (iter->pos >= iter->size) + return -EINVAL; + + len = ITER_STRIDE; + if (iter->pos + len > iter->size) + len = iter->size - iter->pos; + + seq_write(seq, iter->buffer + iter->pos, len); + + return 0; +} diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c index 2dddecbdbe6e..801ee4b0b969 100644 --- a/kernel/gcov/gcc_3_4.c +++ b/kernel/gcov/gcc_3_4.c @@ -137,6 +137,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) gcov_info_head = info->next; } +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info, mod); +} + /* Symbolic links to be created for each profiling data file. */ const struct gcov_link gcov_link[] = { { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index ca5e5c0ef853..ec37563674d6 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -150,6 +150,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) gcov_info_head = info->next; } +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info, mod); +} + /* Symbolic links to be created for each profiling data file. */ const struct gcov_link gcov_link[] = { { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ diff --git a/kernel/gcov/gcc_base.c b/kernel/gcov/gcc_base.c new file mode 100644 index 000000000000..3cf736b9f880 --- /dev/null +++ b/kernel/gcov/gcc_base.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include "gcov.h" + +/* + * __gcov_init is called by gcc-generated constructor code for each object + * file compiled with -fprofile-arcs. + */ +void __gcov_init(struct gcov_info *info) +{ + static unsigned int gcov_version; + + mutex_lock(&gcov_lock); + if (gcov_version == 0) { + gcov_version = gcov_info_version(info); + /* + * Printing gcc's version magic may prove useful for debugging + * incompatibility reports. + */ + pr_info("version magic: 0x%x\n", gcov_version); + } + /* + * Add new profiling data structure to list and inform event + * listener. + */ + gcov_info_link(info); + if (gcov_events_enabled) + gcov_event(GCOV_ADD, info); + mutex_unlock(&gcov_lock); +} +EXPORT_SYMBOL(__gcov_init); + +/* + * These functions may be referenced by gcc-generated profiling code but serve + * no function for kernel profiling. + */ +void __gcov_flush(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_flush); + +void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_add); + +void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_single); + +void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_delta); + +void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_ior); + +void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_time_profile); + +void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_icall_topn); + +void __gcov_exit(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_exit); diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h index de118ad4a024..6ab2c1808c9d 100644 --- a/kernel/gcov/gcov.h +++ b/kernel/gcov/gcov.h @@ -15,6 +15,7 @@ #ifndef GCOV_H #define GCOV_H GCOV_H +#include <linux/module.h> #include <linux/types.h> /* @@ -46,6 +47,7 @@ unsigned int gcov_info_version(struct gcov_info *info); struct gcov_info *gcov_info_next(struct gcov_info *info); void gcov_info_link(struct gcov_info *info); void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info); +bool gcov_info_within_module(struct gcov_info *info, struct module *mod); /* Base interface. */ enum gcov_action { @@ -83,4 +85,7 @@ struct gcov_link { }; extern const struct gcov_link gcov_link[]; +extern int gcov_events_enabled; +extern struct mutex gcov_lock; + #endif /* GCOV_H */ diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..8fee06625c37 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -91,6 +91,9 @@ config GENERIC_MSI_IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY select GENERIC_MSI_IRQ +config IRQ_MSI_IOMMU + bool + config HANDLE_DOMAIN_IRQ bool diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 51128bea3846..29d6c7d070b4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1459,6 +1459,33 @@ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); + +/** + * irq_chip_request_resources_parent - Request resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +int irq_chip_request_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + + if (data->chip->irq_request_resources) + return data->chip->irq_request_resources(data); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); + +/** + * irq_chip_release_resources_parent - Release resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_release_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_release_resources) + data->chip->irq_release_resources(data); +} +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9ed29e4a7dbf..a453e229f99c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1297,7 +1297,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from - * @irq_base: allocate specified IRQ nubmer if irq_base >= 0 + * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument diff --git a/kernel/kthread.c b/kernel/kthread.c index 5942eeafb9ac..be4e8795561a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -11,6 +11,7 @@ #include <linux/kthread.h> #include <linux/completion.h> #include <linux/err.h> +#include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/unistd.h> #include <linux/file.h> diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 99a5b5f46dc5..871734ea2f04 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -67,13 +67,10 @@ static struct latency_record latency_record[MAXLR]; int latencytop_enabled; -void clear_all_latency_tracing(struct task_struct *p) +void clear_tsk_latency_tracing(struct task_struct *p) { unsigned long flags; - if (!latencytop_enabled) - return; - raw_spin_lock_irqsave(&latency_lock, flags); memset(&p->latency_record, 0, sizeof(p->latency_record)); p->latency_record_count = 0; @@ -96,9 +93,6 @@ account_global_scheduler_latency(struct task_struct *tsk, int firstnonnull = MAXLR + 1; int i; - if (!latencytop_enabled) - return; - /* skip kernel threads for now */ if (!tsk->mm) return; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index f6fbaff10e71..91cd519756d3 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1208,14 +1208,6 @@ void klp_module_going(struct module *mod) static int __init klp_init(void) { - int ret; - - ret = klp_check_compiler_support(); - if (ret) { - pr_info("Your compiler is too old; turning off.\n"); - return -EINVAL; - } - klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); if (!klp_root_kobj) return -ENOMEM; diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 6b3ee9948bf1..0b1f77957240 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, { struct rwsem_waiter *waiter, *tmp; long oldcount, woken = 0, adjustment = 0; + struct list_head wlist; /* * Take a peek at the queue head waiter such that we can determine @@ -188,18 +189,43 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * of the queue. We know that woken will be at least 1 as we accounted * for above. Note we increment the 'active part' of the count by the * number of readers before waking any processes up. + * + * We have to do wakeup in 2 passes to prevent the possibility that + * the reader count may be decremented before it is incremented. It + * is because the to-be-woken waiter may not have slept yet. So it + * may see waiter->task got cleared, finish its critical section and + * do an unlock before the reader count increment. + * + * 1) Collect the read-waiters in a separate list, count them and + * fully increment the reader count in rwsem. + * 2) For each waiters in the new list, clear waiter->task and + * put them into wake_q to be woken up later. */ - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { - struct task_struct *tsk; - + list_for_each_entry(waiter, &sem->wait_list, list) { if (waiter->type == RWSEM_WAITING_FOR_WRITE) break; woken++; - tsk = waiter->task; + } + list_cut_before(&wlist, &sem->wait_list, &waiter->list); + + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + lockevent_cond_inc(rwsem_wake_reader, woken); + if (list_empty(&sem->wait_list)) { + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + } + + if (adjustment) + atomic_long_add(adjustment, &sem->count); + + /* 2nd pass */ + list_for_each_entry_safe(waiter, tmp, &wlist, list) { + struct task_struct *tsk; + tsk = waiter->task; get_task_struct(tsk); - list_del(&waiter->list); + /* * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot @@ -213,16 +239,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, */ wake_q_add_safe(wake_q, tsk); } - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - lockevent_cond_inc(rwsem_wake_reader, woken); - if (list_empty(&sem->wait_list)) { - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - } - - if (adjustment) - atomic_long_add(adjustment, &sem->count); } /* diff --git a/kernel/notifier.c b/kernel/notifier.c index 6196af8a8223..bfc95b3e4235 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -22,6 +22,7 @@ static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n) { while ((*nl) != NULL) { + WARN_ONCE(((*nl) == n), "double register detected"); if (n->priority > (*nl)->priority) break; nl = &((*nl)->next); diff --git a/kernel/panic.c b/kernel/panic.c index c1fcaad337b7..b4543a31a495 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -51,6 +51,7 @@ EXPORT_SYMBOL_GPL(panic_timeout); #define PANIC_PRINT_TIMER_INFO 0x00000004 #define PANIC_PRINT_LOCK_INFO 0x00000008 #define PANIC_PRINT_FTRACE_INFO 0x00000010 +#define PANIC_PRINT_ALL_PRINTK_MSG 0x00000020 unsigned long panic_print; ATOMIC_NOTIFIER_HEAD(panic_notifier_list); @@ -134,6 +135,9 @@ EXPORT_SYMBOL(nmi_panic); static void panic_print_sys_info(void) { + if (panic_print & PANIC_PRINT_ALL_PRINTK_MSG) + console_flush_on_panic(CONSOLE_REPLAY_ALL); + if (panic_print & PANIC_PRINT_TASK_INFO) show_state(); @@ -277,7 +281,7 @@ void panic(const char *fmt, ...) * panic() is not being callled from OOPS. */ debug_locks_off(); - console_flush_on_panic(); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); panic_print_sys_info(); @@ -306,6 +310,8 @@ void panic(const char *fmt, ...) * shutting down. But if there is a chance of * rebooting the system it will be rebooted. */ + if (panic_reboot_mode != REBOOT_UNDEFINED) + reboot_mode = panic_reboot_mode; emergency_restart(); } #ifdef __sparc__ @@ -321,6 +327,9 @@ void panic(const char *fmt, ...) disabled_wait(); #endif pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); + + /* Do not scroll important messages printed above */ + suppress_printk = 1; local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); diff --git a/kernel/pid.c b/kernel/pid.c index 20881598bdfa..89548d35eefb 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -32,7 +32,6 @@ #include <linux/init.h> #include <linux/rculist.h> #include <linux/memblock.h> -#include <linux/hash.h> #include <linux/pid_namespace.h> #include <linux/init_task.h> #include <linux/syscalls.h> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 02ca827b8fac..a6e06fe38e41 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,6 +86,12 @@ static DEFINE_SEMAPHORE(console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); +/* + * System may need to suppress printk message under certain + * circumstances, like after kernel panic happens. + */ +int __read_mostly suppress_printk; + #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { .name = "console_lock" @@ -1943,6 +1949,10 @@ asmlinkage int vprintk_emit(int facility, int level, unsigned long flags; u64 curr_log_seq; + /* Suppress unimportant messages after panic happens */ + if (unlikely(suppress_printk)) + return 0; + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; @@ -2525,10 +2535,11 @@ void console_unblank(void) /** * console_flush_on_panic - flush console content on panic + * @mode: flush all messages in buffer or just the pending ones * * Immediately output all pending messages no matter what. */ -void console_flush_on_panic(void) +void console_flush_on_panic(enum con_flush_mode mode) { /* * If someone else is holding the console lock, trylock will fail @@ -2539,6 +2550,15 @@ void console_flush_on_panic(void) */ console_trylock(); console_may_schedule = 0; + + if (mode == CONSOLE_REPLAY_ALL) { + unsigned long flags; + + logbuf_lock_irqsave(flags); + console_seq = log_first_seq; + console_idx = log_first_idx; + logbuf_unlock_irqrestore(flags); + } console_unlock(); } diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 4b58c907b4b7..390aab20115e 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -11,11 +11,6 @@ #define __LINUX_RCU_H #include <trace/events/rcu.h> -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(stmt) stmt -#else /* #ifdef CONFIG_RCU_TRACE */ -#define RCU_TRACE(stmt) -#endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ #define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) @@ -216,12 +211,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) + trace_rcu_invoke_kfree_callback(rn, head, offset); kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { - RCU_TRACE(trace_rcu_invoke_callback(rn, head);) + trace_rcu_invoke_callback(rn, head); f = head->func; WRITE_ONCE(head->func, (rcu_callback_t)0L); f(head); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b4d88a594785..980ca3ca643f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1969,14 +1969,14 @@ rcu_check_quiescent_state(struct rcu_data *rdp) */ int rcutree_dying_cpu(unsigned int cpu) { - RCU_TRACE(bool blkd;) - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(&rcu_data);) - RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) + bool blkd; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rdp->mynode; if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return 0; - RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) + blkd = !!(rnp->qsmask & rdp->grpmask); trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); return 0; diff --git a/kernel/reboot.c b/kernel/reboot.c index e1b79b6a2735..b9e79e8c7226 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(cad_pid); #define DEFAULT_REBOOT_MODE #endif enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; +enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED; /* * This variable is used privately to keep track of whether or not @@ -519,6 +520,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); static int __init reboot_setup(char *str) { for (;;) { + enum reboot_mode *mode; + /* * Having anything passed on the command line via * reboot= will cause us to disable DMI checking @@ -526,17 +529,24 @@ static int __init reboot_setup(char *str) */ reboot_default = 0; + if (!strncmp(str, "panic_", 6)) { + mode = &panic_reboot_mode; + str += 6; + } else { + mode = &reboot_mode; + } + switch (*str) { case 'w': - reboot_mode = REBOOT_WARM; + *mode = REBOOT_WARM; break; case 'c': - reboot_mode = REBOOT_COLD; + *mode = REBOOT_COLD; break; case 'h': - reboot_mode = REBOOT_HARD; + *mode = REBOOT_HARD; break; case 's': @@ -553,11 +563,11 @@ static int __init reboot_setup(char *str) if (rc) return rc; } else - reboot_mode = REBOOT_SOFT; + *mode = REBOOT_SOFT; break; } case 'g': - reboot_mode = REBOOT_GPIO; + *mode = REBOOT_GPIO; break; case 'b': diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 0e97ca9306ef..7acc632c3b82 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -4,6 +4,9 @@ * Copyright (c) 2018 Facebook, Inc. * Author: Johannes Weiner <hannes@cmpxchg.org> * + * Polling support by Suren Baghdasaryan <surenb@google.com> + * Copyright (c) 2018 Google, Inc. + * * When CPU, memory and IO are contended, tasks experience delays that * reduce throughput and introduce latencies into the workload. Memory * and IO contention, in addition, can cause a full loss of forward @@ -129,9 +132,13 @@ #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/seqlock.h> +#include <linux/uaccess.h> #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/ctype.h> +#include <linux/file.h> +#include <linux/poll.h> #include <linux/psi.h> #include "sched.h" @@ -140,9 +147,9 @@ static int psi_bug __read_mostly; DEFINE_STATIC_KEY_FALSE(psi_disabled); #ifdef CONFIG_PSI_DEFAULT_DISABLED -bool psi_enable; +static bool psi_enable; #else -bool psi_enable = true; +static bool psi_enable = true; #endif static int __init setup_psi(char *str) { @@ -156,16 +163,21 @@ __setup("psi=", setup_psi); #define EXP_60s 1981 /* 1/exp(2s/60s) */ #define EXP_300s 2034 /* 1/exp(2s/300s) */ +/* PSI trigger definitions */ +#define WINDOW_MIN_US 500000 /* Min window size is 500ms */ +#define WINDOW_MAX_US 10000000 /* Max window size is 10s */ +#define UPDATES_PER_WINDOW 10 /* 10 updates per window */ + /* Sampling frequency in nanoseconds */ static u64 psi_period __read_mostly; /* System-level pressure and stall tracking */ static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu); -static struct psi_group psi_system = { +struct psi_group psi_system = { .pcpu = &system_group_pcpu, }; -static void psi_update_work(struct work_struct *work); +static void psi_avgs_work(struct work_struct *work); static void group_init(struct psi_group *group) { @@ -173,9 +185,20 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->next_update = sched_clock() + psi_period; - INIT_DELAYED_WORK(&group->clock_work, psi_update_work); - mutex_init(&group->stat_lock); + group->avg_next_update = sched_clock() + psi_period; + INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); + mutex_init(&group->avgs_lock); + /* Init trigger-related members */ + atomic_set(&group->poll_scheduled, 0); + mutex_init(&group->trigger_lock); + INIT_LIST_HEAD(&group->triggers); + memset(group->nr_triggers, 0, sizeof(group->nr_triggers)); + group->poll_states = 0; + group->poll_min_period = U32_MAX; + memset(group->polling_total, 0, sizeof(group->polling_total)); + group->polling_next_update = ULLONG_MAX; + group->polling_until = 0; + rcu_assign_pointer(group->poll_kworker, NULL); } void __init psi_init(void) @@ -210,20 +233,24 @@ static bool test_state(unsigned int *tasks, enum psi_states state) } } -static void get_recent_times(struct psi_group *group, int cpu, u32 *times) +static void get_recent_times(struct psi_group *group, int cpu, + enum psi_aggregators aggregator, u32 *times, + u32 *pchanged_states) { struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); - unsigned int tasks[NR_PSI_TASK_COUNTS]; u64 now, state_start; + enum psi_states s; unsigned int seq; - int s; + u32 state_mask; + + *pchanged_states = 0; /* Snapshot a coherent view of the CPU state */ do { seq = read_seqcount_begin(&groupc->seq); now = cpu_clock(cpu); memcpy(times, groupc->times, sizeof(groupc->times)); - memcpy(tasks, groupc->tasks, sizeof(groupc->tasks)); + state_mask = groupc->state_mask; state_start = groupc->state_start; } while (read_seqcount_retry(&groupc->seq, seq)); @@ -239,13 +266,15 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times) * (u32) and our reported pressure close to what's * actually happening. */ - if (test_state(tasks, s)) + if (state_mask & (1 << s)) times[s] += now - state_start; - delta = times[s] - groupc->times_prev[s]; - groupc->times_prev[s] = times[s]; + delta = times[s] - groupc->times_prev[aggregator][s]; + groupc->times_prev[aggregator][s] = times[s]; times[s] = delta; + if (delta) + *pchanged_states |= (1 << s); } } @@ -269,17 +298,16 @@ static void calc_avgs(unsigned long avg[3], int missed_periods, avg[2] = calc_load(avg[2], EXP_300s, pct); } -static bool update_stats(struct psi_group *group) +static void collect_percpu_times(struct psi_group *group, + enum psi_aggregators aggregator, + u32 *pchanged_states) { u64 deltas[NR_PSI_STATES - 1] = { 0, }; - unsigned long missed_periods = 0; unsigned long nonidle_total = 0; - u64 now, expires, period; + u32 changed_states = 0; int cpu; int s; - mutex_lock(&group->stat_lock); - /* * Collect the per-cpu time buckets and average them into a * single time sample that is normalized to wallclock time. @@ -291,8 +319,11 @@ static bool update_stats(struct psi_group *group) for_each_possible_cpu(cpu) { u32 times[NR_PSI_STATES]; u32 nonidle; + u32 cpu_changed_states; - get_recent_times(group, cpu, times); + get_recent_times(group, cpu, aggregator, times, + &cpu_changed_states); + changed_states |= cpu_changed_states; nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]); nonidle_total += nonidle; @@ -315,13 +346,22 @@ static bool update_stats(struct psi_group *group) /* total= */ for (s = 0; s < NR_PSI_STATES - 1; s++) - group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL)); + group->total[aggregator][s] += + div_u64(deltas[s], max(nonidle_total, 1UL)); + + if (pchanged_states) + *pchanged_states = changed_states; +} + +static u64 update_averages(struct psi_group *group, u64 now) +{ + unsigned long missed_periods = 0; + u64 expires, period; + u64 avg_next_update; + int s; /* avgX= */ - now = sched_clock(); - expires = group->next_update; - if (now < expires) - goto out; + expires = group->avg_next_update; if (now - expires >= psi_period) missed_periods = div_u64(now - expires, psi_period); @@ -332,14 +372,14 @@ static bool update_stats(struct psi_group *group) * But the deltas we sample out of the per-cpu buckets above * are based on the actual time elapsing between clock ticks. */ - group->next_update = expires + ((1 + missed_periods) * psi_period); - period = now - (group->last_update + (missed_periods * psi_period)); - group->last_update = now; + avg_next_update = expires + ((1 + missed_periods) * psi_period); + period = now - (group->avg_last_update + (missed_periods * psi_period)); + group->avg_last_update = now; for (s = 0; s < NR_PSI_STATES - 1; s++) { u32 sample; - sample = group->total[s] - group->total_prev[s]; + sample = group->total[PSI_AVGS][s] - group->avg_total[s]; /* * Due to the lockless sampling of the time buckets, * recorded time deltas can slip into the next period, @@ -359,23 +399,30 @@ static bool update_stats(struct psi_group *group) */ if (sample > period) sample = period; - group->total_prev[s] += sample; + group->avg_total[s] += sample; calc_avgs(group->avg[s], missed_periods, sample, period); } -out: - mutex_unlock(&group->stat_lock); - return nonidle_total; + + return avg_next_update; } -static void psi_update_work(struct work_struct *work) +static void psi_avgs_work(struct work_struct *work) { struct delayed_work *dwork; struct psi_group *group; + u32 changed_states; bool nonidle; + u64 now; dwork = to_delayed_work(work); - group = container_of(dwork, struct psi_group, clock_work); + group = container_of(dwork, struct psi_group, avgs_work); + + mutex_lock(&group->avgs_lock); + now = sched_clock(); + + collect_percpu_times(group, PSI_AVGS, &changed_states); + nonidle = changed_states & (1 << PSI_NONIDLE); /* * If there is task activity, periodically fold the per-cpu * times and feed samples into the running averages. If things @@ -383,18 +430,196 @@ static void psi_update_work(struct work_struct *work) * Once restarted, we'll catch up the running averages in one * go - see calc_avgs() and missed_periods. */ - - nonidle = update_stats(group); + if (now >= group->avg_next_update) + group->avg_next_update = update_averages(group, now); if (nonidle) { - unsigned long delay = 0; - u64 now; + schedule_delayed_work(dwork, nsecs_to_jiffies( + group->avg_next_update - now) + 1); + } + + mutex_unlock(&group->avgs_lock); +} + +/* Trigger tracking window manupulations */ +static void window_reset(struct psi_window *win, u64 now, u64 value, + u64 prev_growth) +{ + win->start_time = now; + win->start_value = value; + win->prev_growth = prev_growth; +} + +/* + * PSI growth tracking window update and growth calculation routine. + * + * This approximates a sliding tracking window by interpolating + * partially elapsed windows using historical growth data from the + * previous intervals. This minimizes memory requirements (by not storing + * all the intermediate values in the previous window) and simplifies + * the calculations. It works well because PSI signal changes only in + * positive direction and over relatively small window sizes the growth + * is close to linear. + */ +static u64 window_update(struct psi_window *win, u64 now, u64 value) +{ + u64 elapsed; + u64 growth; + + elapsed = now - win->start_time; + growth = value - win->start_value; + /* + * After each tracking window passes win->start_value and + * win->start_time get reset and win->prev_growth stores + * the average per-window growth of the previous window. + * win->prev_growth is then used to interpolate additional + * growth from the previous window assuming it was linear. + */ + if (elapsed > win->size) + window_reset(win, now, value, growth); + else { + u32 remaining; + + remaining = win->size - elapsed; + growth += div_u64(win->prev_growth * remaining, win->size); + } + + return growth; +} + +static void init_triggers(struct psi_group *group, u64 now) +{ + struct psi_trigger *t; + + list_for_each_entry(t, &group->triggers, node) + window_reset(&t->win, now, + group->total[PSI_POLL][t->state], 0); + memcpy(group->polling_total, group->total[PSI_POLL], + sizeof(group->polling_total)); + group->polling_next_update = now + group->poll_min_period; +} + +static u64 update_triggers(struct psi_group *group, u64 now) +{ + struct psi_trigger *t; + bool new_stall = false; + u64 *total = group->total[PSI_POLL]; + + /* + * On subsequent updates, calculate growth deltas and let + * watchers know when their specified thresholds are exceeded. + */ + list_for_each_entry(t, &group->triggers, node) { + u64 growth; + + /* Check for stall activity */ + if (group->polling_total[t->state] == total[t->state]) + continue; + + /* + * Multiple triggers might be looking at the same state, + * remember to update group->polling_total[] once we've + * been through all of them. Also remember to extend the + * polling time if we see new stall activity. + */ + new_stall = true; + + /* Calculate growth since last update */ + growth = window_update(&t->win, now, total[t->state]); + if (growth < t->threshold) + continue; + + /* Limit event signaling to once per window */ + if (now < t->last_event_time + t->win.size) + continue; + + /* Generate an event */ + if (cmpxchg(&t->event, 0, 1) == 0) + wake_up_interruptible(&t->event_wait); + t->last_event_time = now; + } + + if (new_stall) + memcpy(group->polling_total, total, + sizeof(group->polling_total)); + + return now + group->poll_min_period; +} + +/* + * Schedule polling if it's not already scheduled. It's safe to call even from + * hotpath because even though kthread_queue_delayed_work takes worker->lock + * spinlock that spinlock is never contended due to poll_scheduled atomic + * preventing such competition. + */ +static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay) +{ + struct kthread_worker *kworker; + + /* Do not reschedule if already scheduled */ + if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0) + return; + + rcu_read_lock(); - now = sched_clock(); - if (group->next_update > now) - delay = nsecs_to_jiffies(group->next_update - now) + 1; - schedule_delayed_work(dwork, delay); + kworker = rcu_dereference(group->poll_kworker); + /* + * kworker might be NULL in case psi_trigger_destroy races with + * psi_task_change (hotpath) which can't use locks + */ + if (likely(kworker)) + kthread_queue_delayed_work(kworker, &group->poll_work, delay); + else + atomic_set(&group->poll_scheduled, 0); + + rcu_read_unlock(); +} + +static void psi_poll_work(struct kthread_work *work) +{ + struct kthread_delayed_work *dwork; + struct psi_group *group; + u32 changed_states; + u64 now; + + dwork = container_of(work, struct kthread_delayed_work, work); + group = container_of(dwork, struct psi_group, poll_work); + + atomic_set(&group->poll_scheduled, 0); + + mutex_lock(&group->trigger_lock); + + now = sched_clock(); + + collect_percpu_times(group, PSI_POLL, &changed_states); + + if (changed_states & group->poll_states) { + /* Initialize trigger windows when entering polling mode */ + if (now > group->polling_until) + init_triggers(group, now); + + /* + * Keep the monitor active for at least the duration of the + * minimum tracking window as long as monitor states are + * changing. + */ + group->polling_until = now + + group->poll_min_period * UPDATES_PER_WINDOW; + } + + if (now > group->polling_until) { + group->polling_next_update = ULLONG_MAX; + goto out; } + + if (now >= group->polling_next_update) + group->polling_next_update = update_triggers(group, now); + + psi_schedule_poll_work(group, + nsecs_to_jiffies(group->polling_next_update - now) + 1); + +out: + mutex_unlock(&group->trigger_lock); } static void record_times(struct psi_group_cpu *groupc, int cpu, @@ -407,15 +632,15 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, delta = now - groupc->state_start; groupc->state_start = now; - if (test_state(groupc->tasks, PSI_IO_SOME)) { + if (groupc->state_mask & (1 << PSI_IO_SOME)) { groupc->times[PSI_IO_SOME] += delta; - if (test_state(groupc->tasks, PSI_IO_FULL)) + if (groupc->state_mask & (1 << PSI_IO_FULL)) groupc->times[PSI_IO_FULL] += delta; } - if (test_state(groupc->tasks, PSI_MEM_SOME)) { + if (groupc->state_mask & (1 << PSI_MEM_SOME)) { groupc->times[PSI_MEM_SOME] += delta; - if (test_state(groupc->tasks, PSI_MEM_FULL)) + if (groupc->state_mask & (1 << PSI_MEM_FULL)) groupc->times[PSI_MEM_FULL] += delta; else if (memstall_tick) { u32 sample; @@ -436,18 +661,20 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, } } - if (test_state(groupc->tasks, PSI_CPU_SOME)) + if (groupc->state_mask & (1 << PSI_CPU_SOME)) groupc->times[PSI_CPU_SOME] += delta; - if (test_state(groupc->tasks, PSI_NONIDLE)) + if (groupc->state_mask & (1 << PSI_NONIDLE)) groupc->times[PSI_NONIDLE] += delta; } -static void psi_group_change(struct psi_group *group, int cpu, - unsigned int clear, unsigned int set) +static u32 psi_group_change(struct psi_group *group, int cpu, + unsigned int clear, unsigned int set) { struct psi_group_cpu *groupc; unsigned int t, m; + enum psi_states s; + u32 state_mask = 0; groupc = per_cpu_ptr(group->pcpu, cpu); @@ -480,7 +707,16 @@ static void psi_group_change(struct psi_group *group, int cpu, if (set & (1 << t)) groupc->tasks[t]++; + /* Calculate state mask representing active states */ + for (s = 0; s < NR_PSI_STATES; s++) { + if (test_state(groupc->tasks, s)) + state_mask |= (1 << s); + } + groupc->state_mask = state_mask; + write_seqcount_end(&groupc->seq); + + return state_mask; } static struct psi_group *iterate_groups(struct task_struct *task, void **iter) @@ -537,13 +773,17 @@ void psi_task_change(struct task_struct *task, int clear, int set) */ if (unlikely((clear & TSK_RUNNING) && (task->flags & PF_WQ_WORKER) && - wq_worker_last_func(task) == psi_update_work)) + wq_worker_last_func(task) == psi_avgs_work)) wake_clock = false; while ((group = iterate_groups(task, &iter))) { - psi_group_change(group, cpu, clear, set); - if (wake_clock && !delayed_work_pending(&group->clock_work)) - schedule_delayed_work(&group->clock_work, PSI_FREQ); + u32 state_mask = psi_group_change(group, cpu, clear, set); + + if (state_mask & group->poll_states) + psi_schedule_poll_work(group, 1); + + if (wake_clock && !delayed_work_pending(&group->avgs_work)) + schedule_delayed_work(&group->avgs_work, PSI_FREQ); } } @@ -640,8 +880,10 @@ void psi_cgroup_free(struct cgroup *cgroup) if (static_branch_likely(&psi_disabled)) return; - cancel_delayed_work_sync(&cgroup->psi.clock_work); + cancel_delayed_work_sync(&cgroup->psi.avgs_work); free_percpu(cgroup->psi.pcpu); + /* All triggers must be removed by now */ + WARN_ONCE(cgroup->psi.poll_states, "psi: trigger leak\n"); } /** @@ -697,11 +939,18 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) { int full; + u64 now; if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - update_stats(group); + /* Update averages before reporting them */ + mutex_lock(&group->avgs_lock); + now = sched_clock(); + collect_percpu_times(group, PSI_AVGS, NULL); + if (now >= group->avg_next_update) + group->avg_next_update = update_averages(group, now); + mutex_unlock(&group->avgs_lock); for (full = 0; full < 2 - (res == PSI_CPU); full++) { unsigned long avg[3]; @@ -710,7 +959,8 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) for (w = 0; w < 3; w++) avg[w] = group->avg[res * 2 + full][w]; - total = div_u64(group->total[res * 2 + full], NSEC_PER_USEC); + total = div_u64(group->total[PSI_AVGS][res * 2 + full], + NSEC_PER_USEC); seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n", full ? "full" : "some", @@ -753,25 +1003,270 @@ static int psi_cpu_open(struct inode *inode, struct file *file) return single_open(file, psi_cpu_show, NULL); } +struct psi_trigger *psi_trigger_create(struct psi_group *group, + char *buf, size_t nbytes, enum psi_res res) +{ + struct psi_trigger *t; + enum psi_states state; + u32 threshold_us; + u32 window_us; + + if (static_branch_likely(&psi_disabled)) + return ERR_PTR(-EOPNOTSUPP); + + if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2) + state = PSI_IO_SOME + res * 2; + else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2) + state = PSI_IO_FULL + res * 2; + else + return ERR_PTR(-EINVAL); + + if (state >= PSI_NONIDLE) + return ERR_PTR(-EINVAL); + + if (window_us < WINDOW_MIN_US || + window_us > WINDOW_MAX_US) + return ERR_PTR(-EINVAL); + + /* Check threshold */ + if (threshold_us == 0 || threshold_us > window_us) + return ERR_PTR(-EINVAL); + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return ERR_PTR(-ENOMEM); + + t->group = group; + t->state = state; + t->threshold = threshold_us * NSEC_PER_USEC; + t->win.size = window_us * NSEC_PER_USEC; + window_reset(&t->win, 0, 0, 0); + + t->event = 0; + t->last_event_time = 0; + init_waitqueue_head(&t->event_wait); + kref_init(&t->refcount); + + mutex_lock(&group->trigger_lock); + + if (!rcu_access_pointer(group->poll_kworker)) { + struct sched_param param = { + .sched_priority = MAX_RT_PRIO - 1, + }; + struct kthread_worker *kworker; + + kworker = kthread_create_worker(0, "psimon"); + if (IS_ERR(kworker)) { + kfree(t); + mutex_unlock(&group->trigger_lock); + return ERR_CAST(kworker); + } + sched_setscheduler(kworker->task, SCHED_FIFO, ¶m); + kthread_init_delayed_work(&group->poll_work, + psi_poll_work); + rcu_assign_pointer(group->poll_kworker, kworker); + } + + list_add(&t->node, &group->triggers); + group->poll_min_period = min(group->poll_min_period, + div_u64(t->win.size, UPDATES_PER_WINDOW)); + group->nr_triggers[t->state]++; + group->poll_states |= (1 << t->state); + + mutex_unlock(&group->trigger_lock); + + return t; +} + +static void psi_trigger_destroy(struct kref *ref) +{ + struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); + struct psi_group *group = t->group; + struct kthread_worker *kworker_to_destroy = NULL; + + if (static_branch_likely(&psi_disabled)) + return; + + /* + * Wakeup waiters to stop polling. Can happen if cgroup is deleted + * from under a polling process. + */ + wake_up_interruptible(&t->event_wait); + + mutex_lock(&group->trigger_lock); + + if (!list_empty(&t->node)) { + struct psi_trigger *tmp; + u64 period = ULLONG_MAX; + + list_del(&t->node); + group->nr_triggers[t->state]--; + if (!group->nr_triggers[t->state]) + group->poll_states &= ~(1 << t->state); + /* reset min update period for the remaining triggers */ + list_for_each_entry(tmp, &group->triggers, node) + period = min(period, div_u64(tmp->win.size, + UPDATES_PER_WINDOW)); + group->poll_min_period = period; + /* Destroy poll_kworker when the last trigger is destroyed */ + if (group->poll_states == 0) { + group->polling_until = 0; + kworker_to_destroy = rcu_dereference_protected( + group->poll_kworker, + lockdep_is_held(&group->trigger_lock)); + rcu_assign_pointer(group->poll_kworker, NULL); + } + } + + mutex_unlock(&group->trigger_lock); + + /* + * Wait for both *trigger_ptr from psi_trigger_replace and + * poll_kworker RCUs to complete their read-side critical sections + * before destroying the trigger and optionally the poll_kworker + */ + synchronize_rcu(); + /* + * Destroy the kworker after releasing trigger_lock to prevent a + * deadlock while waiting for psi_poll_work to acquire trigger_lock + */ + if (kworker_to_destroy) { + kthread_cancel_delayed_work_sync(&group->poll_work); + kthread_destroy_worker(kworker_to_destroy); + } + kfree(t); +} + +void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) +{ + struct psi_trigger *old = *trigger_ptr; + + if (static_branch_likely(&psi_disabled)) + return; + + rcu_assign_pointer(*trigger_ptr, new); + if (old) + kref_put(&old->refcount, psi_trigger_destroy); +} + +__poll_t psi_trigger_poll(void **trigger_ptr, + struct file *file, poll_table *wait) +{ + __poll_t ret = DEFAULT_POLLMASK; + struct psi_trigger *t; + + if (static_branch_likely(&psi_disabled)) + return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; + + rcu_read_lock(); + + t = rcu_dereference(*(void __rcu __force **)trigger_ptr); + if (!t) { + rcu_read_unlock(); + return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; + } + kref_get(&t->refcount); + + rcu_read_unlock(); + + poll_wait(file, &t->event_wait, wait); + + if (cmpxchg(&t->event, 1, 0) == 1) + ret |= EPOLLPRI; + + kref_put(&t->refcount, psi_trigger_destroy); + + return ret; +} + +static ssize_t psi_write(struct file *file, const char __user *user_buf, + size_t nbytes, enum psi_res res) +{ + char buf[32]; + size_t buf_size; + struct seq_file *seq; + struct psi_trigger *new; + + if (static_branch_likely(&psi_disabled)) + return -EOPNOTSUPP; + + buf_size = min(nbytes, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size - 1] = '\0'; + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) + return PTR_ERR(new); + + seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ + mutex_lock(&seq->lock); + psi_trigger_replace(&seq->private, new); + mutex_unlock(&seq->lock); + + return nbytes; +} + +static ssize_t psi_io_write(struct file *file, const char __user *user_buf, + size_t nbytes, loff_t *ppos) +{ + return psi_write(file, user_buf, nbytes, PSI_IO); +} + +static ssize_t psi_memory_write(struct file *file, const char __user *user_buf, + size_t nbytes, loff_t *ppos) +{ + return psi_write(file, user_buf, nbytes, PSI_MEM); +} + +static ssize_t psi_cpu_write(struct file *file, const char __user *user_buf, + size_t nbytes, loff_t *ppos) +{ + return psi_write(file, user_buf, nbytes, PSI_CPU); +} + +static __poll_t psi_fop_poll(struct file *file, poll_table *wait) +{ + struct seq_file *seq = file->private_data; + + return psi_trigger_poll(&seq->private, file, wait); +} + +static int psi_fop_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + psi_trigger_replace(&seq->private, NULL); + return single_release(inode, file); +} + static const struct file_operations psi_io_fops = { .open = psi_io_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .write = psi_io_write, + .poll = psi_fop_poll, + .release = psi_fop_release, }; static const struct file_operations psi_memory_fops = { .open = psi_memory_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .write = psi_memory_write, + .poll = psi_fop_poll, + .release = psi_fop_release, }; static const struct file_operations psi_cpu_fops = { .open = psi_cpu_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .write = psi_cpu_write, + .poll = psi_fop_poll, + .release = psi_fop_release, }; static int __init psi_proc_init(void) diff --git a/kernel/signal.c b/kernel/signal.c index 62f9aea4a15a..a1eb44dc9ff5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -840,6 +840,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, */ if (!sid || sid == task_session(current)) break; + /* fall through */ default: return -EPERM; } @@ -2112,6 +2113,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t preempt_enable_no_resched(); cgroup_enter_frozen(); freezable_schedule(); + cgroup_leave_frozen(true); } else { /* * By the time we got the lock, our tracer went away. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba158f61aab4..943c89178e3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2886,8 +2886,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int if (neg) continue; val = convmul * val / convdiv; - if ((min && val < *min) || (max && val > *max)) - continue; + if ((min && val < *min) || (max && val > *max)) { + err = -EINVAL; + break; + } *i = val; } else { val = convdiv * (*i) / convmul; @@ -3170,17 +3172,19 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, if (write) { char *kbuf, *p; + size_t skipped = 0; - if (left > PAGE_SIZE - 1) + if (left > PAGE_SIZE - 1) { left = PAGE_SIZE - 1; + /* How much of the buffer we'll skip this pass */ + skipped = *lenp - left; + } p = kbuf = memdup_user_nul(buffer, left); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len), - sizeof(unsigned long), - GFP_KERNEL); + tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL); if (!tmp_bitmap) { kfree(kbuf); return -ENOMEM; @@ -3189,9 +3193,22 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, while (!err && left) { unsigned long val_a, val_b; bool neg; + size_t saved_left; + /* In case we stop parsing mid-number, we can reset */ + saved_left = left; err = proc_get_long(&p, &left, &val_a, &neg, tr_a, sizeof(tr_a), &c); + /* + * If we consumed the entirety of a truncated buffer or + * only one char is left (may be a "-"), then stop here, + * reset, & come back for more. + */ + if ((left <= 1) && skipped) { + left = saved_left; + break; + } + if (err) break; if (val_a >= bitmap_len || neg) { @@ -3209,6 +3226,15 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, err = proc_get_long(&p, &left, &val_b, &neg, tr_b, sizeof(tr_b), &c); + /* + * If we consumed all of a truncated buffer or + * then stop here, reset, & come back for more. + */ + if (!left && skipped) { + left = saved_left; + break; + } + if (err) break; if (val_b >= bitmap_len || neg || @@ -3227,6 +3253,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, proc_skip_char(&p, &left, '\n'); } kfree(kbuf); + left += skipped; } else { unsigned long bit_a, bit_b = 0; @@ -3271,7 +3298,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, *ppos += *lenp; } - kfree(tmp_bitmap); + bitmap_free(tmp_bitmap); return err; } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index ac5555e25733..8de4f789dc1b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -691,7 +691,7 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, time_constant = max(time_constant, 0l); } - if (txc->modes & ADJ_TAI && txc->constant > 0) + if (txc->modes & ADJ_TAI && txc->constant >= 0) *time_tai = txc->constant; if (txc->modes & ADJ_OFFSET) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b920358dd8f7..a12aff849c04 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -70,12 +70,8 @@ #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else #define INIT_OPS_HASH(opsname) -#define ASSIGN_OPS_HASH(opsname, val) #endif enum { @@ -3880,7 +3876,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, static bool module_exists(const char *module) { /* All modules have the symbol __this_module */ - const char this_mod[] = "__this_module"; + static const char this_mod[] = "__this_module"; char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; @@ -6265,6 +6261,9 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { + /* Stub functions don't need to be called nor tested */ + if (op->flags & FTRACE_OPS_FL_STUB) + continue; /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4ee8d8aa3d0f..05b0b3139ebc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4979,7 +4979,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) cnt = data->cnt + (nested ? 27 : 0); /* Multiply cnt by ~e, to make some unique increment */ - size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1); + size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); len = size + sizeof(struct rb_item); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index ffba6789c0e2..0564f6db0561 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -362,7 +362,7 @@ static void ring_buffer_producer(void) hit--; /* make it non zero */ } - /* Caculate the average time in nanosecs */ + /* Calculate the average time in nanosecs */ avg = NSEC_PER_MSEC / (hit + missed); trace_printk("%ld ns per entry\n", avg); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ec439999f387..2c92b3d9ea30 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1727,6 +1727,10 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); list_for_each_entry_safe(p, n, &postponed_selftests, list) { + /* This loop can take minutes when sanitizers are enabled, so + * lets make sure we allow RCU processing. + */ + cond_resched(); ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ if (ret < 0) { @@ -3045,6 +3049,7 @@ void trace_printk_init_buffers(void) if (global_trace.trace_buffer.buffer) tracing_start_cmdline_record(); } +EXPORT_SYMBOL_GPL(trace_printk_init_buffers); void trace_printk_start_comm(void) { @@ -3205,6 +3210,7 @@ int trace_array_printk(struct trace_array *tr, va_end(ap); return ret; } +EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, @@ -3483,33 +3489,68 @@ static void s_stop(struct seq_file *m, void *p) } static void +get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, + unsigned long *entries, int cpu) +{ + unsigned long count; + + count = ring_buffer_entries_cpu(buf->buffer, cpu); + /* + * If this buffer has skipped entries, then we hold all + * entries for the trace and we need to ignore the + * ones before the time stamp. + */ + if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { + count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; + /* total is the same as the entries */ + *total = count; + } else + *total = count + + ring_buffer_overrun_cpu(buf->buffer, cpu); + *entries = count; +} + +static void get_total_entries(struct trace_buffer *buf, unsigned long *total, unsigned long *entries) { - unsigned long count; + unsigned long t, e; int cpu; *total = 0; *entries = 0; for_each_tracing_cpu(cpu) { - count = ring_buffer_entries_cpu(buf->buffer, cpu); - /* - * If this buffer has skipped entries, then we hold all - * entries for the trace and we need to ignore the - * ones before the time stamp. - */ - if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { - count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; - /* total is the same as the entries */ - *total += count; - } else - *total += count + - ring_buffer_overrun_cpu(buf->buffer, cpu); - *entries += count; + get_total_entries_cpu(buf, &t, &e, cpu); + *total += t; + *entries += e; } } +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + + return entries; +} + +unsigned long trace_total_entries(struct trace_array *tr) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries(&tr->trace_buffer, &total, &entries); + + return entries; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" @@ -3548,25 +3589,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char tgid_space[] = " "; - const char space[] = " "; + const char *space = " "; + int prec = tgid ? 10 : 2; print_event_info(buf, m); - seq_printf(m, "# %s _-----=> irqs-off\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s / _----=> need-resched\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s| / _---=> hardirq/softirq\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s|| / _--=> preempt-depth\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s||| / delay\n", - tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", - tgid ? " TGID " : space); - seq_printf(m, "# | | %s | |||| | |\n", - tgid ? " | " : space); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void @@ -4692,6 +4726,7 @@ static const char readme_msg[] = " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" " current_tracer\t- function and latency tracers\n" " available_tracers\t- list of configured tracers for current_tracer\n" + " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" " trace_clock\t\t-change the clock used to order events\n" @@ -4712,7 +4747,7 @@ static const char readme_msg[] = " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" - "\t\t\t Disable an option by adding a suffix 'no' to the\n" + "\t\t\t Disable an option by prefixing 'no' to the\n" "\t\t\t option name\n" " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" #ifdef CONFIG_DYNAMIC_FTRACE @@ -6296,13 +6331,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct ring_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; - const char faulted[] = "<faulted>"; ssize_t written; int size; int len; /* Used in tracing_mark_raw_write() as well */ -#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */ +#define FAULTED_STR "<faulted>" +#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ if (tracing_disabled) return -EINVAL; @@ -6334,7 +6369,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); if (len) { - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); cnt = FAULTED_SIZE; written = -EFAULT; } else @@ -6375,7 +6410,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct ring_buffer *buffer; struct raw_data_entry *entry; - const char faulted[] = "<faulted>"; unsigned long irq_flags; ssize_t written; int size; @@ -6415,7 +6449,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); if (len) { entry->id = -1; - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); written = -EFAULT; } else written = cnt; @@ -6868,6 +6902,238 @@ static const struct file_operations snapshot_raw_fops = { #endif /* CONFIG_TRACER_SNAPSHOT */ +#define TRACING_LOG_ERRS_MAX 8 +#define TRACING_LOG_LOC_MAX 128 + +#define CMD_PREFIX " Command: " + +struct err_info { + const char **errs; /* ptr to loc-specific array of err strings */ + u8 type; /* index into errs -> specific err string */ + u8 pos; /* MAX_FILTER_STR_VAL = 256 */ + u64 ts; +}; + +struct tracing_log_err { + struct list_head list; + struct err_info info; + char loc[TRACING_LOG_LOC_MAX]; /* err location */ + char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ +}; + +static DEFINE_MUTEX(tracing_err_log_lock); + +struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) +{ + struct tracing_log_err *err; + + if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { + err = kzalloc(sizeof(*err), GFP_KERNEL); + if (!err) + err = ERR_PTR(-ENOMEM); + tr->n_err_log_entries++; + + return err; + } + + err = list_first_entry(&tr->err_log, struct tracing_log_err, list); + list_del(&err->list); + + return err; +} + +/** + * err_pos - find the position of a string within a command for error careting + * @cmd: The tracing command that caused the error + * @str: The string to position the caret at within @cmd + * + * Finds the position of the first occurence of @str within @cmd. The + * return value can be passed to tracing_log_err() for caret placement + * within @cmd. + * + * Returns the index within @cmd of the first occurence of @str or 0 + * if @str was not found. + */ +unsigned int err_pos(char *cmd, const char *str) +{ + char *found; + + if (WARN_ON(!strlen(cmd))) + return 0; + + found = strstr(cmd, str); + if (found) + return found - cmd; + + return 0; +} + +/** + * tracing_log_err - write an error to the tracing error log + * @tr: The associated trace array for the error (NULL for top level array) + * @loc: A string describing where the error occurred + * @cmd: The tracing command that caused the error + * @errs: The array of loc-specific static error strings + * @type: The index into errs[], which produces the specific static err string + * @pos: The position the caret should be placed in the cmd + * + * Writes an error into tracing/error_log of the form: + * + * <loc>: error: <text> + * Command: <cmd> + * ^ + * + * tracing/error_log is a small log file containing the last + * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated + * unless there has been a tracing error, and the error log can be + * cleared and have its memory freed by writing the empty string in + * truncation mode to it i.e. echo > tracing/error_log. + * + * NOTE: the @errs array along with the @type param are used to + * produce a static error string - this string is not copied and saved + * when the error is logged - only a pointer to it is saved. See + * existing callers for examples of how static strings are typically + * defined for use with tracing_log_err(). + */ +void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos) +{ + struct tracing_log_err *err; + + if (!tr) + tr = &global_trace; + + mutex_lock(&tracing_err_log_lock); + err = get_tracing_log_err(tr); + if (PTR_ERR(err) == -ENOMEM) { + mutex_unlock(&tracing_err_log_lock); + return; + } + + snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); + snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd); + + err->info.errs = errs; + err->info.type = type; + err->info.pos = pos; + err->info.ts = local_clock(); + + list_add_tail(&err->list, &tr->err_log); + mutex_unlock(&tracing_err_log_lock); +} + +static void clear_tracing_err_log(struct trace_array *tr) +{ + struct tracing_log_err *err, *next; + + mutex_lock(&tracing_err_log_lock); + list_for_each_entry_safe(err, next, &tr->err_log, list) { + list_del(&err->list); + kfree(err); + } + + tr->n_err_log_entries = 0; + mutex_unlock(&tracing_err_log_lock); +} + +static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) +{ + struct trace_array *tr = m->private; + + mutex_lock(&tracing_err_log_lock); + + return seq_list_start(&tr->err_log, *pos); +} + +static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_array *tr = m->private; + + return seq_list_next(v, &tr->err_log, pos); +} + +static void tracing_err_log_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&tracing_err_log_lock); +} + +static void tracing_err_log_show_pos(struct seq_file *m, u8 pos) +{ + u8 i; + + for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) + seq_putc(m, ' '); + for (i = 0; i < pos; i++) + seq_putc(m, ' '); + seq_puts(m, "^\n"); +} + +static int tracing_err_log_seq_show(struct seq_file *m, void *v) +{ + struct tracing_log_err *err = v; + + if (err) { + const char *err_text = err->info.errs[err->info.type]; + u64 sec = err->info.ts; + u32 nsec; + + nsec = do_div(sec, NSEC_PER_SEC); + seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, + err->loc, err_text); + seq_printf(m, "%s", err->cmd); + tracing_err_log_show_pos(m, err->info.pos); + } + + return 0; +} + +static const struct seq_operations tracing_err_log_seq_ops = { + .start = tracing_err_log_seq_start, + .next = tracing_err_log_seq_next, + .stop = tracing_err_log_seq_stop, + .show = tracing_err_log_seq_show +}; + +static int tracing_err_log_open(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + int ret = 0; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + /* If this file was opened for write, then erase contents */ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + clear_tracing_err_log(tr); + + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, &tracing_err_log_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = tr; + } else { + trace_array_put(tr); + } + } + return ret; +} + +static ssize_t tracing_err_log_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracing_err_log_fops = { + .open = tracing_err_log_open, + .write = tracing_err_log_write, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release_generic_tr, +}; + static int tracing_buffers_open(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -8033,7 +8299,7 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } -static int instance_mkdir(const char *name) +struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; int ret; @@ -8072,6 +8338,7 @@ static int instance_mkdir(const char *name) INIT_LIST_HEAD(&tr->systems); INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); + INIT_LIST_HEAD(&tr->err_log); if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; @@ -8097,7 +8364,7 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return 0; + return tr; out_free_tr: free_trace_buffers(tr); @@ -8109,33 +8376,21 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return ret; + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(trace_array_create); +static int instance_mkdir(const char *name) +{ + return PTR_ERR_OR_ZERO(trace_array_create(name)); } -static int instance_rmdir(const char *name) +static int __remove_instance(struct trace_array *tr) { - struct trace_array *tr; - int found = 0; - int ret; int i; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); - - ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - found = 1; - break; - } - } - if (!found) - goto out_unlock; - - ret = -EBUSY; if (tr->ref || (tr->current_trace && tr->current_trace->ref)) - goto out_unlock; + return -EBUSY; list_del(&tr->list); @@ -8161,10 +8416,46 @@ static int instance_rmdir(const char *name) free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); + tr = NULL; - ret = 0; + return 0; +} + +int trace_array_destroy(struct trace_array *tr) +{ + int ret; + + if (!tr) + return -EINVAL; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = __remove_instance(tr); + + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_array_destroy); + +static int instance_rmdir(const char *name) +{ + struct trace_array *tr; + int ret; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -ENODEV; + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, name) == 0) { + ret = __remove_instance(tr); + break; + } + } - out_unlock: mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); @@ -8254,6 +8545,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) tr, &snapshot_fops); #endif + trace_create_file("error_log", 0644, d_tracer, + tr, &tracing_err_log_fops); + for_each_tracing_cpu(cpu) tracing_init_tracefs_percpu(tr, cpu); @@ -8839,6 +9133,7 @@ __init static int tracer_alloc_buffers(void) INIT_LIST_HEAD(&global_trace.systems); INIT_LIST_HEAD(&global_trace.events); INIT_LIST_HEAD(&global_trace.hist_vars); + INIT_LIST_HEAD(&global_trace.err_log); list_add(&global_trace.list, &ftrace_trace_arrays); apply_trace_boot_options(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 639047b259d7..1974ce818ddb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -293,11 +293,13 @@ struct trace_array { int nr_topts; bool clear_trace; int buffer_percent; + unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; + struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; @@ -719,6 +721,9 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); +unsigned long trace_total_entries(struct trace_array *tr); + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, @@ -1545,7 +1550,8 @@ extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); -extern int create_event_filter(struct trace_event_call *call, +extern int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); @@ -1876,6 +1882,11 @@ extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); +extern unsigned int err_pos(char *cmd, const char *str); +extern void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos); + /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5b3b0c3c8a47..0ce3db67f556 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -832,6 +832,7 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) return ret; } +EXPORT_SYMBOL_GPL(ftrace_set_clr_event); /** * trace_set_clr_event - enable or disable an event @@ -1318,9 +1319,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) char buf[32]; int len; - if (*ppos) - return 0; - if (unlikely(!id)) return -ENODEV; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 05a66493a164..d3e59312ef40 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -66,7 +66,8 @@ static const char * ops[] = { OPS }; C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ - C(NO_FILTER, "No filter found"), + C(ERRNO, "Error"), \ + C(NO_FILTER, "No filter found") #undef C #define C(a, b) FILT_ERR_##a @@ -76,7 +77,7 @@ enum { ERRORS }; #undef C #define C(a, b) b -static char *err_text[] = { ERRORS }; +static const char *err_text[] = { ERRORS }; /* Called after a '!' character but "!=" and "!~" are not "not"s */ static bool is_not(const char *str) @@ -919,7 +920,8 @@ static void remove_filter_string(struct event_filter *filter) filter->filter_string = NULL; } -static void append_filter_err(struct filter_parse_error *pe, +static void append_filter_err(struct trace_array *tr, + struct filter_parse_error *pe, struct event_filter *filter) { struct trace_seq *s; @@ -947,8 +949,14 @@ static void append_filter_err(struct filter_parse_error *pe, if (pe->lasterr > 0) { trace_seq_printf(s, "\n%*s", pos, "^"); trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); + tracing_log_err(tr, "event filter parse error", + filter->filter_string, err_text, + pe->lasterr, pe->lasterr_pos); } else { trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); + tracing_log_err(tr, "event filter parse error", + filter->filter_string, err_text, + FILT_ERR_ERRNO, 0); } trace_seq_putc(s, 0); buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); @@ -1214,30 +1222,30 @@ static int parse_pred(const char *str, void *data, * (perf doesn't use it) and grab everything. */ if (strcmp(field->name, "ip") != 0) { - parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); - goto err_free; - } - pred->fn = filter_pred_none; - - /* - * Quotes are not required, but if they exist then we need - * to read them till we hit a matching one. - */ - if (str[i] == '\'' || str[i] == '"') - q = str[i]; - else - q = 0; - - for (i++; str[i]; i++) { - if (q && str[i] == q) - break; - if (!q && (str[i] == ')' || str[i] == '&' || - str[i] == '|')) - break; - } - /* Skip quotes */ - if (q) - s++; + parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); + goto err_free; + } + pred->fn = filter_pred_none; + + /* + * Quotes are not required, but if they exist then we need + * to read them till we hit a matching one. + */ + if (str[i] == '\'' || str[i] == '"') + q = str[i]; + else + q = 0; + + for (i++; str[i]; i++) { + if (q && str[i] == q) + break; + if (!q && (str[i] == ')' || str[i] == '&' || + str[i] == '|')) + break; + } + /* Skip quotes */ + if (q) + s++; len = i - s; if (len >= MAX_FILTER_STR_VAL) { parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); @@ -1600,7 +1608,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, if (err) { filter_disable(file); parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); - append_filter_err(pe, filter); + append_filter_err(tr, pe, filter); } else event_set_filtered_flag(file); @@ -1712,7 +1720,8 @@ static void create_filter_finish(struct filter_parse_error *pe) * information if @set_str is %true and the caller is responsible for * freeing it. */ -static int create_filter(struct trace_event_call *call, +static int create_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_string, bool set_str, struct event_filter **filterp) { @@ -1729,17 +1738,18 @@ static int create_filter(struct trace_event_call *call, err = process_preds(call, filter_string, *filterp, pe); if (err && set_str) - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); create_filter_finish(pe); return err; } -int create_event_filter(struct trace_event_call *call, +int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { - return create_filter(call, filter_str, set_str, filterp); + return create_filter(tr, call, filter_str, set_str, filterp); } /** @@ -1766,7 +1776,7 @@ static int create_system_filter(struct trace_subsystem_dir *dir, kfree((*filterp)->filter_string); (*filterp)->filter_string = NULL; } else { - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); } } create_filter_finish(pe); @@ -1797,7 +1807,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string) return 0; } - err = create_filter(call, filter_string, true, &filter); + err = create_filter(file->tr, call, filter_string, true, &filter); /* * Always swap the call filter with the new filter @@ -2053,7 +2063,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (event->filter) goto out_unlock; - err = create_filter(call, filter_str, false, &filter); + err = create_filter(NULL, call, filter_str, false, &filter); if (err) goto free_filter; @@ -2202,8 +2212,8 @@ static __init int ftrace_test_event_filter(void) struct test_filter_data_t *d = &test_filter_data[i]; int err; - err = create_filter(&event_ftrace_test_filter, d->filter, - false, &filter); + err = create_filter(NULL, &event_ftrace_test_filter, + d->filter, false, &filter); if (err) { printk(KERN_INFO "Failed to get filter for '%s', err %d\n", diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index a1d20421f4b0..7fca3457c705 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -22,6 +22,57 @@ #define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ +#define ERRORS \ + C(NONE, "No error"), \ + C(DUPLICATE_VAR, "Variable already defined"), \ + C(VAR_NOT_UNIQUE, "Variable name not unique, need to use fully qualified name (subsys.event.var) for variable"), \ + C(TOO_MANY_VARS, "Too many variables defined"), \ + C(MALFORMED_ASSIGNMENT, "Malformed assignment"), \ + C(NAMED_MISMATCH, "Named hist trigger doesn't match existing named trigger (includes variables)"), \ + C(TRIGGER_EEXIST, "Hist trigger already exists"), \ + C(TRIGGER_ENOENT_CLEAR, "Can't clear or continue a nonexistent hist trigger"), \ + C(SET_CLOCK_FAIL, "Couldn't set trace_clock"), \ + C(BAD_FIELD_MODIFIER, "Invalid field modifier"), \ + C(TOO_MANY_SUBEXPR, "Too many subexpressions (3 max)"), \ + C(TIMESTAMP_MISMATCH, "Timestamp units in expression don't match"), \ + C(TOO_MANY_FIELD_VARS, "Too many field variables defined"), \ + C(EVENT_FILE_NOT_FOUND, "Event file not found"), \ + C(HIST_NOT_FOUND, "Matching event histogram not found"), \ + C(HIST_CREATE_FAIL, "Couldn't create histogram for field"), \ + C(SYNTH_VAR_NOT_FOUND, "Couldn't find synthetic variable"), \ + C(SYNTH_EVENT_NOT_FOUND,"Couldn't find synthetic event"), \ + C(SYNTH_TYPE_MISMATCH, "Param type doesn't match synthetic event field type"), \ + C(SYNTH_COUNT_MISMATCH, "Param count doesn't match synthetic event field count"), \ + C(FIELD_VAR_PARSE_FAIL, "Couldn't parse field variable"), \ + C(VAR_CREATE_FIND_FAIL, "Couldn't create or find variable"), \ + C(ONX_NOT_VAR, "For onmax(x) or onchange(x), x must be a variable"), \ + C(ONX_VAR_NOT_FOUND, "Couldn't find onmax or onchange variable"), \ + C(ONX_VAR_CREATE_FAIL, "Couldn't create onmax or onchange variable"), \ + C(FIELD_VAR_CREATE_FAIL,"Couldn't create field variable"), \ + C(TOO_MANY_PARAMS, "Too many action params"), \ + C(PARAM_NOT_FOUND, "Couldn't find param"), \ + C(INVALID_PARAM, "Invalid action param"), \ + C(ACTION_NOT_FOUND, "No action found"), \ + C(NO_SAVE_PARAMS, "No params found for save()"), \ + C(TOO_MANY_SAVE_ACTIONS,"Can't have more than one save() action per hist"), \ + C(ACTION_MISMATCH, "Handler doesn't support action"), \ + C(NO_CLOSING_PAREN, "No closing paren found"), \ + C(SUBSYS_NOT_FOUND, "Missing subsystem"), \ + C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ + C(INVALID_REF_KEY, "Using variable references as keys not supported"), \ + C(VAR_NOT_FOUND, "Couldn't find variable"), \ + C(FIELD_NOT_FOUND, "Couldn't find field"), + +#undef C +#define C(a, b) HIST_ERR_##a + +enum { ERRORS }; + +#undef C +#define C(a, b) b + +static const char *err_text[] = { ERRORS }; + struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, @@ -535,62 +586,49 @@ static struct track_data *track_data_alloc(unsigned int key_len, return data; } -static char last_hist_cmd[MAX_FILTER_STR_VAL]; -static char hist_err_str[MAX_FILTER_STR_VAL]; +static char last_cmd[MAX_FILTER_STR_VAL]; +static char last_cmd_loc[MAX_FILTER_STR_VAL]; -static void last_cmd_set(char *str) +static int errpos(char *str) { - if (!str) - return; - - strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); + return err_pos(last_cmd, str); } -static void hist_err(char *str, char *var) +static void last_cmd_set(struct trace_event_file *file, char *str) { - int maxlen = MAX_FILTER_STR_VAL - 1; + const char *system = NULL, *name = NULL; + struct trace_event_call *call; if (!str) return; - if (strlen(hist_err_str)) - return; + strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); - if (!var) - var = ""; + if (file) { + call = file->event_call; - if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) - return; + system = call->class->system; + if (system) { + name = trace_event_name(call); + if (!name) + system = NULL; + } + } - strcat(hist_err_str, str); - strcat(hist_err_str, var); + if (system) + snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } -static void hist_err_event(char *str, char *system, char *event, char *var) +static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos) { - char err[MAX_FILTER_STR_VAL]; - - if (system && var) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); - else if (system) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); - else - strscpy(err, var, MAX_FILTER_STR_VAL); - - hist_err(str, err); + tracing_log_err(tr, last_cmd_loc, last_cmd, err_text, + err_type, err_pos); } static void hist_err_clear(void) { - hist_err_str[0] = '\0'; -} - -static bool have_hist_err(void) -{ - if (strlen(hist_err_str)) - return true; - - return false; + last_cmd[0] = '\0'; + last_cmd_loc[0] = '\0'; } struct synth_trace_event { @@ -1719,7 +1757,7 @@ static struct trace_event_file *find_var_file(struct trace_array *tr, if (find_var_field(var_hist_data, var_name)) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return NULL; } @@ -1770,7 +1808,8 @@ find_match_var(struct hist_trigger_data *hist_data, char *var_name) hist_field = find_file_var(file, var_name); if (hist_field) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, + errpos(var_name)); return ERR_PTR(-EINVAL); } @@ -2002,11 +2041,11 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) attrs->n_actions++; ret = 0; } - return ret; } -static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) +static int parse_assignment(struct trace_array *tr, + char *str, struct hist_trigger_attrs *attrs) { int ret = 0; @@ -2062,7 +2101,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) char *assignment; if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", str); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(str)); ret = -EINVAL; goto out; } @@ -2079,7 +2118,8 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) return ret; } -static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) +static struct hist_trigger_attrs * +parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) { struct hist_trigger_attrs *attrs; int ret = 0; @@ -2092,7 +2132,7 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) char *str = strsep(&trigger_str, ":"); if (strchr(str, '=')) { - ret = parse_assignment(str, attrs); + ret = parse_assignment(tr, str, attrs); if (ret) goto free; } else if (strcmp(str, "pause") == 0) @@ -2648,6 +2688,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, char *var_name) { struct hist_field *var_field = NULL, *ref_field = NULL; + struct trace_array *tr = hist_data->event_file->tr; if (!is_var_ref(var_name)) return NULL; @@ -2660,8 +2701,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, system, event_name); if (!ref_field) - hist_err_event("Couldn't find variable: $", - system, event_name, var_name); + hist_err(tr, HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); return ref_field; } @@ -2672,6 +2712,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, { struct ftrace_event_field *field = NULL; char *field_name, *modifier, *str; + struct trace_array *tr = file->tr; modifier = str = kstrdup(field_str, GFP_KERNEL); if (!modifier) @@ -2695,7 +2736,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; else { - hist_err("Invalid field modifier: ", modifier); + hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); field = ERR_PTR(-EINVAL); goto out; } @@ -2711,7 +2752,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err("Couldn't find field: ", field_name); + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); field = ERR_PTR(-EINVAL); goto out; } @@ -2773,7 +2814,8 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); if (!s) { - hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); + hist_field = parse_var_ref(hist_data, ref_system, + ref_event, ref_var); if (hist_field) { if (var_name) { hist_field = create_alias(hist_data, hist_field, var_name); @@ -2822,7 +2864,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, /* we support only -(xxx) i.e. explicit parens required */ if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; } @@ -2877,7 +2919,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } -static int check_expr_operands(struct hist_field *operand1, +static int check_expr_operands(struct trace_array *tr, + struct hist_field *operand1, struct hist_field *operand2) { unsigned long operand1_flags = operand1->flags; @@ -2905,7 +2948,7 @@ static int check_expr_operands(struct hist_field *operand1, if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { - hist_err("Timestamp units in expression don't match", NULL); + hist_err(tr, HIST_ERR_TIMESTAMP_MISMATCH, 0); return -EINVAL; } @@ -2923,7 +2966,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, char *sep, *operand1_str; if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } @@ -2968,7 +3011,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, goto free; } - ret = check_expr_operands(operand1, operand2); + ret = check_expr_operands(file->tr, operand1, operand2); if (ret) goto free; @@ -3161,16 +3204,14 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, int ret; if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { - hist_err_event("trace action: Too many field variables defined: ", - subsys_name, event_name, field_name); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); return ERR_PTR(-EINVAL); } file = event_file(tr, subsys_name, event_name); if (IS_ERR(file)) { - hist_err_event("trace action: Event file not found: ", - subsys_name, event_name, field_name); + hist_err(tr, HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); ret = PTR_ERR(file); return ERR_PTR(ret); } @@ -3183,8 +3224,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, */ hist_data = find_compatible_hist(target_hist_data, file); if (!hist_data) { - hist_err_event("trace action: Matching event histogram not found: ", - subsys_name, event_name, field_name); + hist_err(tr, HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3245,8 +3285,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't create histogram for field: ", - subsys_name, event_name, field_name); + hist_err(tr, HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); return ERR_PTR(ret); } @@ -3258,8 +3297,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, if (IS_ERR_OR_NULL(event_var)) { kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't find synthetic variable: ", - subsys_name, event_name, field_name); + hist_err(tr, HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3392,25 +3430,26 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, { struct hist_field *val = NULL, *var = NULL; unsigned long flags = HIST_FIELD_FL_VAR; + struct trace_array *tr = file->tr; struct field_var *field_var; int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { - hist_err("Too many field variables defined: ", field_name); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); ret = -EINVAL; goto err; } val = parse_atom(hist_data, file, field_name, &flags, NULL); if (IS_ERR(val)) { - hist_err("Couldn't parse field variable: ", field_name); + hist_err(tr, HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); ret = PTR_ERR(val); goto err; } var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { - hist_err("Couldn't create or find variable: ", field_name); + hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); kfree(val); ret = PTR_ERR(var); goto err; @@ -3737,19 +3776,20 @@ static int track_data_create(struct hist_trigger_data *hist_data, { struct hist_field *var_field, *ref_field, *track_var = NULL; struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; char *track_data_var_str; int ret = 0; track_data_var_str = data->track_data.var_str; if (track_data_var_str[0] != '$') { - hist_err("For onmax(x) or onchange(x), x must be a variable: ", track_data_var_str); + hist_err(tr, HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); return -EINVAL; } track_data_var_str++; var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str); if (!var_field) { - hist_err("Couldn't find onmax or onchange variable: ", track_data_var_str); + hist_err(tr, HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); return -EINVAL; } @@ -3762,7 +3802,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONMAX) track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onmax variable: ", "__max"); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3770,7 +3810,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONCHANGE) track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onchange variable: ", "__change"); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3781,7 +3821,8 @@ static int track_data_create(struct hist_trigger_data *hist_data, return ret; } -static int parse_action_params(char *params, struct action_data *data) +static int parse_action_params(struct trace_array *tr, char *params, + struct action_data *data) { char *param, *saved_param; bool first_param = true; @@ -3789,20 +3830,20 @@ static int parse_action_params(char *params, struct action_data *data) while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { - hist_err("Too many action params", ""); + hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); goto out; } param = strsep(¶ms, ","); if (!param) { - hist_err("No action param found", ""); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, 0); ret = -EINVAL; goto out; } param = strstrip(param); if (strlen(param) < 2) { - hist_err("Invalid action param: ", param); + hist_err(tr, HIST_ERR_INVALID_PARAM, errpos(param)); ret = -EINVAL; goto out; } @@ -3826,7 +3867,7 @@ static int parse_action_params(char *params, struct action_data *data) return ret; } -static int action_parse(char *str, struct action_data *data, +static int action_parse(struct trace_array *tr, char *str, struct action_data *data, enum handler_id handler) { char *action_name; @@ -3834,14 +3875,14 @@ static int action_parse(char *str, struct action_data *data, strsep(&str, "."); if (!str) { - hist_err("action parsing: No action found", ""); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } action_name = strsep(&str, "("); if (!action_name || !str) { - hist_err("action parsing: No action found", ""); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } @@ -3850,12 +3891,12 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!params) { - hist_err("action parsing: No params found for %s", "save"); + hist_err(tr, HIST_ERR_NO_SAVE_PARAMS, 0); ret = -EINVAL; goto out; } - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; @@ -3864,7 +3905,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3876,7 +3917,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!str) { - hist_err("action parsing: No closing paren found: %s", params); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(params)); ret = -EINVAL; goto out; } @@ -3886,7 +3927,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3901,7 +3942,7 @@ static int action_parse(char *str, struct action_data *data, data->use_trace_keyword = true; if (params) { - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; } @@ -3954,7 +3995,7 @@ static struct action_data *track_data_parse(struct hist_trigger_data *hist_data, goto free; } - ret = action_parse(str, data, handler); + ret = action_parse(hist_data->event_file->tr, str, data, handler); if (ret) goto free; out: @@ -4024,6 +4065,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, struct action_data *data, char *system, char *event, char *var) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field; var++; /* skip '$' */ @@ -4039,7 +4081,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, } if (!hist_field) - hist_err_event("trace action: Couldn't find param: $", system, event, var); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, errpos(var)); return hist_field; } @@ -4097,6 +4139,7 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, static int trace_action_create(struct hist_trigger_data *hist_data, struct action_data *data) { + struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; unsigned int i, var_ref_idx; @@ -4114,7 +4157,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event = find_synth_event(synth_event_name); if (!event) { - hist_err("trace action: Couldn't find synthetic event: ", synth_event_name); + hist_err(tr, HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); return -EINVAL; } @@ -4175,15 +4218,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, continue; } - hist_err_event("trace action: Param type doesn't match synthetic event field type: ", - system, event_name, param); + hist_err(tr, HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); kfree(p); ret = -EINVAL; goto err; } if (field_pos != event->n_fields) { - hist_err("trace action: Param count doesn't match synthetic event field count: ", event->name); + hist_err(tr, HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); ret = -EINVAL; goto err; } @@ -4202,6 +4244,7 @@ static int action_create(struct hist_trigger_data *hist_data, struct action_data *data) { struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; struct track_data *track_data; struct field_var *field_var; unsigned int i; @@ -4229,7 +4272,7 @@ static int action_create(struct hist_trigger_data *hist_data, if (data->action == ACTION_SAVE) { if (hist_data->n_save_vars) { ret = -EEXIST; - hist_err("save action: Can't have more than one save() action per hist", ""); + hist_err(tr, HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); goto out; } @@ -4242,7 +4285,8 @@ static int action_create(struct hist_trigger_data *hist_data, field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { - hist_err("save action: Couldn't create field variable: ", param); + hist_err(tr, HIST_ERR_FIELD_VAR_CREATE_FAIL, + errpos(param)); ret = PTR_ERR(field_var); kfree(param); goto out; @@ -4276,19 +4320,18 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) match_event = strsep(&str, ")"); if (!match_event || !str) { - hist_err("onmatch: Missing closing paren: ", match_event); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); goto free; } match_event_system = strsep(&match_event, "."); if (!match_event) { - hist_err("onmatch: Missing subsystem for match event: ", match_event_system); + hist_err(tr, HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); goto free; } if (IS_ERR(event_file(tr, match_event_system, match_event))) { - hist_err_event("onmatch: Invalid subsystem or event name: ", - match_event_system, match_event, NULL); + hist_err(tr, HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); goto free; } @@ -4304,7 +4347,7 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) goto free; } - ret = action_parse(str, data, HANDLER_ONMATCH); + ret = action_parse(tr, str, data, HANDLER_ONMATCH); if (ret) goto free; out: @@ -4373,13 +4416,14 @@ static int create_var_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *var_name, char *expr_str) { + struct trace_array *tr = hist_data->event_file->tr; unsigned long flags = 0; if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) return -EINVAL; if (find_var(hist_data, file, var_name) && !hist_data->remove) { - hist_err("Variable already defined: ", var_name); + hist_err(tr, HIST_ERR_DUPLICATE_VAR, errpos(var_name)); return -EINVAL; } @@ -4436,8 +4480,8 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *field_str) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field = NULL; - unsigned long flags = 0; unsigned int key_size; int ret = 0; @@ -4460,7 +4504,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { - hist_err("Using variable references as keys not supported: ", field_str); + hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str)); destroy_hist_field(hist_field, 0); ret = -EINVAL; goto out; @@ -4561,6 +4605,7 @@ static void free_var_defs(struct hist_trigger_data *hist_data) static int parse_var_defs(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *s, *str, *var_name, *field_str; unsigned int i, j, n_vars = 0; int ret = 0; @@ -4574,13 +4619,14 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) var_name = strsep(&field_str, "="); if (!var_name || !field_str) { - hist_err("Malformed assignment: ", var_name); + hist_err(tr, HIST_ERR_MALFORMED_ASSIGNMENT, + errpos(var_name)); ret = -EINVAL; goto free; } if (n_vars == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", var_name); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(var_name)); ret = -EINVAL; goto free; } @@ -5431,11 +5477,6 @@ static int hist_show(struct seq_file *m, void *v) hist_trigger_show(m, data, n++); } - if (have_hist_err()) { - seq_printf(m, "\nERROR: %s\n", hist_err_str); - seq_printf(m, " Last command: %s\n", last_hist_cmd); - } - out_unlock: mutex_unlock(&event_mutex); @@ -5800,6 +5841,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, { struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL; + struct trace_array *tr = file->tr; int ret = 0; if (hist_data->attrs->name) { @@ -5807,7 +5849,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { - hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); + hist_err(tr, HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); ret = -EINVAL; goto out; } @@ -5828,7 +5870,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, else if (hist_data->attrs->clear) hist_clear(test); else { - hist_err("Hist trigger already exists", NULL); + hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; } goto out; @@ -5836,7 +5878,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, } new: if (hist_data->attrs->cont || hist_data->attrs->clear) { - hist_err("Can't clear or continue a nonexistent hist trigger", NULL); + hist_err(tr, HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); ret = -ENOENT; goto out; } @@ -5861,7 +5903,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, ret = tracing_set_clock(file->tr, hist_data->attrs->clock); if (ret) { - hist_err("Couldn't set trace_clock: ", clock); + hist_err(tr, HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); goto out; } @@ -6037,8 +6079,8 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); if (glob && strlen(glob)) { - last_cmd_set(param); hist_err_clear(); + last_cmd_set(file, param); } if (!param) @@ -6079,7 +6121,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, trigger = strstrip(trigger); } - attrs = parse_hist_trigger_attrs(trigger); + attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index cd12ecb66eb9..2a2912cb4533 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -731,7 +731,8 @@ int set_trigger_filter(char *filter_str, goto out; /* The filter is for the 'trigger' event, not the triggered event */ - ret = create_event_filter(file->event_call, filter_str, false, &filter); + ret = create_event_filter(file->tr, file->event_call, + filter_str, false, &filter); /* * If create_event_filter() fails, filter still needs to be freed. * Which the calling code will do with data->filter. diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 810d78a8d14c..6c1ae6b752d1 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -17,29 +17,25 @@ #include "trace.h" #include "trace_output.h" -static void ftrace_dump_buf(int skip_lines, long cpu_file) +static struct trace_iterator iter; +static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; + +static void ftrace_dump_buf(int skip_entries, long cpu_file) { - /* use static because iter can be a bit big for the stack */ - static struct trace_iterator iter; - static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; struct trace_array *tr; unsigned int old_userobj; int cnt = 0, cpu; - trace_init_global_iter(&iter); - iter.buffer_iter = buffer_iter; tr = iter.tr; - for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - old_userobj = tr->trace_flags; /* don't look at user memory in panic mode */ tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; kdb_printf("Dumping ftrace buffer:\n"); + if (skip_entries) + kdb_printf("(skipping %d entries)\n", skip_entries); /* reset all but tr, trace, and overruns */ memset(&iter.seq, 0, @@ -70,11 +66,11 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) kdb_printf("---------------------------------\n"); cnt++; - if (!skip_lines) { + if (!skip_entries) { print_trace_line(&iter); trace_printk_seq(&iter.seq); } else { - skip_lines--; + skip_entries--; } if (KDB_FLAG(CMD_INTERRUPT)) @@ -90,10 +86,6 @@ out: tr->trace_flags = old_userobj; for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - - for_each_tracing_cpu(cpu) { if (iter.buffer_iter[cpu]) { ring_buffer_read_finish(iter.buffer_iter[cpu]); iter.buffer_iter[cpu] = NULL; @@ -106,17 +98,19 @@ out: */ static int kdb_ftdump(int argc, const char **argv) { - int skip_lines = 0; + int skip_entries = 0; long cpu_file; char *cp; + int cnt; + int cpu; if (argc > 2) return KDB_ARGCOUNT; if (argc) { - skip_lines = simple_strtol(argv[1], &cp, 0); + skip_entries = simple_strtol(argv[1], &cp, 0); if (*cp) - skip_lines = 0; + skip_entries = 0; } if (argc == 2) { @@ -129,7 +123,29 @@ static int kdb_ftdump(int argc, const char **argv) } kdb_trap_printk++; - ftrace_dump_buf(skip_lines, cpu_file); + + trace_init_global_iter(&iter); + iter.buffer_iter = buffer_iter; + + for_each_tracing_cpu(cpu) { + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + + /* A negative skip_entries means skip all but the last entries */ + if (skip_entries < 0) { + if (cpu_file == RING_BUFFER_ALL_CPUS) + cnt = trace_total_entries(NULL); + else + cnt = trace_total_entries_cpu(NULL, cpu_file); + skip_entries = max(cnt + skip_entries, 0); + } + + ftrace_dump_buf(skip_entries, cpu_file); + + for_each_tracing_cpu(cpu) { + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + kdb_trap_printk--; return 0; @@ -137,8 +153,9 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { - kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", - "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); + kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]", + "Dump ftrace log; -skip dumps last #entries", 0, + KDB_ENABLE_ALWAYS_SAFE); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5d5129b05df7..7d736248a070 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -441,13 +441,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(&tk->rp.kp); - if (ret == 0) { + if (ret == 0) tk->tp.flags |= TP_FLAG_REGISTERED; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } return ret; } @@ -591,7 +586,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) * Type of args: * FETCHARG:TYPE : use TYPE instead of unsigned long. */ - struct trace_kprobe *tk; + struct trace_kprobe *tk = NULL; int i, len, ret = 0; bool is_return = false; char *symbol = NULL, *tmp = NULL; @@ -615,44 +610,50 @@ static int trace_kprobe_create(int argc, const char *argv[]) if (argc < 2) return -ECANCELED; + trace_probe_log_init("trace_kprobe", argc, argv); + event = strchr(&argv[0][1], ':'); if (event) event++; if (isdigit(argv[0][1])) { if (!is_return) { - pr_info("Maxactive is not for kprobe"); - return -EINVAL; + trace_probe_log_err(1, MAXACT_NO_KPROBE); + goto parse_error; } if (event) len = event - &argv[0][1] - 1; else len = strlen(&argv[0][1]); - if (len > MAX_EVENT_NAME_LEN - 1) - return -E2BIG; + if (len > MAX_EVENT_NAME_LEN - 1) { + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; + } memcpy(buf, &argv[0][1], len); buf[len] = '\0'; ret = kstrtouint(buf, 0, &maxactive); if (ret || !maxactive) { - pr_info("Invalid maxactive number\n"); - return ret; + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; } /* kretprobes instances are iterated over via a list. The * maximum should stay reasonable. */ if (maxactive > KRETPROBE_MAXACTIVE_MAX) { - pr_info("Maxactive is too big (%d > %d).\n", - maxactive, KRETPROBE_MAXACTIVE_MAX); - return -E2BIG; + trace_probe_log_err(1, MAXACT_TOO_BIG); + goto parse_error; } } /* try to parse an address. if that fails, try to read the * input as a symbol. */ if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) { + trace_probe_log_set_index(1); /* Check whether uprobe event specified */ - if (strchr(argv[1], '/') && strchr(argv[1], ':')) - return -ECANCELED; + if (strchr(argv[1], '/') && strchr(argv[1], ':')) { + ret = -ECANCELED; + goto error; + } /* a symbol specified */ symbol = kstrdup(argv[1], GFP_KERNEL); if (!symbol) @@ -660,23 +661,23 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret || offset < 0 || offset > UINT_MAX) { - pr_info("Failed to parse either an address or a symbol.\n"); - goto out; + trace_probe_log_err(0, BAD_PROBE_ADDR); + goto parse_error; } if (kprobe_on_func_entry(NULL, symbol, offset)) flags |= TPARG_FL_FENTRY; if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { - pr_info("Given offset is not valid for return probe.\n"); - ret = -EINVAL; - goto out; + trace_probe_log_err(0, BAD_RETPROBE); + goto parse_error; } } - argc -= 2; argv += 2; + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) - goto out; + goto parse_error; } else { /* Make a new event name */ if (symbol) @@ -691,13 +692,14 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, - argc, is_return); + argc - 2, is_return); if (IS_ERR(tk)) { ret = PTR_ERR(tk); - /* This must return -ENOMEM otherwise there is a bug */ + /* This must return -ENOMEM, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); - goto out; + goto out; /* We know tk is not allocated */ } + argc -= 2; argv += 2; /* parse arguments */ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { @@ -707,19 +709,32 @@ static int trace_kprobe_create(int argc, const char *argv[]) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags); kfree(tmp); if (ret) - goto error; + goto error; /* This can be -ENOMEM */ } ret = register_trace_kprobe(tk); - if (ret) + if (ret) { + trace_probe_log_set_index(1); + if (ret == -EILSEQ) + trace_probe_log_err(0, BAD_INSN_BNDRY); + else if (ret == -ENOENT) + trace_probe_log_err(0, BAD_PROBE_ADDR); + else if (ret != -ENOMEM) + trace_probe_log_err(0, FAIL_REG_PROBE); goto error; + } + out: + trace_probe_log_clear(); kfree(symbol); return ret; +parse_error: + ret = -EINVAL; error: free_trace_kprobe(tk); goto out; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8f8411e7835f..a347faced959 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -13,6 +13,11 @@ #include "trace_probe.h" +#undef C +#define C(a, b) b + +static const char *trace_probe_err_text[] = { ERRORS }; + static const char *reserved_field_names[] = { "common_type", "common_flags", @@ -133,6 +138,60 @@ fail: return NULL; } +static struct trace_probe_log trace_probe_log; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv) +{ + trace_probe_log.subsystem = subsystem; + trace_probe_log.argc = argc; + trace_probe_log.argv = argv; + trace_probe_log.index = 0; +} + +void trace_probe_log_clear(void) +{ + memset(&trace_probe_log, 0, sizeof(trace_probe_log)); +} + +void trace_probe_log_set_index(int index) +{ + trace_probe_log.index = index; +} + +void __trace_probe_log_err(int offset, int err_type) +{ + char *command, *p; + int i, len = 0, pos = 0; + + if (!trace_probe_log.argv) + return; + + /* Recalcurate the length and allocate buffer */ + for (i = 0; i < trace_probe_log.argc; i++) { + if (i == trace_probe_log.index) + pos = len; + len += strlen(trace_probe_log.argv[i]) + 1; + } + command = kzalloc(len, GFP_KERNEL); + if (!command) + return; + + /* And make a command string from argv array */ + p = command; + for (i = 0; i < trace_probe_log.argc; i++) { + len = strlen(trace_probe_log.argv[i]); + strcpy(p, trace_probe_log.argv[i]); + p[len] = ' '; + p += len + 1; + } + *(p - 1) = '\0'; + + tracing_log_err(NULL, trace_probe_log.subsystem, command, + trace_probe_err_text, err_type, pos + offset); + + kfree(command); +} + /* Split symbol and offset. */ int traceprobe_split_symbol_offset(char *symbol, long *offset) { @@ -156,7 +215,7 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) /* @buf must has MAX_EVENT_NAME_LEN size */ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, - char *buf) + char *buf, int offset) { const char *slash, *event = *pevent; int len; @@ -164,32 +223,33 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, slash = strchr(event, '/'); if (slash) { if (slash == event) { - pr_info("Group name is not specified\n"); + trace_probe_log_err(offset, NO_GROUP_NAME); return -EINVAL; } if (slash - event + 1 > MAX_EVENT_NAME_LEN) { - pr_info("Group name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, GROUP_TOO_LONG); + return -EINVAL; } strlcpy(buf, event, slash - event + 1); if (!is_good_name(buf)) { - pr_info("Group name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_GROUP_NAME); return -EINVAL; } *pgroup = buf; *pevent = slash + 1; + offset += slash - event + 1; event = *pevent; } len = strlen(event); if (len == 0) { - pr_info("Event name is not specified\n"); + trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; } else if (len > MAX_EVENT_NAME_LEN) { - pr_info("Event name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, EVENT_TOO_LONG); + return -EINVAL; } if (!is_good_name(event)) { - pr_info("Event name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_EVENT_NAME); return -EINVAL; } return 0; @@ -198,56 +258,67 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_insn *code, unsigned int flags) + struct fetch_insn *code, unsigned int flags, int offs) { unsigned long param; int ret = 0; int len; if (strcmp(arg, "retval") == 0) { - if (flags & TPARG_FL_RETURN) + if (flags & TPARG_FL_RETURN) { code->op = FETCH_OP_RETVAL; - else + } else { + trace_probe_log_err(offs, RETVAL_ON_PROBE); ret = -EINVAL; + } } else if ((len = str_has_prefix(arg, "stack"))) { if (arg[len] == '\0') { code->op = FETCH_OP_STACKP; } else if (isdigit(arg[len])) { ret = kstrtoul(arg + len, 10, ¶m); - if (ret || ((flags & TPARG_FL_KERNEL) && - param > PARAM_MAX_STACK)) + if (ret) { + goto inval_var; + } else if ((flags & TPARG_FL_KERNEL) && + param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_STACK_NUM); ret = -EINVAL; - else { + } else { code->op = FETCH_OP_STACK; code->param = (unsigned int)param; } } else - ret = -EINVAL; + goto inval_var; } else if (strcmp(arg, "comm") == 0) { code->op = FETCH_OP_COMM; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API } else if (((flags & TPARG_FL_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) && (len = str_has_prefix(arg, "arg"))) { - if (!isdigit(arg[len])) - return -EINVAL; ret = kstrtoul(arg + len, 10, ¶m); - if (ret || !param || param > PARAM_MAX_STACK) + if (ret) { + goto inval_var; + } else if (!param || param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_ARG_NUM); return -EINVAL; + } code->op = FETCH_OP_ARG; code->param = (unsigned int)param - 1; #endif } else - ret = -EINVAL; + goto inval_var; return ret; + +inval_var: + trace_probe_log_err(offs, BAD_VAR); + return -EINVAL; } /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, struct fetch_insn **pcode, struct fetch_insn *end, - unsigned int flags) + unsigned int flags, int offs) { struct fetch_insn *code = *pcode; unsigned long param; @@ -257,7 +328,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, type, code, flags); + ret = parse_probe_vars(arg + 1, type, code, flags, offs); break; case '%': /* named register */ @@ -266,47 +337,57 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->op = FETCH_OP_REG; code->param = (unsigned int)ret; ret = 0; - } + } else + trace_probe_log_err(offs, BAD_REG_NAME); break; case '@': /* memory, file-offset or symbol */ if (isdigit(arg[1])) { ret = kstrtoul(arg + 1, 0, ¶m); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_MEM_ADDR); break; + } /* load address */ code->op = FETCH_OP_IMM; code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ - if (flags & TPARG_FL_KERNEL) + if (flags & TPARG_FL_KERNEL) { + trace_probe_log_err(offs, FILE_ON_KPROBE); return -EINVAL; - + } ret = kstrtol(arg + 2, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_FILE_OFFS); break; + } code->op = FETCH_OP_FOFFS; code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ - if (!(flags & TPARG_FL_KERNEL)) + if (!(flags & TPARG_FL_KERNEL)) { + trace_probe_log_err(offs, SYM_ON_UPROBE); return -EINVAL; - + } /* Preserve symbol for updating */ code->op = FETCH_NOP_SYMBOL; code->data = kstrdup(arg + 1, GFP_KERNEL); if (!code->data) return -ENOMEM; - if (++code == end) - return -E2BIG; - + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } code->op = FETCH_OP_IMM; code->immediate = 0; } /* These are fetching from memory */ - if (++code == end) - return -E2BIG; + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } *pcode = code; code->op = FETCH_OP_DEREF; code->offset = offset; @@ -317,28 +398,38 @@ parse_probe_arg(char *arg, const struct fetch_type *type, /* fall through */ case '-': tmp = strchr(arg, '('); - if (!tmp) + if (!tmp) { + trace_probe_log_err(offs, DEREF_NEED_BRACE); return -EINVAL; - + } *tmp = '\0'; ret = kstrtol(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_DEREF_OFFS); break; - + } + offs += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0); arg = tmp + 1; tmp = strrchr(arg, ')'); - - if (tmp) { + if (!tmp) { + trace_probe_log_err(offs + strlen(arg), + DEREF_OPEN_BRACE); + return -EINVAL; + } else { const struct fetch_type *t2 = find_fetch_type(NULL); *tmp = '\0'; - ret = parse_probe_arg(arg, t2, &code, end, flags); + ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) + if (code->op == FETCH_OP_COMM) { + trace_probe_log_err(offs, COMM_CANT_DEREF); return -EINVAL; - if (++code == end) - return -E2BIG; + } + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } *pcode = code; code->op = FETCH_OP_DEREF; @@ -348,6 +439,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ + trace_probe_log_err(offs, BAD_FETCH_ARG); ret = -EINVAL; } return ret; @@ -379,7 +471,7 @@ static int __parse_bitfield_probe_arg(const char *bf, return -EINVAL; code++; if (code->op != FETCH_OP_NOP) - return -E2BIG; + return -EINVAL; *pcode = code; code->op = FETCH_OP_MOD_BF; @@ -392,44 +484,66 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, - struct probe_arg *parg, unsigned int flags) + struct probe_arg *parg, unsigned int flags, int offset) { struct fetch_insn *code, *scode, *tmp = NULL; - char *t, *t2; + char *t, *t2, *t3; int ret, len; - if (strlen(arg) > MAX_ARGSTR_LEN) { - pr_info("Argument is too long.: %s\n", arg); - return -ENOSPC; + len = strlen(arg); + if (len > MAX_ARGSTR_LEN) { + trace_probe_log_err(offset, ARG_TOO_LONG); + return -EINVAL; + } else if (len == 0) { + trace_probe_log_err(offset, NO_ARG_BODY); + return -EINVAL; } + parg->comm = kstrdup(arg, GFP_KERNEL); - if (!parg->comm) { - pr_info("Failed to allocate memory for command '%s'.\n", arg); + if (!parg->comm) return -ENOMEM; - } + t = strchr(arg, ':'); if (t) { *t = '\0'; t2 = strchr(++t, '['); if (t2) { - *t2 = '\0'; - parg->count = simple_strtoul(t2 + 1, &t2, 0); - if (strcmp(t2, "]") || parg->count == 0) + *t2++ = '\0'; + t3 = strchr(t2, ']'); + if (!t3) { + offset += t2 + strlen(t2) - arg; + trace_probe_log_err(offset, + ARRAY_NO_CLOSE); + return -EINVAL; + } else if (t3[1] != '\0') { + trace_probe_log_err(offset + t3 + 1 - arg, + BAD_ARRAY_SUFFIX); return -EINVAL; - if (parg->count > MAX_ARRAY_LEN) - return -E2BIG; + } + *t3 = '\0'; + if (kstrtouint(t2, 0, &parg->count) || !parg->count) { + trace_probe_log_err(offset + t2 - arg, + BAD_ARRAY_NUM); + return -EINVAL; + } + if (parg->count > MAX_ARRAY_LEN) { + trace_probe_log_err(offset + t2 - arg, + ARRAY_TOO_BIG); + return -EINVAL; + } } } - /* - * The default type of $comm should be "string", and it can't be - * dereferenced. - */ - if (!t && strcmp(arg, "$comm") == 0) + + /* Since $comm can not be dereferred, we can find $comm by strcmp */ + if (strcmp(arg, "$comm") == 0) { + /* The type of $comm must be "string", and not an array. */ + if (parg->count || (t && strcmp(t, "string"))) + return -EINVAL; parg->type = find_fetch_type("string"); - else + } else parg->type = find_fetch_type(t); if (!parg->type) { - pr_info("Unsupported type: %s\n", t); + trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE); return -EINVAL; } parg->offset = *size; @@ -444,13 +558,13 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, parg->count); } - code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); + code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); if (!code) return -ENOMEM; code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - flags); + flags, offset); if (ret) goto fail; @@ -458,7 +572,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { - pr_info("string only accepts memory or address.\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } @@ -470,7 +585,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, */ code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } } @@ -483,7 +599,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } else { code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_ST_RAW; @@ -493,20 +610,24 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); - if (ret) + if (ret) { + trace_probe_log_err(offset + t - arg, BAD_BITFIELD); goto fail; + } } /* Loop(Array) operation */ if (parg->count) { if (scode->op != FETCH_OP_ST_MEM && scode->op != FETCH_OP_ST_STRING) { - pr_info("array only accepts memory or address\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_LP_ARRAY; @@ -516,7 +637,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, code->op = FETCH_OP_END; /* Shrink down the code buffer */ - parg->code = kzalloc(sizeof(*code) * (code - tmp + 1), GFP_KERNEL); + parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL); if (!parg->code) ret = -ENOMEM; else @@ -555,15 +676,19 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, { struct probe_arg *parg = &tp->args[i]; char *body; - int ret; /* Increment count for freeing args in error case */ tp->nr_args++; body = strchr(arg, '='); if (body) { - if (body - arg > MAX_ARG_NAME_LEN || body == arg) + if (body - arg > MAX_ARG_NAME_LEN) { + trace_probe_log_err(0, ARG_NAME_TOO_LONG); + return -EINVAL; + } else if (body == arg) { + trace_probe_log_err(0, NO_ARG_NAME); return -EINVAL; + } parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL); body++; } else { @@ -575,22 +700,16 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, return -ENOMEM; if (!is_good_name(parg->name)) { - pr_info("Invalid argument[%d] name: %s\n", - i, parg->name); + trace_probe_log_err(0, BAD_ARG_NAME); return -EINVAL; } - if (traceprobe_conflict_field_name(parg->name, tp->args, i)) { - pr_info("Argument[%d]: '%s' conflicts with another field.\n", - i, parg->name); + trace_probe_log_err(0, USED_ARG_NAME); return -EINVAL; } - /* Parse fetch argument */ - ret = traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags); - if (ret) - pr_info("Parse error at argument[%d]. (%d)\n", i, ret); - return ret; + return traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags, + body - arg); } void traceprobe_free_probe_arg(struct probe_arg *arg) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2177c206de15..f9a8c632188b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -124,6 +124,7 @@ struct fetch_insn { /* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */ #define FETCH_INSN_MAX 16 +#define FETCH_TOKEN_COMM (-ECOMM) /* Fetch type information table */ struct fetch_type { @@ -280,8 +281,8 @@ extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -extern int traceprobe_parse_event_name(const char **pevent, - const char **pgroup, char *buf); +int traceprobe_parse_event_name(const char **pevent, const char **pgroup, + char *buf, int offset); extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return); @@ -298,3 +299,76 @@ extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, size_t offset, struct trace_probe *tp); + +#undef ERRORS +#define ERRORS \ + C(FILE_NOT_FOUND, "Failed to find the given file"), \ + C(NO_REGULAR_FILE, "Not a regular file"), \ + C(BAD_REFCNT, "Invalid reference counter offset"), \ + C(REFCNT_OPEN_BRACE, "Reference counter brace is not closed"), \ + C(BAD_REFCNT_SUFFIX, "Reference counter has wrong suffix"), \ + C(BAD_UPROBE_OFFS, "Invalid uprobe offset"), \ + C(MAXACT_NO_KPROBE, "Maxactive is not for kprobe"), \ + C(BAD_MAXACT, "Invalid maxactive number"), \ + C(MAXACT_TOO_BIG, "Maxactive is too big"), \ + C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \ + C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ + C(NO_GROUP_NAME, "Group name is not specified"), \ + C(GROUP_TOO_LONG, "Group name is too long"), \ + C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \ + C(NO_EVENT_NAME, "Event name is not specified"), \ + C(EVENT_TOO_LONG, "Event name is too long"), \ + C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \ + C(RETVAL_ON_PROBE, "$retval is not available on probe"), \ + C(BAD_STACK_NUM, "Invalid stack number"), \ + C(BAD_ARG_NUM, "Invalid argument number"), \ + C(BAD_VAR, "Invalid $-valiable specified"), \ + C(BAD_REG_NAME, "Invalid register name"), \ + C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ + C(BAD_FILE_OFFS, "Invalid file offset value"), \ + C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ + C(TOO_MANY_OPS, "Dereference is too much nested"), \ + C(DEREF_NEED_BRACE, "Dereference needs a brace"), \ + C(BAD_DEREF_OFFS, "Invalid dereference offset"), \ + C(DEREF_OPEN_BRACE, "Dereference brace is not closed"), \ + C(COMM_CANT_DEREF, "$comm can not be dereferenced"), \ + C(BAD_FETCH_ARG, "Invalid fetch argument"), \ + C(ARRAY_NO_CLOSE, "Array is not closed"), \ + C(BAD_ARRAY_SUFFIX, "Array has wrong suffix"), \ + C(BAD_ARRAY_NUM, "Invalid array size"), \ + C(ARRAY_TOO_BIG, "Array number is too big"), \ + C(BAD_TYPE, "Unknown type is specified"), \ + C(BAD_STRING, "String accepts only memory argument"), \ + C(BAD_BITFIELD, "Invalid bitfield"), \ + C(ARG_NAME_TOO_LONG, "Argument name is too long"), \ + C(NO_ARG_NAME, "Argument name is not specified"), \ + C(BAD_ARG_NAME, "Argument name must follow the same rules as C identifiers"), \ + C(USED_ARG_NAME, "This argument name is already used"), \ + C(ARG_TOO_LONG, "Argument expression is too long"), \ + C(NO_ARG_BODY, "No argument expression"), \ + C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ + C(FAIL_REG_PROBE, "Failed to register probe event"), + +#undef C +#define C(a, b) TP_ERR_##a + +/* Define TP_ERR_ */ +enum { ERRORS }; + +/* Error text is defined in trace_probe.c */ + +struct trace_probe_log { + const char *subsystem; + const char **argv; + int argc; + int index; +}; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv); +void trace_probe_log_set_index(int index); +void trace_probe_log_clear(void); +void __trace_probe_log_err(int offset, int err); + +#define trace_probe_log_err(offs, err) \ + __trace_probe_log_err(offs, TP_ERR_##err) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 4737bb8c07a3..c30c61f12ddd 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -88,7 +88,7 @@ stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { if (code->op == FETCH_OP_ST_STRING) { - ret += fetch_store_strlen(val + code->offset); + ret = fetch_store_strlen(val + code->offset); code++; goto array; } else diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9d402e7fc949..69ee8ef12cee 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -792,7 +792,10 @@ trace_selftest_startup_function_graph(struct tracer *trace, /* check the trace buffer */ ret = trace_test_buffer(&tr->trace_buffer, &count); - trace->reset(tr); + /* Need to also simulate the tr->reset to remove this fgraph_ops */ + tracing_stop_cmdline_record(); + unregister_ftrace_graph(&fgraph_ops); + tracing_start(); if (!ret && !count) { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index be78d99ee6bc..eb7e06b54741 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -156,7 +156,10 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (unlikely(!maxlen)) return -ENOMEM; - ret = strncpy_from_user(dst, src, maxlen); + if (addr == FETCH_TOKEN_COMM) + ret = strlcpy(dst, current->comm, maxlen); + else + ret = strncpy_from_user(dst, src, maxlen); if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; @@ -180,7 +183,10 @@ fetch_store_strlen(unsigned long addr) int len; void __user *vaddr = (void __force __user *) addr; - len = strnlen_user(vaddr, MAX_STRING_SIZE); + if (addr == FETCH_TOKEN_COMM) + len = strlen(current->comm) + 1; + else + len = strnlen_user(vaddr, MAX_STRING_SIZE); return (len > MAX_STRING_SIZE) ? 0 : len; } @@ -220,6 +226,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_IMM: val = code->immediate; break; + case FETCH_OP_COMM: + val = FETCH_TOKEN_COMM; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; @@ -457,13 +466,19 @@ static int trace_uprobe_create(int argc, const char **argv) return -ECANCELED; } + trace_probe_log_init("trace_uprobe", argc, argv); + trace_probe_log_set_index(1); /* filename is the 2nd argument */ + *arg++ = '\0'; ret = kern_path(filename, LOOKUP_FOLLOW, &path); if (ret) { + trace_probe_log_err(0, FILE_NOT_FOUND); kfree(filename); + trace_probe_log_clear(); return ret; } if (!d_is_reg(path.dentry)) { + trace_probe_log_err(0, NO_REGULAR_FILE); ret = -EINVAL; goto fail_address_parse; } @@ -472,9 +487,16 @@ static int trace_uprobe_create(int argc, const char **argv) rctr = strchr(arg, '('); if (rctr) { rctr_end = strchr(rctr, ')'); - if (rctr > rctr_end || *(rctr_end + 1) != 0) { + if (!rctr_end) { + ret = -EINVAL; + rctr_end = rctr + strlen(rctr); + trace_probe_log_err(rctr_end - filename, + REFCNT_OPEN_BRACE); + goto fail_address_parse; + } else if (rctr_end[1] != '\0') { ret = -EINVAL; - pr_info("Invalid reference counter offset.\n"); + trace_probe_log_err(rctr_end + 1 - filename, + BAD_REFCNT_SUFFIX); goto fail_address_parse; } @@ -482,22 +504,23 @@ static int trace_uprobe_create(int argc, const char **argv) *rctr_end = '\0'; ret = kstrtoul(rctr, 0, &ref_ctr_offset); if (ret) { - pr_info("Invalid reference counter offset.\n"); + trace_probe_log_err(rctr - filename, BAD_REFCNT); goto fail_address_parse; } } /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS); goto fail_address_parse; - - argc -= 2; - argv += 2; + } /* setup a probe */ + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) goto fail_address_parse; } else { @@ -519,6 +542,9 @@ static int trace_uprobe_create(int argc, const char **argv) kfree(tail); } + argc -= 2; + argv += 2; + tu = alloc_trace_uprobe(group, event, argc, is_return); if (IS_ERR(tu)) { ret = PTR_ERR(tu); @@ -539,6 +565,7 @@ static int trace_uprobe_create(int argc, const char **argv) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp, is_return ? TPARG_FL_RETURN : 0); kfree(tmp); @@ -547,20 +574,20 @@ static int trace_uprobe_create(int argc, const char **argv) } ret = register_trace_uprobe(tu); - if (ret) - goto error; - return 0; + if (!ret) + goto out; error: free_trace_uprobe(tu); +out: + trace_probe_log_clear(); return ret; fail_address_parse: + trace_probe_log_clear(); path_put(&path); kfree(filename); - pr_info("Failed to parse address or file.\n"); - return ret; } diff --git a/kernel/user.c b/kernel/user.c index 0df9b1640b2a..88b834f0eebc 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -185,7 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid) if (!up) { new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL); if (!new) - goto out_unlock; + return NULL; new->uid = uid; refcount_set(&new->__count, 1); @@ -199,8 +199,6 @@ struct user_struct *alloc_uid(kuid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - key_put(new->uid_keyring); - key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); } else { uid_hash_insert(new, hashent); @@ -210,9 +208,6 @@ struct user_struct *alloc_uid(kuid_t uid) } return up; - -out_unlock: - return NULL; } static int __init uid_cache_init(void) |