aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/acct.c20
-rw-r--r--kernel/capability.c1
-rw-r--r--kernel/cgroup.c5
-rw-r--r--kernel/cgroup_freezer.c21
-rw-r--r--kernel/compat.c25
-rw-r--r--kernel/cpu.c155
-rw-r--r--kernel/cpuset.c145
-rw-r--r--kernel/cred-internals.h21
-rw-r--r--kernel/cred.c65
-rw-r--r--kernel/debug/Makefile6
-rw-r--r--kernel/debug/debug_core.c983
-rw-r--r--kernel/debug/debug_core.h81
-rw-r--r--kernel/debug/gdbstub.c1017
-rw-r--r--kernel/debug/kdb/.gitignore1
-rw-r--r--kernel/debug/kdb/Makefile25
-rw-r--r--kernel/debug/kdb/kdb_bp.c564
-rw-r--r--kernel/debug/kdb/kdb_bt.c210
-rw-r--r--kernel/debug/kdb/kdb_cmds35
-rw-r--r--kernel/debug/kdb/kdb_debugger.c169
-rw-r--r--kernel/debug/kdb/kdb_io.c826
-rw-r--r--kernel/debug/kdb/kdb_keyboard.c212
-rw-r--r--kernel/debug/kdb/kdb_main.c2849
-rw-r--r--kernel/debug/kdb/kdb_private.h300
-rw-r--r--kernel/debug/kdb/kdb_support.c927
-rw-r--r--kernel/exit.c43
-rw-r--r--kernel/fork.c57
-rw-r--r--kernel/groups.c6
-rw-r--r--kernel/hrtimer.c69
-rw-r--r--kernel/hw_breakpoint.c196
-rw-r--r--kernel/irq/handle.c3
-rw-r--r--kernel/irq/manage.c89
-rw-r--r--kernel/irq/proc.c60
-rw-r--r--kernel/kallsyms.c21
-rw-r--r--kernel/kgdb.c1764
-rw-r--r--kernel/kmod.c193
-rw-r--r--kernel/kprobes.c132
-rw-r--r--kernel/ksysfs.c3
-rw-r--r--kernel/lockdep.c93
-rw-r--r--kernel/lockdep_internals.h72
-rw-r--r--kernel/lockdep_proc.c58
-rw-r--r--kernel/module.c30
-rw-r--r--kernel/padata.c189
-rw-r--r--kernel/panic.c27
-rw-r--r--kernel/perf_event.c770
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/pm_qos_params.c218
-rw-r--r--kernel/posix-cpu-timers.c310
-rw-r--r--kernel/posix-timers.c11
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/block_io.c103
-rw-r--r--kernel/power/power.h27
-rw-r--r--kernel/power/snapshot.c145
-rw-r--r--kernel/power/swap.c333
-rw-r--r--kernel/power/user.c37
-rw-r--r--kernel/printk.c25
-rw-r--r--kernel/profile.c8
-rw-r--r--kernel/ptrace.c38
-rw-r--r--kernel/rcupdate.c19
-rw-r--r--kernel/rcutiny.c35
-rw-r--r--kernel/rcutiny_plugin.h39
-rw-r--r--kernel/rcutorture.c4
-rw-r--r--kernel/rcutree.c131
-rw-r--r--kernel/rcutree.h2
-rw-r--r--kernel/rcutree_plugin.h69
-rw-r--r--kernel/rcutree_trace.c4
-rw-r--r--kernel/relay.c17
-rw-r--r--kernel/resource.c16
-rw-r--r--kernel/sched.c787
-rw-r--r--kernel/sched_clock.c1
-rw-r--r--kernel/sched_debug.c118
-rw-r--r--kernel/sched_fair.c350
-rw-r--r--kernel/sched_features.h55
-rw-r--r--kernel/sched_idletask.c8
-rw-r--r--kernel/sched_rt.c15
-rw-r--r--kernel/signal.c63
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/stop_machine.c537
-rw-r--r--kernel/sys.c37
-rw-r--r--kernel/sysctl.c613
-rw-r--r--kernel/sysctl_binary.c10
-rw-r--r--kernel/time.c11
-rw-r--r--kernel/time/clocksource.c48
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/tick-sched.c84
-rw-r--r--kernel/time/timekeeping.c35
-rw-r--r--kernel/time/timer_list.c1
-rw-r--r--kernel/timer.c149
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/blktrace.c138
-rw-r--r--kernel/trace/ftrace.c36
-rw-r--r--kernel/trace/kmemtrace.c70
-rw-r--r--kernel/trace/ring_buffer.c179
-rw-r--r--kernel/trace/ring_buffer_benchmark.c5
-rw-r--r--kernel/trace/trace.c196
-rw-r--r--kernel/trace/trace.h56
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_entries.h12
-rw-r--r--kernel/trace/trace_event_perf.c185
-rw-r--r--kernel/trace/trace_events.c139
-rw-r--r--kernel/trace/trace_events_filter.c30
-rw-r--r--kernel/trace/trace_export.c16
-rw-r--r--kernel/trace/trace_functions_graph.c176
-rw-r--r--kernel/trace/trace_hw_branches.c312
-rw-r--r--kernel/trace/trace_irqsoff.c271
-rw-r--r--kernel/trace/trace_kprobe.c648
-rw-r--r--kernel/trace/trace_ksym.c26
-rw-r--r--kernel/trace/trace_output.c155
-rw-r--r--kernel/trace/trace_output.h2
-rw-r--r--kernel/trace/trace_sched_switch.c21
-rw-r--r--kernel/trace/trace_sched_wakeup.c29
-rw-r--r--kernel/trace/trace_selftest.c64
-rw-r--r--kernel/trace/trace_syscalls.c146
-rw-r--r--kernel/trace/trace_workqueue.c26
-rw-r--r--kernel/tracepoint.c91
-rw-r--r--kernel/user.c11
-rw-r--r--kernel/user_namespace.c4
-rw-r--r--kernel/workqueue.c45
120 files changed, 14067 insertions, 6115 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index a987aa1676b..057472fbc27 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -68,14 +68,14 @@ obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
-obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
diff --git a/kernel/acct.c b/kernel/acct.c
index e4c0e1fee9b..385b88461c2 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -216,7 +216,6 @@ static int acct_on(char *name)
{
struct file *file;
struct vfsmount *mnt;
- int error;
struct pid_namespace *ns;
struct bsd_acct_struct *acct = NULL;
@@ -244,13 +243,6 @@ static int acct_on(char *name)
}
}
- error = security_acct(file);
- if (error) {
- kfree(acct);
- filp_close(file, NULL);
- return error;
- }
-
spin_lock(&acct_lock);
if (ns->bacct == NULL) {
ns->bacct = acct;
@@ -281,7 +273,7 @@ static int acct_on(char *name)
*/
SYSCALL_DEFINE1(acct, const char __user *, name)
{
- int error;
+ int error = 0;
if (!capable(CAP_SYS_PACCT))
return -EPERM;
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
if (acct == NULL)
return 0;
- error = security_acct(NULL);
- if (!error) {
- spin_lock(&acct_lock);
- acct_file_reopen(acct, NULL, NULL);
- spin_unlock(&acct_lock);
- }
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
+
return error;
}
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e4697e9b27..2f05303715a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -15,7 +15,6 @@
#include <linux/syscalls.h>
#include <linux/pid_namespace.h>
#include <asm/uaccess.h>
-#include "cred-internals.h"
/*
* Leveraged for setting/resetting capabilities
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6d870f2d122..422cb19f156 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2994,7 +2994,6 @@ static void cgroup_event_remove(struct work_struct *work)
remove);
struct cgroup *cgrp = event->cgrp;
- /* TODO: check return code */
event->cft->unregister_event(cgrp, event->cft, event->eventfd);
eventfd_ctx_put(event->eventfd);
@@ -3016,7 +3015,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
unsigned long flags = (unsigned long)key;
if (flags & POLLHUP) {
- remove_wait_queue_locked(event->wqh, &event->wait);
+ __remove_wait_queue(event->wqh, &event->wait);
spin_lock(&cgrp->event_list_lock);
list_del(&event->list);
spin_unlock(&cgrp->event_list_lock);
@@ -3615,7 +3614,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
* @ss: the subsystem to load
*
* This function should be called in a modular subsystem's initcall. If the
- * subsytem is built as a module, it will be assigned a new subsys_id and set
+ * subsystem is built as a module, it will be assigned a new subsys_id and set
* up for use. If the subsystem is built-in anyway, work is delegated to the
* simpler cgroup_init_subsys.
*/
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e5c0244962b..ce71ed53e88 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -89,10 +89,10 @@ struct cgroup_subsys freezer_subsys;
/* Locks taken and their ordering
* ------------------------------
- * css_set_lock
* cgroup_mutex (AKA cgroup_lock)
- * task->alloc_lock (AKA task_lock)
* freezer->lock
+ * css_set_lock
+ * task->alloc_lock (AKA task_lock)
* task->sighand->siglock
*
* cgroup code forces css_set_lock to be taken before task->alloc_lock
@@ -100,33 +100,38 @@ struct cgroup_subsys freezer_subsys;
* freezer_create(), freezer_destroy():
* cgroup_mutex [ by cgroup core ]
*
- * can_attach():
- * cgroup_mutex
+ * freezer_can_attach():
+ * cgroup_mutex (held by caller of can_attach)
*
- * cgroup_frozen():
+ * cgroup_freezing_or_frozen():
* task->alloc_lock (to get task's cgroup)
*
* freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
- * task->alloc_lock (to get task's cgroup)
* freezer->lock
* sighand->siglock (if the cgroup is freezing)
*
* freezer_read():
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
*
* freezer_write() (freeze):
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
- * sighand->siglock
+ * sighand->siglock (fake signal delivery inside freeze_task())
*
* freezer_write() (unfreeze):
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock (to prevent races with freeze_task())
+ * task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
* sighand->siglock
*/
static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
diff --git a/kernel/compat.c b/kernel/compat.c
index 7f40e9275fd..5adab05a317 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -495,29 +495,26 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
{
int ret;
cpumask_var_t mask;
- unsigned long *k;
- unsigned int min_length = cpumask_size();
-
- if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
- min_length = sizeof(compat_ulong_t);
- if (len < min_length)
+ if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+ return -EINVAL;
+ if (len & (sizeof(compat_ulong_t)-1))
return -EINVAL;
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
- if (ret < 0)
- goto out;
+ if (ret == 0) {
+ size_t retlen = min_t(size_t, len, cpumask_size());
- k = cpumask_bits(mask);
- ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
- if (ret == 0)
- ret = min_length;
-
-out:
+ if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8))
+ ret = -EFAULT;
+ else
+ ret = retlen;
+ }
free_cpumask_var(mask);
+
return ret;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 25bba73b1be..3097382eb44 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -20,6 +20,20 @@
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
+/*
+ * The following two API's must be used when attempting
+ * to serialize the updates to cpu_online_mask, cpu_present_mask.
+ */
+void cpu_maps_update_begin(void)
+{
+ mutex_lock(&cpu_add_remove_lock);
+}
+
+void cpu_maps_update_done(void)
+{
+ mutex_unlock(&cpu_add_remove_lock);
+}
+
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
@@ -27,6 +41,8 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
*/
static int cpu_hotplug_disabled;
+#ifdef CONFIG_HOTPLUG_CPU
+
static struct {
struct task_struct *active_writer;
struct mutex lock; /* Synchronizes accesses to refcount, */
@@ -41,8 +57,6 @@ static struct {
.refcount = 0,
};
-#ifdef CONFIG_HOTPLUG_CPU
-
void get_online_cpus(void)
{
might_sleep();
@@ -67,22 +81,6 @@ void put_online_cpus(void)
}
EXPORT_SYMBOL_GPL(put_online_cpus);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-/*
- * The following two API's must be used when attempting
- * to serialize the updates to cpu_online_mask, cpu_present_mask.
- */
-void cpu_maps_update_begin(void)
-{
- mutex_lock(&cpu_add_remove_lock);
-}
-
-void cpu_maps_update_done(void)
-{
- mutex_unlock(&cpu_add_remove_lock);
-}
-
/*
* This ensures that the hotplug operation can begin only when the
* refcount goes to zero.
@@ -124,6 +122,12 @@ static void cpu_hotplug_done(void)
cpu_hotplug.active_writer = NULL;
mutex_unlock(&cpu_hotplug.lock);
}
+
+#else /* #if CONFIG_HOTPLUG_CPU */
+static void cpu_hotplug_begin(void) {}
+static void cpu_hotplug_done(void) {}
+#endif /* #esle #if CONFIG_HOTPLUG_CPU */
+
/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
@@ -134,8 +138,29 @@ int __ref register_cpu_notifier(struct notifier_block *nb)
return ret;
}
+static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+ int *nr_calls)
+{
+ int ret;
+
+ ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+ nr_calls);
+
+ return notifier_to_errno(ret);
+}
+
+static int cpu_notify(unsigned long val, void *v)
+{
+ return __cpu_notify(val, v, -1, NULL);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
+static void cpu_notify_nofail(unsigned long val, void *v)
+{
+ BUG_ON(cpu_notify(val, v));
+}
+
EXPORT_SYMBOL(register_cpu_notifier);
void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -164,6 +189,7 @@ static inline void check_for_tasks(int cpu)
}
struct take_cpu_down_param {
+ struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
@@ -172,6 +198,7 @@ struct take_cpu_down_param {
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
+ unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
@@ -179,9 +206,10 @@ static int __ref take_cpu_down(void *_param)
if (err < 0)
return err;
- raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
- param->hcpu);
+ cpu_notify(CPU_DYING | param->mod, param->hcpu);
+ if (task_cpu(param->caller) == cpu)
+ move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -192,10 +220,10 @@ static int __ref take_cpu_down(void *_param)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
- cpumask_var_t old_allowed;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
+ .caller = current,
.mod = mod,
.hcpu = hcpu,
};
@@ -206,38 +234,26 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
- return -ENOMEM;
-
cpu_hotplug_begin();
set_cpu_active(cpu, false);
- err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
- hcpu, -1, &nr_calls);
- if (err == NOTIFY_BAD) {
+ err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ if (err) {
set_cpu_active(cpu, true);
nr_calls--;
- __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
- hcpu, nr_calls, NULL);
+ __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
__func__, cpu);
- err = -EINVAL;
goto out_release;
}
- /* Ensure that we are not runnable on dying cpu */
- cpumask_copy(old_allowed, &current->cpus_allowed);
- set_cpus_allowed_ptr(current, cpu_active_mask);
-
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
- if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
- hcpu) == NOTIFY_BAD)
- BUG();
+ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
- goto out_allowed;
+ goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -249,22 +265,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
__cpu_die(cpu);
/* CPU is completely dead: tell everyone. Too late to complain. */
- if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
- hcpu) == NOTIFY_BAD)
- BUG();
+ cpu_notify_nofail(CPU_DEAD | mod, hcpu);
check_for_tasks(cpu);
-out_allowed:
- set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
- if (!err) {
- if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
- hcpu) == NOTIFY_BAD)
- BUG();
- }
- free_cpumask_var(old_allowed);
+ if (!err)
+ cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
return err;
}
@@ -272,9 +280,6 @@ int __ref cpu_down(unsigned int cpu)
{
int err;
- err = stop_machine_create();
- if (err)
- return err;
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
@@ -286,7 +291,6 @@ int __ref cpu_down(unsigned int cpu)
out:
cpu_maps_update_done();
- stop_machine_destroy();
return err;
}
EXPORT_SYMBOL(cpu_down);
@@ -303,13 +307,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
return -EINVAL;
cpu_hotplug_begin();
- ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
- -1, &nr_calls);
- if (ret == NOTIFY_BAD) {
+ ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
+ if (ret) {
nr_calls--;
printk("%s: attempt to bring up CPU %u failed\n",
__func__, cpu);
- ret = -EINVAL;
goto out_notify;
}
@@ -322,12 +324,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
set_cpu_active(cpu, true);
/* Now call notifier in preparation. */
- raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
+ cpu_notify(CPU_ONLINE | mod, hcpu);
out_notify:
if (ret != 0)
- __raw_notifier_call_chain(&cpu_chain,
- CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+ __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
cpu_hotplug_done();
return ret;
@@ -336,6 +337,12 @@ out_notify:
int __cpuinit cpu_up(unsigned int cpu)
{
int err = 0;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ int nid;
+ pg_data_t *pgdat;
+#endif
+
if (!cpu_possible(cpu)) {
printk(KERN_ERR "can't online cpu %d because it is not "
"configured as may-hotadd at boot time\n", cpu);
@@ -346,6 +353,28 @@ int __cpuinit cpu_up(unsigned int cpu)
return -EINVAL;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+ nid = cpu_to_node(cpu);
+ if (!node_online(nid)) {
+ err = mem_online_node(nid);
+ if (err)
+ return err;
+ }
+
+ pgdat = NODE_DATA(nid);
+ if (!pgdat) {
+ printk(KERN_ERR
+ "Can't online cpu %d due to NULL pgdat\n", cpu);
+ return -ENOMEM;
+ }
+
+ if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
+ mutex_lock(&zonelists_mutex);
+ build_all_zonelists(NULL);
+ mutex_unlock(&zonelists_mutex);
+ }
+#endif
+
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
@@ -367,9 +396,6 @@ int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error;
- error = stop_machine_create();
- if (error)
- return error;
cpu_maps_update_begin();
first_cpu = cpumask_first(cpu_online_mask);
/*
@@ -400,7 +426,6 @@ int disable_nonboot_cpus(void)
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
- stop_machine_destroy();
return error;
}
@@ -467,7 +492,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
- raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+ cpu_notify(val, (void *)(long)cpu);
}
#endif /* CONFIG_SMP */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d10946748ec..02b9611eadd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
* In order to avoid seeing no nodes if the old and new nodes are disjoint,
* we structure updates as setting all new allowed nodes, then clearing newly
* disallowed ones.
- *
- * Called with task's alloc_lock held
*/
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
+repeat:
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return;
+ if (current->flags & PF_EXITING) /* Let dying task have memory */
+ return;
+
+ task_lock(tsk);
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
- mpol_rebind_task(tsk, &tsk->mems_allowed);
- mpol_rebind_task(tsk, newmems);
+ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
+
+
+ /*
+ * ensure checking ->mems_allowed_change_disable after setting all new
+ * allowed nodes.
+ *
+ * the read-side task can see an nodemask with new allowed nodes and
+ * old allowed nodes. and if it allocates page when cpuset clears newly
+ * disallowed ones continuous, it can see the new allowed bits.
+ *
+ * And if setting all new allowed nodes is after the checking, setting
+ * all new allowed nodes and clearing newly disallowed ones will be done
+ * continuous, and the read-side task may find no node to alloc page.
+ */
+ smp_mb();
+
+ /*
+ * Allocation of memory is very fast, we needn't sleep when waiting
+ * for the read-side.
+ */
+ while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+ task_unlock(tsk);
+ if (!task_curr(tsk))
+ yield();
+ goto repeat;
+ }
+
+ /*
+ * ensure checking ->mems_allowed_change_disable before clearing all new
+ * disallowed nodes.
+ *
+ * if clearing newly disallowed bits before the checking, the read-side
+ * task may find no node to alloc page.
+ */
+ smp_mb();
+
+ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
tsk->mems_allowed = *newmems;
+ task_unlock(tsk);
}
/*
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
cs = cgroup_cs(scan->cg);
guarantee_online_mems(cs, newmems);
- task_lock(p);
cpuset_change_task_nodemask(p, newmems);
- task_unlock(p);
NODEMASK_FREE(newmems);
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
err = set_cpus_allowed_ptr(tsk, cpus_attach);
WARN_ON_ONCE(err);
- task_lock(tsk);
cpuset_change_task_nodemask(tsk, to);
- task_unlock(tsk);
cpuset_update_task_spread_flag(cs, tsk);
}
@@ -2182,19 +2224,52 @@ void __init cpuset_init_smp(void)
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
mutex_lock(&callback_mutex);
- cpuset_cpus_allowed_locked(tsk, pmask);
+ task_lock(tsk);
+ guarantee_online_cpus(task_cs(tsk), pmask);
+ task_unlock(tsk);
mutex_unlock(&callback_mutex);
}
-/**
- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
- * Must be called with callback_mutex held.
- **/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
+int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
- task_lock(tsk);
- guarantee_online_cpus(task_cs(tsk), pmask);
- task_unlock(tsk);
+ const struct cpuset *cs;
+ int cpu;
+
+ rcu_read_lock();
+ cs = task_cs(tsk);
+ if (cs)
+ cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+ rcu_read_unlock();
+
+ /*
+ * We own tsk->cpus_allowed, nobody can change it under us.
+ *
+ * But we used cs && cs->cpus_allowed lockless and thus can
+ * race with cgroup_attach_task() or update_cpumask() and get
+ * the wrong tsk->cpus_allowed. However, both cases imply the
+ * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
+ * which takes task_rq_lock().
+ *
+ * If we are called after it dropped the lock we must see all
+ * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
+ * set any mask even if it is not right from task_cs() pov,
+ * the pending set_cpus_allowed_ptr() will fix things.
+ */
+
+ cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
+ if (cpu >= nr_cpu_ids) {
+ /*
+ * Either tsk->cpus_allowed is wrong (see above) or it
+ * is actually empty. The latter case is only possible
+ * if we are racing with remove_tasks_in_empty_cpuset().
+ * Like above we can temporary set any mask and rely on
+ * set_cpus_allowed_ptr() as synchronization point.
+ */
+ cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+ cpu = cpumask_any(cpu_active_mask);
+ }
+
+ return cpu;
}
void cpuset_init_current_mems_allowed(void)
@@ -2383,22 +2458,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
}
/**
- * cpuset_lock - lock out any changes to cpuset structures
- *
- * The out of memory (oom) code needs to mutex_lock cpusets
- * from being changed while it scans the tasklist looking for a
- * task in an overlapping cpuset. Expose callback_mutex via this
- * cpuset_lock() routine, so the oom code can lock it, before
- * locking the task list. The tasklist_lock is a spinlock, so
- * must be taken inside callback_mutex.
- */
-
-void cpuset_lock(void)
-{
- mutex_lock(&callback_mutex);
-}
-
-/**
* cpuset_unlock - release lock on cpuset changes
*
* Undo the lock taken in a previous cpuset_lock() call.
@@ -2410,7 +2469,8 @@ void cpuset_unlock(void)
}
/**
- * cpuset_mem_spread_node() - On which node to begin search for a page
+ * cpuset_mem_spread_node() - On which node to begin search for a file page
+ * cpuset_slab_spread_node() - On which node to begin search for a slab page
*
* If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
* tasks in a cpuset with is_spread_page or is_spread_slab set),
@@ -2435,16 +2495,27 @@ void cpuset_unlock(void)
* See kmem_cache_alloc_node().
*/
-int cpuset_mem_spread_node(void)
+static int cpuset_spread_node(int *rotor)
{
int node;
- node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
+ node = next_node(*rotor, current->mems_allowed);
if (node == MAX_NUMNODES)
node = first_node(current->mems_allowed);
- current->cpuset_mem_spread_rotor = node;
+ *rotor = node;
return node;
}
+
+int cpuset_mem_spread_node(void)
+{
+ return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
+}
+
+int cpuset_slab_spread_node(void)
+{
+ return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
+}
+
EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
/**
diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h
deleted file mode 100644
index 2dc4fc2d0bf..00000000000
--- a/kernel/cred-internals.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Internal credentials stuff
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-/*
- * user.c
- */
-static inline void sched_switch_user(struct task_struct *p)
-{
-#ifdef CONFIG_USER_SCHED
- sched_move_task(p);
-#endif /* CONFIG_USER_SCHED */
-}
-
diff --git a/kernel/cred.c b/kernel/cred.c
index 62af1816c23..a2d5504fbcc 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -17,7 +17,6 @@
#include <linux/init_task.h>
#include <linux/security.h>
#include <linux/cn_proc.h>
-#include "cred-internals.h"
#if 0
#define kdebug(FMT, ...) \
@@ -348,66 +347,6 @@ struct cred *prepare_exec_creds(void)
}
/*
- * prepare new credentials for the usermode helper dispatcher
- */
-struct cred *prepare_usermodehelper_creds(void)
-{
-#ifdef CONFIG_KEYS
- struct thread_group_cred *tgcred = NULL;
-#endif
- struct cred *new;
-
-#ifdef CONFIG_KEYS
- tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC);
- if (!tgcred)
- return NULL;
-#endif
-
- new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
- if (!new)
- goto free_tgcred;
-
- kdebug("prepare_usermodehelper_creds() alloc %p", new);
-
- memcpy(new, &init_cred, sizeof(struct cred));
-
- atomic_set(&new->usage, 1);
- set_cred_subscribers(new, 0);
- get_group_info(new->group_info);
- get_uid(new->user);
-
-#ifdef CONFIG_KEYS
- new->thread_keyring = NULL;
- new->request_key_auth = NULL;
- new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
-
- atomic_set(&tgcred->usage, 1);
- spin_lock_init(&tgcred->lock);
- new->tgcred = tgcred;
-#endif
-
-#ifdef CONFIG_SECURITY
- new->security = NULL;
-#endif
- if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
- goto error;
- validate_creds(new);
-
- BUG_ON(atomic_read(&new->usage) != 1);
- return new;
-
-error:
- put_cred(new);
- return NULL;
-
-free_tgcred:
-#ifdef CONFIG_KEYS
- kfree(tgcred);
-#endif
- return NULL;
-}
-
-/*
* Copy credentials for the new process created by fork()
*
* We share if we can, but under some circumstances we have to generate a new
@@ -523,8 +462,6 @@ int commit_creds(struct cred *new)
#endif
BUG_ON(atomic_read(&new->usage) < 1);
- security_commit_creds(new, old);
-
get_cred(new); /* we will require a ref for the subj creds too */
/* dumpability changes */
@@ -560,8 +497,6 @@ int commit_creds(struct cred *new)
atomic_dec(&old->user->processes);
alter_cred_subscribers(old, -2);
- sched_switch_user(task);
-
/* send notifications */
if (new->uid != old->uid ||
new->euid != old->euid ||
diff --git a/kernel/debug/Makefile b/kernel/debug/Makefile
new file mode 100644
index 00000000000..a85edc33998
--- /dev/null
+++ b/kernel/debug/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the linux kernel debugger
+#
+
+obj-$(CONFIG_KGDB) += debug_core.o gdbstub.o
+obj-$(CONFIG_KGDB_KDB) += kdb/
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
new file mode 100644
index 00000000000..5cb7cd1de10
--- /dev/null
+++ b/kernel/debug/debug_core.c
@@ -0,0 +1,983 @@
+/*
+ * Kernel Debug Core
+ *
+ * Maintainer: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (C) 2000-2001 VERITAS Software Corporation.
+ * Copyright (C) 2002-2004 Timesys Corporation
+ * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
+ * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
+ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
+ * Copyright (C) 2005-2009 Wind River Systems, Inc.
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Contributors at various stages not listed above:
+ * Jason Wessel ( jason.wessel@windriver.com )
+ * George Anzinger <george@mvista.com>
+ * Anurekh Saxena (anurekh.saxena@timesys.com)
+ * Lake Stevens Instrument Division (Glenn Engel)
+ * Jim Kingdon, Cygnus Support.
+ *
+ * Original KGDB stub: David Grothe <dave@gcom.com>,
+ * Tigran Aivazian <tigran@sco.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#include <linux/pid_namespace.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/console.h>
+#include <linux/threads.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/sysrq.h>
+#include <linux/init.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/pid.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+
+#include "debug_core.h"
+
+static int kgdb_break_asap;
+
+struct debuggerinfo_struct kgdb_info[NR_CPUS];
+
+/**
+ * kgdb_connected - Is a host GDB connected to us?
+ */
+int kgdb_connected;
+EXPORT_SYMBOL_GPL(kgdb_connected);
+
+/* All the KGDB handlers are installed */
+int kgdb_io_module_registered;
+
+/* Guard for recursive entry */
+static int exception_level;
+
+struct kgdb_io *dbg_io_ops;
+static DEFINE_SPINLOCK(kgdb_registration_lock);
+
+/* kgdb console driver is loaded */
+static int kgdb_con_registered;
+/* determine if kgdb console output should be used */
+static int kgdb_use_con;
+/* Flag for alternate operations for early debugging */
+bool dbg_is_early = true;
+/* Next cpu to become the master debug core */
+int dbg_switch_cpu;
+
+/* Use kdb or gdbserver mode */
+int dbg_kdb_mode = 1;
+
+static int __init opt_kgdb_con(char *str)
+{
+ kgdb_use_con = 1;
+ return 0;
+}
+
+early_param("kgdbcon", opt_kgdb_con);
+
+module_param(kgdb_use_con, int, 0644);
+
+/*
+ * Holds information about breakpoints in a kernel. These breakpoints are
+ * added and removed by gdb.
+ */
+static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = {
+ [0 ... KGDB_MAX_BREAKPOINTS-1] = { .state = BP_UNDEFINED }
+};
+
+/*
+ * The CPU# of the active CPU, or -1 if none:
+ */
+atomic_t kgdb_active = ATOMIC_INIT(-1);
+EXPORT_SYMBOL_GPL(kgdb_active);
+
+/*
+ * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
+ * bootup code (which might not have percpu set up yet):
+ */
+static atomic_t passive_cpu_wait[NR_CPUS];
+static atomic_t cpu_in_kgdb[NR_CPUS];
+static atomic_t kgdb_break_tasklet_var;
+atomic_t kgdb_setting_breakpoint;
+
+struct task_struct *kgdb_usethread;
+struct task_struct *kgdb_contthread;
+
+int kgdb_single_step;
+static pid_t kgdb_sstep_pid;
+
+/* to keep track of the CPU which is doing the single stepping*/
+atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
+
+/*
+ * If you are debugging a problem where roundup (the collection of
+ * all other CPUs) is a problem [this should be extremely rare],
+ * then use the nokgdbroundup option to avoid roundup. In that case
+ * the other CPUs might interfere with your debugging context, so
+ * use this with care:
+ */
+static int kgdb_do_roundup = 1;
+
+static int __init opt_nokgdbroundup(char *str)
+{
+ kgdb_do_roundup = 0;
+
+ return 0;
+}
+
+early_param("nokgdbroundup", opt_nokgdbroundup);
+
+/*
+ * Finally, some KGDB code :-)
+ */
+
+/*
+ * Weak aliases for breakpoint management,
+ * can be overriden by architectures when needed:
+ */
+int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
+{
+ int err;
+
+ err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+
+ return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+}
+
+int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
+{
+ return probe_kernel_write((char *)addr,
+ (char *)bundle, BREAK_INSTR_SIZE);
+}
+
+int __weak kgdb_validate_break_address(unsigned long addr)
+{
+ char tmp_variable[BREAK_INSTR_SIZE];
+ int err;
+ /* Validate setting the breakpoint and then removing it. In the
+ * remove fails, the kernel needs to emit a bad message because we
+ * are deep trouble not being able to put things back the way we
+ * found them.
+ */
+ err = kgdb_arch_set_breakpoint(addr, tmp_variable);
+ if (err)
+ return err;
+ err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
+ if (err)
+ printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
+ "memory destroyed at: %lx", addr);
+ return err;
+}
+
+unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
+{
+ return instruction_pointer(regs);
+}
+
+int __weak kgdb_arch_init(void)
+{
+ return 0;
+}
+
+int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
+{
+ return 0;
+}
+
+/**
+ * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
+ * @regs: Current &struct pt_regs.
+ *
+ * This function will be called if the particular architecture must
+ * disable hardware debugging while it is processing gdb packets or
+ * handling exception.
+ */
+void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
+{
+}
+
+/*
+ * Some architectures need cache flushes when we set/clear a
+ * breakpoint:
+ */
+static void kgdb_flush_swbreak_addr(unsigned long addr)
+{
+ if (!CACHE_FLUSH_IS_SAFE)
+ return;
+
+ if (current->mm && current->mm->mmap_cache) {
+ flush_cache_range(current->mm->mmap_cache,
+ addr, addr + BREAK_INSTR_SIZE);
+ }
+ /* Force flush instruction cache if it was outside the mm */
+ flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+}
+
+/*
+ * SW breakpoint management:
+ */
+int dbg_activate_sw_breakpoints(void)
+{
+ unsigned long addr;
+ int error;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (kgdb_break[i].state != BP_SET)
+ continue;
+
+ addr = kgdb_break[i].bpt_addr;
+ error = kgdb_arch_set_breakpoint(addr,
+ kgdb_break[i].saved_instr);
+ if (error) {
+ ret = error;
+ printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+ continue;
+ }
+
+ kgdb_flush_swbreak_addr(addr);
+ kgdb_break[i].state = BP_ACTIVE;
+ }
+ return ret;
+}
+
+int dbg_set_sw_break(unsigned long addr)
+{
+ int err = kgdb_validate_break_address(addr);
+ int breakno = -1;
+ int i;
+
+ if (err)
+ return err;
+
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if ((kgdb_break[i].state == BP_SET) &&
+ (kgdb_break[i].bpt_addr == addr))
+ return -EEXIST;
+ }
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (kgdb_break[i].state == BP_REMOVED &&
+ kgdb_break[i].bpt_addr == addr) {
+ breakno = i;
+ break;
+ }
+ }
+
+ if (breakno == -1) {
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (kgdb_break[i].state == BP_UNDEFINED) {
+ breakno = i;
+ break;
+ }
+ }
+ }
+
+ if (breakno == -1)
+ return -E2BIG;
+
+ kgdb_break[breakno].state = BP_SET;
+ kgdb_break[breakno].type = BP_BREAKPOINT;
+ kgdb_break[breakno].bpt_addr = addr;
+
+ return 0;
+}
+
+int dbg_deactivate_sw_breakpoints(void)
+{
+ unsigned long addr;
+ int error;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (kgdb_break[i].state != BP_ACTIVE)
+ continue;
+ addr = kgdb_break[i].bpt_addr;
+ error = kgdb_arch_remove_breakpoint(addr,
+ kgdb_break[i].saved_instr);
+ if (error) {
+ printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+ ret = error;
+ }
+
+ kgdb_flush_swbreak_addr(addr);
+ kgdb_break[i].state = BP_SET;
+ }
+ return ret;
+}
+
+int dbg_remove_sw_break(unsigned long addr)
+{
+ int i;
+
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if ((kgdb_break[i].state == BP_SET) &&
+ (kgdb_break[i].bpt_addr == addr)) {
+ kgdb_break[i].state = BP_REMOVED;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+int kgdb_isremovedbreak(unsigned long addr)
+{
+ int i;
+
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if ((kgdb_break[i].state == BP_REMOVED) &&
+ (kgdb_break[i].bpt_addr == addr))
+ return 1;
+ }
+ return 0;
+}
+
+int dbg_remove_all_break(void)
+{
+ unsigned long addr;
+ int error;
+ int i;
+
+ /* Clear memory breakpoints. */
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (kgdb_break[i].state != BP_ACTIVE)
+ goto setundefined;
+ addr = kgdb_break[i].bpt_addr;
+ error = kgdb_arch_remove_breakpoint(addr,
+ kgdb_break[i].saved_instr);
+ if (error)
+ printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
+ addr);
+setundefined:
+ kgdb_break[i].state = BP_UNDEFINED;
+ }
+
+ /* Clear hardware breakpoints. */
+ if (arch_kgdb_ops.remove_all_hw_break)
+ arch_kgdb_ops.remove_all_hw_break();
+
+ return 0;
+}
+
+/*
+ * Return true if there is a valid kgdb I/O module. Also if no
+ * debugger is attached a message can be printed to the console about
+ * waiting for the debugger to attach.
+ *
+ * The print_wait argument is only to be true when called from inside
+ * the core kgdb_handle_exception, because it will wait for the
+ * debugger to attach.
+ */
+static int kgdb_io_ready(int print_wait)
+{
+ if (!dbg_io_ops)
+ return 0;
+ if (kgdb_connected)
+ return 1;
+ if (atomic_read(&kgdb_setting_breakpoint))
+ return 1;
+ if (print_wait) {
+#ifdef CONFIG_KGDB_KDB
+ if (!dbg_kdb_mode)
+ printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n");
+#else
+ printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
+#endif
+ }
+ return 1;
+}
+
+static int kgdb_reenter_check(struct kgdb_state *ks)
+{
+ unsigned long addr;
+
+ if (atomic_read(&kgdb_active) != raw_smp_processor_id())
+ return 0;
+
+ /* Panic on recursive debugger calls: */
+ exception_level++;
+ addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
+ dbg_deactivate_sw_breakpoints();
+
+ /*
+ * If the break point removed ok at the place exception
+ * occurred, try to recover and print a warning to the end
+ * user because the user planted a breakpoint in a place that
+ * KGDB needs in order to function.
+ */
+ if (dbg_remove_sw_break(addr) == 0) {
+ exception_level = 0;
+ kgdb_skipexception(ks->ex_vector, ks->linux_regs);
+ dbg_activate_sw_breakpoints();
+ printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
+ addr);
+ WARN_ON_ONCE(1);
+
+ return 1;
+ }
+ dbg_remove_all_break();
+ kgdb_skipexception(ks->ex_vector, ks->linux_regs);
+
+ if (exception_level > 1) {
+ dump_stack();
+ panic("Recursive entry to debugger");
+ }
+
+ printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n");
+#ifdef CONFIG_KGDB_KDB
+ /* Allow kdb to debug itself one level */
+ return 0;
+#endif
+ dump_stack();
+ panic("Recursive entry to debugger");
+
+ return 1;
+}
+
+static void dbg_cpu_switch(int cpu, int next_cpu)
+{
+ /* Mark the cpu we are switching away from as a slave when it
+ * holds the kgdb_active token. This must be done so that the
+ * that all the cpus wait in for the debug core will not enter
+ * again as the master. */
+ if (cpu == atomic_read(&kgdb_active)) {
+ kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
+ kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER;
+ }
+ kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER;
+}
+
+static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
+{
+ unsigned long flags;
+ int sstep_tries = 100;
+ int error;
+ int i, cpu;
+ int trace_on = 0;
+acquirelock:
+ /*
+ * Interrupts will be restored by the 'trap return' code, except when
+ * single stepping.
+ */
+ local_irq_save(flags);
+
+ cpu = ks->cpu;
+ kgdb_info[cpu].debuggerinfo = regs;
+ kgdb_info[cpu].task = current;
+ kgdb_info[cpu].ret_state = 0;
+ kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
+ /*
+ * Make sure the above info reaches the primary CPU before
+ * our cpu_in_kgdb[] flag setting does:
+ */
+ atomic_inc(&cpu_in_kgdb[cpu]);
+
+ if (exception_level == 1)
+ goto cpu_master_loop;
+
+ /*
+ * CPU will loop if it is a slave or request to become a kgdb
+ * master cpu and acquire the kgdb_active lock:
+ */
+ while (1) {
+cpu_loop:
+ if (kgdb_info[cpu].exception_state & DCPU_NEXT_MASTER) {
+ kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER;
+ goto cpu_master_loop;
+ } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
+ if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
+ break;
+ } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
+ if (!atomic_read(&passive_cpu_wait[cpu]))
+ goto return_normal;
+ } else {
+return_normal:
+ /* Return to normal operation by executing any
+ * hw breakpoint fixup.
+ */
+ if (arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ if (trace_on)
+ tracing_on();
+ atomic_dec(&cpu_in_kgdb[cpu]);
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ local_irq_restore(flags);
+ return 0;
+ }
+ cpu_relax();
+ }
+
+ /*
+ * For single stepping, try to only enter on the processor
+ * that was single stepping. To gaurd against a deadlock, the
+ * kernel will only try for the value of sstep_tries before
+ * giving up and continuing on.
+ */
+ if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
+ (kgdb_info[cpu].task &&
+ kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
+ atomic_set(&kgdb_active, -1);
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ local_irq_restore(flags);
+
+ goto acquirelock;
+ }
+
+ if (!kgdb_io_ready(1)) {
+ kgdb_info[cpu].ret_state = 1;
+ goto kgdb_restore; /* No I/O connection, resume the system */
+ }
+
+ /*
+ * Don't enter if we have hit a removed breakpoint.
+ */
+ if (kgdb_skipexception(ks->ex_vector, ks->linux_regs))
+ goto kgdb_restore;
+
+ /* Call the I/O driver's pre_exception routine */
+ if (dbg_io_ops->pre_exception)
+ dbg_io_ops->pre_exception();
+
+ kgdb_disable_hw_debug(ks->linux_regs);
+
+ /*
+ * Get the passive CPU lock which will hold all the non-primary
+ * CPU in a spin state while the debugger is active
+ */
+ if (!kgdb_single_step) {
+ for (i = 0; i < NR_CPUS; i++)
+ atomic_inc(&passive_cpu_wait[i]);
+ }
+
+#ifdef CONFIG_SMP
+ /* Signal the other CPUs to enter kgdb_wait() */
+ if ((!kgdb_single_step) && kgdb_do_roundup)
+ kgdb_roundup_cpus(flags);
+#endif
+
+ /*
+ * Wait for the other CPUs to be notified and be waiting for us:
+ */
+ for_each_online_cpu(i) {
+ while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i]))
+ cpu_relax();
+ }
+
+ /*
+ * At this point the primary processor is completely
+ * in the debugger and all secondary CPUs are quiescent
+ */
+ dbg_deactivate_sw_breakpoints();
+ kgdb_single_step = 0;
+ kgdb_contthread = current;
+ exception_level = 0;
+ trace_on = tracing_is_on();
+ if (trace_on)
+ tracing_off();
+
+ while (1) {
+cpu_master_loop:
+ if (dbg_kdb_mode) {
+ kgdb_connected = 1;
+ error = kdb_stub(ks);
+ } else {
+ error = gdb_serial_stub(ks);
+ }
+
+ if (error == DBG_PASS_EVENT) {
+ dbg_kdb_mode = !dbg_kdb_mode;
+ kgdb_connected = 0;
+ } else if (error == DBG_SWITCH_CPU_EVENT) {
+ dbg_cpu_switch(cpu, dbg_switch_cpu);
+ goto cpu_loop;
+ } else {
+ kgdb_info[cpu].ret_state = error;
+ break;
+ }
+ }
+
+ /* Call the I/O driver's post_exception routine */
+ if (dbg_io_ops->post_exception)
+ dbg_io_ops->post_exception();
+
+ atomic_dec(&cpu_in_kgdb[ks->cpu]);
+
+ if (!kgdb_single_step) {
+ for (i = NR_CPUS-1; i >= 0; i--)
+ atomic_dec(&passive_cpu_wait[i]);
+ /*
+ * Wait till all the CPUs have quit from the debugger,
+ * but allow a CPU that hit an exception and is
+ * waiting to become the master to remain in the debug
+ * core.
+ */
+ for_each_online_cpu(i) {
+ while (kgdb_do_roundup &&
+ atomic_read(&cpu_in_kgdb[i]) &&
+ !(kgdb_info[i].exception_state &
+ DCPU_WANT_MASTER))
+ cpu_relax();
+ }
+ }
+
+kgdb_restore:
+ if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
+ int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
+ if (kgdb_info[sstep_cpu].task)
+ kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
+ else
+ kgdb_sstep_pid = 0;
+ }
+ if (trace_on)
+ tracing_on();
+ /* Free kgdb_active */
+ atomic_set(&kgdb_active, -1);
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ local_irq_restore(flags);
+
+ return kgdb_info[cpu].ret_state;
+}
+
+/*
+ * kgdb_handle_exception() - main entry point from a kernel exception
+ *
+ * Locking hierarchy:
+ * interface locks, if any (begin_session)
+ * kgdb lock (kgdb_active)
+ */
+int
+kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
+{
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+ int ret;
+
+ ks->cpu = raw_smp_processor_id();
+ ks->ex_vector = evector;
+ ks->signo = signo;
+ ks->err_code = ecode;
+ ks->kgdb_usethreadid = 0;
+ ks->linux_regs = regs;
+
+ if (kgdb_reenter_check(ks))
+ return 0; /* Ouch, double exception ! */
+ kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
+ ret = kgdb_cpu_enter(ks, regs);
+ kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER |
+ DCPU_IS_SLAVE);
+ return ret;
+}
+
+int kgdb_nmicallback(int cpu, void *regs)
+{
+#ifdef CONFIG_SMP
+ struct kgdb_state kgdb_var;
+ struct kgdb_state *ks = &kgdb_var;
+
+ memset(ks, 0, sizeof(struct kgdb_state));
+ ks->cpu = cpu;
+ ks->linux_regs = regs;
+
+ if (!atomic_read(&cpu_in_kgdb[cpu]) &&
+ atomic_read(&kgdb_active) != -1 &&
+ atomic_read(&kgdb_active) != cpu) {
+ kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
+ kgdb_cpu_enter(ks, regs);
+ kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+static void kgdb_console_write(struct console *co, const char *s,
+ unsigned count)
+{
+ unsigned long flags;
+
+ /* If we're debugging, or KGDB has not connected, don't try
+ * and print. */
+ if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode)
+ return;
+
+ local_irq_save(flags);
+ gdbstub_msg_write(s, count);
+ local_irq_restore(flags);
+}
+
+static struct console kgdbcons = {
+ .name = "kgdb",
+ .write = kgdb_console_write,
+ .flags = CON_PRINTBUFFER | CON_ENABLED,
+ .index = -1,
+};
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handle_dbg(int key, struct tty_struct *tty)
+{
+ if (!dbg_io_ops) {
+ printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
+ return;
+ }
+ if (!kgdb_connected) {
+#ifdef CONFIG_KGDB_KDB
+ if (!dbg_kdb_mode)
+ printk(KERN_CRIT "KGDB or $3#33 for KDB\n");
+#else
+ printk(KERN_CRIT "Entering KGDB\n");
+#endif
+ }
+
+ kgdb_breakpoint();
+}
+
+static struct sysrq_key_op sysrq_dbg_op = {
+ .handler = sysrq_handle_dbg,
+ .help_msg = "debug(G)",
+ .action_msg = "DEBUG",
+};
+#endif
+
+static int kgdb_panic_event(struct notifier_block *self,
+ unsigned long val,
+ void *data)
+{
+ if (dbg_kdb_mode)
+ kdb_printf("PANIC: %s\n", (char *)data);
+ kgdb_breakpoint();
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block kgdb_panic_event_nb = {
+ .notifier_call = kgdb_panic_event,
+ .priority = INT_MAX,
+};
+
+void __weak kgdb_arch_late(void)
+{
+}
+
+void __init dbg_late_init(void)
+{
+ dbg_is_early = false;
+ if (kgdb_io_module_registered)
+ kgdb_arch_late();
+ kdb_init(KDB_INIT_FULL);
+}
+
+static void kgdb_register_callbacks(void)
+{
+ if (!kgdb_io_module_registered) {
+ kgdb_io_module_registered = 1;
+ kgdb_arch_init();
+ if (!dbg_is_early)
+ kgdb_arch_late();
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kgdb_panic_event_nb);
+#ifdef CONFIG_MAGIC_SYSRQ
+ register_sysrq_key('g', &sysrq_dbg_op);
+#endif
+ if (kgdb_use_con && !kgdb_con_registered) {
+ register_console(&kgdbcons);
+ kgdb_con_registered = 1;
+ }
+ }
+}
+
+static void kgdb_unregister_callbacks(void)
+{
+ /*
+ * When this routine is called KGDB should unregister from the
+ * panic handler and clean up, making sure it is not handling any
+ * break exceptions at the time.
+ */
+ if (kgdb_io_module_registered) {
+ kgdb_io_module_registered = 0;
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &kgdb_panic_event_nb);
+ kgdb_arch_exit();
+#ifdef CONFIG_MAGIC_SYSRQ
+ unregister_sysrq_key('g', &sysrq_dbg_op);
+#endif
+ if (kgdb_con_registered) {
+ unregister_console(&kgdbcons);
+ kgdb_con_registered = 0;
+ }
+ }
+}
+
+/*
+ * There are times a tasklet needs to be used vs a compiled in
+ * break point so as to cause an exception outside a kgdb I/O module,
+ * such as is the case with kgdboe, where calling a breakpoint in the
+ * I/O driver itself would be fatal.
+ */
+static void kgdb_tasklet_bpt(unsigned long ing)
+{
+ kgdb_breakpoint();
+ atomic_set(&kgdb_break_tasklet_var, 0);
+}
+
+static DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0);
+
+void kgdb_schedule_breakpoint(void)
+{
+ if (atomic_read(&kgdb_break_tasklet_var) ||
+ atomic_read(&kgdb_active) != -1 ||
+ atomic_read(&kgdb_setting_breakpoint))
+ return;
+ atomic_inc(&kgdb_break_tasklet_var);
+ tasklet_schedule(&kgdb_tasklet_breakpoint);
+}
+EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint);
+
+static void kgdb_initial_breakpoint(void)
+{
+ kgdb_break_asap = 0;
+
+ printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n");
+ kgdb_breakpoint();
+}
+
+/**
+ * kgdb_register_io_module - register KGDB IO module
+ * @new_dbg_io_ops: the io ops vector
+ *
+ * Register it with the KGDB core.
+ */
+int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
+{
+ int err;
+
+ spin_lock(&kgdb_registration_lock);
+
+ if (dbg_io_ops) {
+ spin_unlock(&kgdb_registration_lock);
+
+ printk(KERN_ERR "kgdb: Another I/O driver is already "
+ "registered with KGDB.\n");
+ return -EBUSY;
+ }
+
+ if (new_dbg_io_ops->init) {
+ err = new_dbg_io_ops->init();
+ if (err) {
+ spin_unlock(&kgdb_registration_lock);
+ return err;
+ }
+ }
+
+ dbg_io_ops = new_dbg_io_ops;
+
+ spin_unlock(&kgdb_registration_lock);
+
+ printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
+ new_dbg_io_ops->name);
+
+ /* Arm KGDB now. */
+ kgdb_register_callbacks();
+
+ if (kgdb_break_asap)
+ kgdb_initial_breakpoint();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kgdb_register_io_module);
+
+/**
+ * kkgdb_unregister_io_module - unregister KGDB IO module
+ * @old_dbg_io_ops: the io ops vector
+ *
+ * Unregister it with the KGDB core.
+ */
+void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
+{
+ BUG_ON(kgdb_connected);
+
+ /*
+ * KGDB is no longer able to communicate out, so
+ * unregister our callbacks and reset state.
+ */
+ kgdb_unregister_callbacks();
+
+ spin_lock(&kgdb_registration_lock);
+
+ WARN_ON_ONCE(dbg_io_ops != old_dbg_io_ops);
+ dbg_io_ops = NULL;
+
+ spin_unlock(&kgdb_registration_lock);
+
+ printk(KERN_INFO
+ "kgdb: Unregistered I/O driver %s, debugger disabled.\n",
+ old_dbg_io_ops->name);
+}
+EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
+
+int dbg_io_get_char(void)
+{
+ int ret = dbg_io_ops->read_char();
+ if (ret == NO_POLL_CHAR)
+ return -1;
+ if (!dbg_kdb_mode)
+ return ret;
+ if (ret == 127)
+ return 8;
+ return ret;
+}
+
+/**
+ * kgdb_breakpoint - generate breakpoint exception
+ *
+ * This function will generate a breakpoint exception. It is used at the
+ * beginning of a program to sync up with a debugger and can be used
+ * otherwise as a quick means to stop program execution and "break" into
+ * the debugger.
+ */
+void kgdb_breakpoint(void)
+{
+ atomic_inc(&kgdb_setting_breakpoint);
+ wmb(); /* Sync point before breakpoint */
+ arch_kgdb_breakpoint();
+ wmb(); /* Sync point after breakpoint */
+ atomic_dec(&kgdb_setting_breakpoint);
+}
+EXPORT_SYMBOL_GPL(kgdb_breakpoint);
+
+static int __init opt_kgdb_wait(char *str)
+{
+ kgdb_break_asap = 1;
+
+ kdb_init(KDB_INIT_EARLY);
+ if (kgdb_io_module_registered)
+ kgdb_initial_breakpoint();
+
+ return 0;
+}
+
+early_param("kgdbwait", opt_kgdb_wait);
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
new file mode 100644
index 00000000000..c5d753d80f6
--- /dev/null
+++ b/kernel/debug/debug_core.h
@@ -0,0 +1,81 @@
+/*
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _DEBUG_CORE_H_
+#define _DEBUG_CORE_H_
+/*
+ * These are the private implementation headers between the kernel
+ * debugger core and the debugger front end code.
+ */
+
+/* kernel debug core data structures */
+struct kgdb_state {
+ int ex_vector;
+ int signo;
+ int err_code;
+ int cpu;
+ int pass_exception;
+ unsigned long thr_query;
+ unsigned long threadid;
+ long kgdb_usethreadid;
+ struct pt_regs *linux_regs;
+};
+
+/* Exception state values */
+#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
+#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
+#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
+#define DCPU_SSTEP 0x8 /* CPU is single stepping */
+
+struct debuggerinfo_struct {
+ void *debuggerinfo;
+ struct task_struct *task;
+ int exception_state;
+ int ret_state;
+ int irq_depth;
+};
+
+extern struct debuggerinfo_struct kgdb_info[];
+
+/* kernel debug core break point routines */
+extern int dbg_remove_all_break(void);
+extern int dbg_set_sw_break(unsigned long addr);
+extern int dbg_remove_sw_break(unsigned long addr);
+extern int dbg_activate_sw_breakpoints(void);
+extern int dbg_deactivate_sw_breakpoints(void);
+
+/* polled character access to i/o module */
+extern int dbg_io_get_char(void);
+
+/* stub return value for switching between the gdbstub and kdb */
+#define DBG_PASS_EVENT -12345
+/* Switch from one cpu to another */
+#define DBG_SWITCH_CPU_EVENT -123456
+extern int dbg_switch_cpu;
+
+/* gdbstub interface functions */
+extern int gdb_serial_stub(struct kgdb_state *ks);
+extern void gdbstub_msg_write(const char *s, int len);
+
+/* gdbstub functions used for kdb <-> gdbstub transition */
+extern int gdbstub_state(struct kgdb_state *ks, char *cmd);
+extern int dbg_kdb_mode;
+
+#ifdef CONFIG_KGDB_KDB
+extern int kdb_stub(struct kgdb_state *ks);
+extern int kdb_parse(const char *cmdstr);
+#else /* ! CONFIG_KGDB_KDB */
+static inline int kdb_stub(struct kgdb_state *ks)
+{
+ return DBG_PASS_EVENT;
+}
+#endif /* CONFIG_KGDB_KDB */
+
+#endif /* _DEBUG_CORE_H_ */
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
new file mode 100644
index 00000000000..4b17b326952
--- /dev/null
+++ b/kernel/debug/gdbstub.c
@@ -0,0 +1,1017 @@
+/*
+ * Kernel Debug Core
+ *
+ * Maintainer: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (C) 2000-2001 VERITAS Software Corporation.
+ * Copyright (C) 2002-2004 Timesys Corporation
+ * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
+ * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
+ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
+ * Copyright (C) 2005-2009 Wind River Systems, Inc.
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Contributors at various stages not listed above:
+ * Jason Wessel ( jason.wessel@windriver.com )
+ * George Anzinger <george@mvista.com>
+ * Anurekh Saxena (anurekh.saxena@timesys.com)
+ * Lake Stevens Instrument Division (Glenn Engel)
+ * Jim Kingdon, Cygnus Support.
+ *
+ * Original KGDB stub: David Grothe <dave@gcom.com>,
+ * Tigran Aivazian <tigran@sco.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/reboot.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/unaligned.h>
+#include "debug_core.h"
+
+#define KGDB_MAX_THREAD_QUERY 17
+
+/* Our I/O buffers. */
+static char remcom_in_buffer[BUFMAX];
+static char remcom_out_buffer[BUFMAX];
+
+/* Storage for the registers, in GDB format. */
+static unsigned long gdb_regs[(NUMREGBYTES +
+ sizeof(unsigned long) - 1) /
+ sizeof(unsigned long)];
+
+/*
+ * GDB remote protocol parser:
+ */
+
+static int hex(char ch)
+{
+ if ((ch >= 'a') && (ch <= 'f'))
+ return ch - 'a' + 10;
+ if ((ch >= '0') && (ch <= '9'))
+ return ch - '0';
+ if ((ch >= 'A') && (ch <= 'F'))
+ return ch - 'A' + 10;
+ return -1;
+}
+
+#ifdef CONFIG_KGDB_KDB
+static int gdbstub_read_wait(void)
+{
+ int ret = -1;
+ int i;
+
+ /* poll any additional I/O interfaces that are defined */
+ while (ret < 0)
+ for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
+ ret = kdb_poll_funcs[i]();
+ if (ret > 0)
+ break;
+ }
+ return ret;
+}
+#else
+static int gdbstub_read_wait(void)
+{
+ int ret = dbg_io_ops->read_char();
+ while (ret == NO_POLL_CHAR)
+ ret = dbg_io_ops->read_char();
+ return ret;
+}
+#endif
+/* scan for the sequence $<data>#<checksum> */
+static void get_packet(char *buffer)
+{
+ unsigned char checksum;
+ unsigned char xmitcsum;
+ int count;
+ char ch;
+
+ do {
+ /*
+ * Spin and wait around for the start character, ignore all
+ * other characters:
+ */
+ while ((ch = (gdbstub_read_wait())) != '$')
+ /* nothing */;
+
+ kgdb_connected = 1;
+ checksum = 0;
+ xmitcsum = -1;
+
+ count = 0;
+
+ /*
+ * now, read until a # or end of buffer is found:
+ */
+ while (count < (BUFMAX - 1)) {
+ ch = gdbstub_read_wait();
+ if (ch == '#')
+ break;
+ checksum = checksum + ch;
+ buffer[count] = ch;
+ count = count + 1;
+ }
+ buffer[count] = 0;
+
+ if (ch == '#') {
+ xmitcsum = hex(gdbstub_read_wait()) << 4;
+ xmitcsum += hex(gdbstub_read_wait());
+
+ if (checksum != xmitcsum)
+ /* failed checksum */
+ dbg_io_ops->write_char('-');
+ else
+ /* successful transfer */
+ dbg_io_ops->write_char('+');
+ if (dbg_io_ops->flush)
+ dbg_io_ops->flush();
+ }
+ } while (checksum != xmitcsum);
+}
+
+/*
+ * Send the packet in buffer.
+ * Check for gdb connection if asked for.
+ */
+static void put_packet(char *buffer)
+{
+ unsigned char checksum;
+ int count;
+ char ch;
+
+ /*
+ * $<packet info>#<checksum>.
+ */
+ while (1) {
+ dbg_io_ops->write_char('$');
+ checksum = 0;
+ count = 0;
+
+ while ((ch = buffer[count])) {
+ dbg_io_ops->write_char(ch);
+ checksum += ch;
+ count++;
+ }
+
+ dbg_io_ops->write_char('#');
+ dbg_io_ops->write_char(hex_asc_hi(checksum));
+ dbg_io_ops->write_char(hex_asc_lo(checksum));
+ if (dbg_io_ops->flush)
+ dbg_io_ops->flush();
+
+ /* Now see what we get in reply. */
+ ch = gdbstub_read_wait();
+
+ if (ch == 3)
+ ch = gdbstub_read_wait();
+
+ /* If we get an ACK, we are done. */
+ if (ch == '+')
+ return;
+
+ /*
+ * If we get the start of another packet, this means
+ * that GDB is attempting to reconnect. We will NAK
+ * the packet being sent, and stop trying to send this
+ * packet.
+ */
+ if (ch == '$') {
+ dbg_io_ops->write_char('-');
+ if (dbg_io_ops->flush)
+ dbg_io_ops->flush();
+ return;
+ }
+ }
+}
+
+static char gdbmsgbuf[BUFMAX + 1];
+
+void gdbstub_msg_write(const char *s, int len)
+{
+ char *bufptr;
+ int wcount;
+ int i;
+
+ if (len == 0)
+ len = strlen(s);
+
+ /* 'O'utput */
+ gdbmsgbuf[0] = 'O';
+
+ /* Fill and send buffers... */
+ while (len > 0) {
+ bufptr = gdbmsgbuf + 1;
+
+ /* Calculate how many this time */
+ if ((len << 1) > (BUFMAX - 2))
+ wcount = (BUFMAX - 2) >> 1;
+ else
+ wcount = len;
+
+ /* Pack in hex chars */
+ for (i = 0; i < wcount; i++)
+ bufptr = pack_hex_byte(bufptr, s[i]);
+ *bufptr = '\0';
+
+ /* Move up */
+ s += wcount;
+ len -= wcount;
+
+ /* Write packet */
+ put_packet(gdbmsgbuf);
+ }
+}
+
+/*
+ * Convert the memory pointed to by mem into hex, placing result in
+ * buf. Return a pointer to the last char put in buf (null). May
+ * return an error.
+ */
+int kgdb_mem2hex(char *mem, char *buf, int count)
+{
+ char *tmp;
+ int err;
+
+ /*
+ * We use the upper half of buf as an intermediate buffer for the
+ * raw memory copy. Hex conversion will work against this one.
+ */
+ tmp = buf + count;
+
+ err = probe_kernel_read(tmp, mem, count);
+ if (!err) {
+ while (count > 0) {
+ buf = pack_hex_byte(buf, *tmp);
+ tmp++;
+ count--;
+ }
+
+ *buf = 0;
+ }
+
+ return err;
+}
+
+/*
+ * Convert the hex array pointed to by buf into binary to be placed in
+ * mem. Return a pointer to the character AFTER the last byte
+ * written. May return an error.
+ */
+int kgdb_hex2mem(char *buf, char *mem, int count)
+{
+ char *tmp_raw;
+ char *tmp_hex;
+
+ /*
+ * We use the upper half of buf as an intermediate buffer for the
+ * raw memory that is converted from hex.
+ */
+ tmp_raw = buf + count * 2;
+
+ tmp_hex = tmp_raw - 1;
+ while (tmp_hex >= buf) {
+ tmp_raw--;
+ *tmp_raw = hex(*tmp_hex--);
+ *tmp_raw |= hex(*tmp_hex--) << 4;
+ }
+
+ return probe_kernel_write(mem, tmp_raw, count);
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int kgdb_hex2long(char **ptr, unsigned long *long_val)
+{
+ int hex_val;
+ int num = 0;
+ int negate = 0;
+
+ *long_val = 0;
+
+ if (**ptr == '-') {
+ negate = 1;
+ (*ptr)++;
+ }
+ while (**ptr) {
+ hex_val = hex(**ptr);
+ if (hex_val < 0)
+ break;
+
+ *long_val = (*long_val << 4) | hex_val;
+ num++;
+ (*ptr)++;
+ }
+
+ if (negate)
+ *long_val = -*long_val;
+
+ return num;
+}
+
+/*
+ * Copy the binary array pointed to by buf into mem. Fix $, #, and
+ * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
+ * The input buf is overwitten with the result to write to mem.
+ */
+static int kgdb_ebin2mem(char *buf, char *mem, int count)
+{
+ int size = 0;
+ char *c = buf;
+
+ while (count-- > 0) {
+ c[size] = *buf++;
+ if (c[size] == 0x7d)
+ c[size] = *buf++ ^ 0x20;
+ size++;
+ }
+
+ return probe_kernel_write(mem, c, size);
+}
+
+/* Write memory due to an 'M' or 'X' packet. */
+static int write_mem_msg(int binary)
+{
+ char *ptr = &remcom_in_buffer[1];
+ unsigned long addr;
+ unsigned long length;
+ int err;
+
+ if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
+ kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
+ if (binary)
+ err = kgdb_ebin2mem(ptr, (char *)addr, length);
+ else
+ err = kgdb_hex2mem(ptr, (char *)addr, length);
+ if (err)
+ return err;
+ if (CACHE_FLUSH_IS_SAFE)
+ flush_icache_range(addr, addr + length);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void error_packet(char *pkt, int error)
+{
+ error = -error;
+ pkt[0] = 'E';
+ pkt[1] = hex_asc[(error / 10)];
+ pkt[2] = hex_asc[(error % 10)];
+ pkt[3] = '\0';
+}
+
+/*
+ * Thread ID accessors. We represent a flat TID space to GDB, where
+ * the per CPU idle threads (which under Linux all have PID 0) are
+ * remapped to negative TIDs.
+ */
+
+#define BUF_THREAD_ID_SIZE 16
+
+static char *pack_threadid(char *pkt, unsigned char *id)
+{
+ char *limit;
+
+ limit = pkt + BUF_THREAD_ID_SIZE;
+ while (pkt < limit)
+ pkt = pack_hex_byte(pkt, *id++);
+
+ return pkt;
+}
+
+static void int_to_threadref(unsigned char *id, int value)
+{
+ unsigned char *scan;
+ int i = 4;
+
+ scan = (unsigned char *)id;
+ while (i--)
+ *scan++ = 0;
+ put_unaligned_be32(value, scan);
+}
+
+static struct task_struct *getthread(struct pt_regs *regs, int tid)
+{
+ /*
+ * Non-positive TIDs are remapped to the cpu shadow information
+ */
+ if (tid == 0 || tid == -1)
+ tid = -atomic_read(&kgdb_active) - 2;
+ if (tid < -1 && tid > -NR_CPUS - 2) {
+ if (kgdb_info[-tid - 2].task)
+ return kgdb_info[-tid - 2].task;
+ else
+ return idle_task(-tid - 2);
+ }
+ if (tid <= 0) {
+ printk(KERN_ERR "KGDB: Internal thread select error\n");
+ dump_stack();
+ return NULL;
+ }
+
+ /*
+ * find_task_by_pid_ns() does not take the tasklist lock anymore
+ * but is nicely RCU locked - hence is a pretty resilient
+ * thing to use:
+ */
+ return find_task_by_pid_ns(tid, &init_pid_ns);
+}
+
+
+/*
+ * Remap normal tasks to their real PID,
+ * CPU shadow threads are mapped to -CPU - 2
+ */
+static inline int shadow_pid(int realpid)
+{
+ if (realpid)
+ return realpid;
+
+ return -raw_smp_processor_id() - 2;
+}
+
+/*
+ * All the functions that start with gdb_cmd are the various
+ * operations to implement the handlers for the gdbserial protocol
+ * where KGDB is communicating with an external debugger
+ */
+
+/* Handle the '?' status packets */
+static void gdb_cmd_status(struct kgdb_state *ks)
+{
+ /*
+ * We know that this packet is only sent
+ * during initial connect. So to be safe,
+ * we clear out our breakpoints now in case
+ * GDB is reconnecting.
+ */
+ dbg_remove_all_break();
+
+ remcom_out_buffer[0] = 'S';
+ pack_hex_byte(&remcom_out_buffer[1], ks->signo);
+}
+
+/* Handle the 'g' get registers request */
+static void gdb_cmd_getregs(struct kgdb_state *ks)
+{
+ struct task_struct *thread;
+ void *local_debuggerinfo;
+ int i;
+
+ thread = kgdb_usethread;
+ if (!thread) {
+ thread = kgdb_info[ks->cpu].task;
+ local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
+ } else {
+ local_debuggerinfo = NULL;
+ for_each_online_cpu(i) {
+ /*
+ * Try to find the task on some other
+ * or possibly this node if we do not
+ * find the matching task then we try
+ * to approximate the results.
+ */
+ if (thread == kgdb_info[i].task)
+ local_debuggerinfo = kgdb_info[i].debuggerinfo;
+ }
+ }
+
+ /*
+ * All threads that don't have debuggerinfo should be
+ * in schedule() sleeping, since all other CPUs
+ * are in kgdb_wait, and thus have debuggerinfo.
+ */
+ if (local_debuggerinfo) {
+ pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
+ } else {
+ /*
+ * Pull stuff saved during switch_to; nothing
+ * else is accessible (or even particularly
+ * relevant).
+ *
+ * This should be enough for a stack trace.
+ */
+ sleeping_thread_to_gdb_regs(gdb_regs, thread);
+ }
+ kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
+}
+
+/* Handle the 'G' set registers request */
+static void gdb_cmd_setregs(struct kgdb_state *ks)
+{
+ kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
+
+ if (kgdb_usethread && kgdb_usethread != current) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ } else {
+ gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
+ strcpy(remcom_out_buffer, "OK");
+ }
+}
+
+/* Handle the 'm' memory read bytes */
+static void gdb_cmd_memread(struct kgdb_state *ks)
+{
+ char *ptr = &remcom_in_buffer[1];
+ unsigned long length;
+ unsigned long addr;
+ int err;
+
+ if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
+ kgdb_hex2long(&ptr, &length) > 0) {
+ err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
+ if (err)
+ error_packet(remcom_out_buffer, err);
+ } else {
+ error_packet(remcom_out_buffer, -EINVAL);
+ }
+}
+
+/* Handle the 'M' memory write bytes */
+static void gdb_cmd_memwrite(struct kgdb_state *ks)
+{
+ int err = write_mem_msg(0);
+
+ if (err)
+ error_packet(remcom_out_buffer, err);
+ else
+ strcpy(remcom_out_buffer, "OK");
+}
+
+/* Handle the 'X' memory binary write bytes */
+static void gdb_cmd_binwrite(struct kgdb_state *ks)
+{
+ int err = write_mem_msg(1);
+
+ if (err)
+ error_packet(remcom_out_buffer, err);
+ else
+ strcpy(remcom_out_buffer, "OK");
+}
+
+/* Handle the 'D' or 'k', detach or kill packets */
+static void gdb_cmd_detachkill(struct kgdb_state *ks)
+{
+ int error;
+
+ /* The detach case */
+ if (remcom_in_buffer[0] == 'D') {
+ error = dbg_remove_all_break();
+ if (error < 0) {
+ error_packet(remcom_out_buffer, error);
+ } else {
+ strcpy(remcom_out_buffer, "OK");
+ kgdb_connected = 0;
+ }
+ put_packet(remcom_out_buffer);
+ } else {
+ /*
+ * Assume the kill case, with no exit code checking,
+ * trying to force detach the debugger:
+ */
+ dbg_remove_all_break();
+ kgdb_connected = 0;
+ }
+}
+
+/* Handle the 'R' reboot packets */
+static int gdb_cmd_reboot(struct kgdb_state *ks)
+{
+ /* For now, only honor R0 */
+ if (strcmp(remcom_in_buffer, "R0") == 0) {
+ printk(KERN_CRIT "Executing emergency reboot\n");
+ strcpy(remcom_out_buffer, "OK");
+ put_packet(remcom_out_buffer);
+
+ /*
+ * Execution should not return from
+ * machine_emergency_restart()
+ */
+ machine_emergency_restart();
+ kgdb_connected = 0;
+
+ return 1;
+ }
+ return 0;
+}
+
+/* Handle the 'q' query packets */
+static void gdb_cmd_query(struct kgdb_state *ks)
+{
+ struct task_struct *g;
+ struct task_struct *p;
+ unsigned char thref[8];
+ char *ptr;
+ int i;
+ int cpu;
+ int finished = 0;
+
+ switch (remcom_in_buffer[1]) {
+ case 's':
+ case 'f':
+ if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+
+ i = 0;
+ remcom_out_buffer[0] = 'm';
+ ptr = remcom_out_buffer + 1;
+ if (remcom_in_buffer[1] == 'f') {
+ /* Each cpu is a shadow thread */
+ for_each_online_cpu(cpu) {
+ ks->thr_query = 0;
+ int_to_threadref(thref, -cpu - 2);
+ pack_threadid(ptr, thref);
+ ptr += BUF_THREAD_ID_SIZE;
+ *(ptr++) = ',';
+ i++;
+ }
+ }
+
+ do_each_thread(g, p) {
+ if (i >= ks->thr_query && !finished) {
+ int_to_threadref(thref, p->pid);
+ pack_threadid(ptr, thref);
+ ptr += BUF_THREAD_ID_SIZE;
+ *(ptr++) = ',';
+ ks->thr_query++;
+ if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
+ finished = 1;
+ }
+ i++;
+ } while_each_thread(g, p);
+
+ *(--ptr) = '\0';
+ break;
+
+ case 'C':
+ /* Current thread id */
+ strcpy(remcom_out_buffer, "QC");
+ ks->threadid = shadow_pid(current->pid);
+ int_to_threadref(thref, ks->threadid);
+ pack_threadid(remcom_out_buffer + 2, thref);
+ break;
+ case 'T':
+ if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+ ks->threadid = 0;
+ ptr = remcom_in_buffer + 17;
+ kgdb_hex2long(&ptr, &ks->threadid);
+ if (!getthread(ks->linux_regs, ks->threadid)) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+ if ((int)ks->threadid > 0) {
+ kgdb_mem2hex(getthread(ks->linux_regs,
+ ks->threadid)->comm,
+ remcom_out_buffer, 16);
+ } else {
+ static char tmpstr[23 + BUF_THREAD_ID_SIZE];
+
+ sprintf(tmpstr, "shadowCPU%d",
+ (int)(-ks->threadid - 2));
+ kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
+ }
+ break;
+#ifdef CONFIG_KGDB_KDB
+ case 'R':
+ if (strncmp(remcom_in_buffer, "qRcmd,", 6) == 0) {
+ int len = strlen(remcom_in_buffer + 6);
+
+ if ((len % 2) != 0) {
+ strcpy(remcom_out_buffer, "E01");
+ break;
+ }
+ kgdb_hex2mem(remcom_in_buffer + 6,
+ remcom_out_buffer, len);
+ len = len / 2;
+ remcom_out_buffer[len++] = 0;
+
+ kdb_parse(remcom_out_buffer);
+ strcpy(remcom_out_buffer, "OK");
+ }
+ break;
+#endif
+ }
+}
+
+/* Handle the 'H' task query packets */
+static void gdb_cmd_task(struct kgdb_state *ks)
+{
+ struct task_struct *thread;
+ char *ptr;
+
+ switch (remcom_in_buffer[1]) {
+ case 'g':
+ ptr = &remcom_in_buffer[2];
+ kgdb_hex2long(&ptr, &ks->threadid);
+ thread = getthread(ks->linux_regs, ks->threadid);
+ if (!thread && ks->threadid > 0) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+ kgdb_usethread = thread;
+ ks->kgdb_usethreadid = ks->threadid;
+ strcpy(remcom_out_buffer, "OK");
+ break;
+ case 'c':
+ ptr = &remcom_in_buffer[2];
+ kgdb_hex2long(&ptr, &ks->threadid);
+ if (!ks->threadid) {
+ kgdb_contthread = NULL;
+ } else {
+ thread = getthread(ks->linux_regs, ks->threadid);
+ if (!thread && ks->threadid > 0) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+ kgdb_contthread = thread;
+ }
+ strcpy(remcom_out_buffer, "OK");
+ break;
+ }
+}
+
+/* Handle the 'T' thread query packets */
+static void gdb_cmd_thread(struct kgdb_state *ks)
+{
+ char *ptr = &remcom_in_buffer[1];
+ struct task_struct *thread;
+
+ kgdb_hex2long(&ptr, &ks->threadid);
+ thread = getthread(ks->linux_regs, ks->threadid);
+ if (thread)
+ strcpy(remcom_out_buffer, "OK");
+ else
+ error_packet(remcom_out_buffer, -EINVAL);
+}
+
+/* Handle the 'z' or 'Z' breakpoint remove or set packets */
+static void gdb_cmd_break(struct kgdb_state *ks)
+{
+ /*
+ * Since GDB-5.3, it's been drafted that '0' is a software
+ * breakpoint, '1' is a hardware breakpoint, so let's do that.
+ */
+ char *bpt_type = &remcom_in_buffer[1];
+ char *ptr = &remcom_in_buffer[2];
+ unsigned long addr;
+ unsigned long length;
+ int error = 0;
+
+ if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
+ /* Unsupported */
+ if (*bpt_type > '4')
+ return;
+ } else {
+ if (*bpt_type != '0' && *bpt_type != '1')
+ /* Unsupported. */
+ return;
+ }
+
+ /*
+ * Test if this is a hardware breakpoint, and
+ * if we support it:
+ */
+ if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
+ /* Unsupported. */
+ return;
+
+ if (*(ptr++) != ',') {
+ error_packet(remcom_out_buffer, -EINVAL);
+ return;
+ }
+ if (!kgdb_hex2long(&ptr, &addr)) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ return;
+ }
+ if (*(ptr++) != ',' ||
+ !kgdb_hex2long(&ptr, &length)) {
+ error_packet(remcom_out_buffer, -EINVAL);
+ return;
+ }
+
+ if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
+ error = dbg_set_sw_break(addr);
+ else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
+ error = dbg_remove_sw_break(addr);
+ else if (remcom_in_buffer[0] == 'Z')
+ error = arch_kgdb_ops.set_hw_breakpoint(addr,
+ (int)length, *bpt_type - '0');
+ else if (remcom_in_buffer[0] == 'z')
+ error = arch_kgdb_ops.remove_hw_breakpoint(addr,
+ (int) length, *bpt_type - '0');
+
+ if (error == 0)
+ strcpy(remcom_out_buffer, "OK");
+ else
+ error_packet(remcom_out_buffer, error);
+}
+
+/* Handle the 'C' signal / exception passing packets */
+static int gdb_cmd_exception_pass(struct kgdb_state *ks)
+{
+ /* C09 == pass exception
+ * C15 == detach kgdb, pass exception
+ */
+ if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
+
+ ks->pass_exception = 1;
+ remcom_in_buffer[0] = 'c';
+
+ } else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
+
+ ks->pass_exception = 1;
+ remcom_in_buffer[0] = 'D';
+ dbg_remove_all_break();
+ kgdb_connected = 0;
+ return 1;
+
+ } else {
+ gdbstub_msg_write("KGDB only knows signal 9 (pass)"
+ " and 15 (pass and disconnect)\n"
+ "Executing a continue without signal passing\n", 0);
+ remcom_in_buffer[0] = 'c';
+ }
+
+ /* Indicate fall through */
+ return -1;
+}
+
+/*
+ * This function performs all gdbserial command procesing
+ */
+int gdb_serial_stub(struct kgdb_state *ks)
+{
+ int error = 0;
+ int tmp;
+
+ /* Clear the out buffer. */
+ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
+
+ if (kgdb_connected) {
+ unsigned char thref[8];
+ char *ptr;
+
+ /* Reply to host that an exception has occurred */
+ ptr = remcom_out_buffer;
+ *ptr++ = 'T';
+ ptr = pack_hex_byte(ptr, ks->signo);
+ ptr += strlen(strcpy(ptr, "thread:"));
+ int_to_threadref(thref, shadow_pid(current->pid));
+ ptr = pack_threadid(ptr, thref);
+ *ptr++ = ';';
+ put_packet(remcom_out_buffer);
+ }
+
+ kgdb_usethread = kgdb_info[ks->cpu].task;
+ ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
+ ks->pass_exception = 0;
+
+ while (1) {
+ error = 0;
+
+ /* Clear the out buffer. */
+ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
+
+ get_packet(remcom_in_buffer);
+
+ switch (remcom_in_buffer[0]) {
+ case '?': /* gdbserial status */
+ gdb_cmd_status(ks);
+ break;
+ case 'g': /* return the value of the CPU registers */
+ gdb_cmd_getregs(ks);
+ break;
+ case 'G': /* set the value of the CPU registers - return OK */
+ gdb_cmd_setregs(ks);
+ break;
+ case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
+ gdb_cmd_memread(ks);
+ break;
+ case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
+ gdb_cmd_memwrite(ks);
+ break;
+ case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
+ gdb_cmd_binwrite(ks);
+ break;
+ /* kill or detach. KGDB should treat this like a
+ * continue.
+ */
+ case 'D': /* Debugger detach */
+ case 'k': /* Debugger detach via kill */
+ gdb_cmd_detachkill(ks);
+ goto default_handle;
+ case 'R': /* Reboot */
+ if (gdb_cmd_reboot(ks))
+ goto default_handle;
+ break;
+ case 'q': /* query command */
+ gdb_cmd_query(ks);
+ break;
+ case 'H': /* task related */
+ gdb_cmd_task(ks);
+ break;
+ case 'T': /* Query thread status */
+ gdb_cmd_thread(ks);
+ break;
+ case 'z': /* Break point remove */
+ case 'Z': /* Break point set */
+ gdb_cmd_break(ks);
+ break;
+#ifdef CONFIG_KGDB_KDB
+ case '3': /* Escape into back into kdb */
+ if (remcom_in_buffer[1] == '\0') {
+ gdb_cmd_detachkill(ks);
+ return DBG_PASS_EVENT;
+ }
+#endif
+ case 'C': /* Exception passing */
+ tmp = gdb_cmd_exception_pass(ks);
+ if (tmp > 0)
+ goto default_handle;
+ if (tmp == 0)
+ break;
+ /* Fall through on tmp < 0 */
+ case 'c': /* Continue packet */
+ case 's': /* Single step packet */
+ if (kgdb_contthread && kgdb_contthread != current) {
+ /* Can't switch threads in kgdb */
+ error_packet(remcom_out_buffer, -EINVAL);
+ break;
+ }
+ dbg_activate_sw_breakpoints();
+ /* Fall through to default processing */
+ default:
+default_handle:
+ error = kgdb_arch_handle_exception(ks->ex_vector,
+ ks->signo,
+ ks->err_code,
+ remcom_in_buffer,
+ remcom_out_buffer,
+ ks->linux_regs);
+ /*
+ * Leave cmd processing on error, detach,
+ * kill, continue, or single step.
+ */
+ if (error >= 0 || remcom_in_buffer[0] == 'D' ||
+ remcom_in_buffer[0] == 'k') {
+ error = 0;
+ goto kgdb_exit;
+ }
+
+ }
+
+ /* reply to the request */
+ put_packet(remcom_out_buffer);
+ }
+
+kgdb_exit:
+ if (ks->pass_exception)
+ error = 1;
+ return error;
+}
+
+int gdbstub_state(struct kgdb_state *ks, char *cmd)
+{
+ int error;
+
+ switch (cmd[0]) {
+ case 'e':
+ error = kgdb_arch_handle_exception(ks->ex_vector,
+ ks->signo,
+ ks->err_code,
+ remcom_in_buffer,
+ remcom_out_buffer,
+ ks->linux_regs);
+ return error;
+ case 's':
+ case 'c':
+ strcpy(remcom_in_buffer, cmd);
+ return 0;
+ case '?':
+ gdb_cmd_status(ks);
+ break;
+ case '\0':
+ strcpy(remcom_out_buffer, "");
+ break;
+ }
+ dbg_io_ops->write_char('+');
+ put_packet(remcom_out_buffer);
+ return 0;
+}
diff --git a/kernel/debug/kdb/.gitignore b/kernel/debug/kdb/.gitignore
new file mode 100644
index 00000000000..396d12eda9e
--- /dev/null
+++ b/kernel/debug/kdb/.gitignore
@@ -0,0 +1 @@
+gen-kdb_cmds.c
diff --git a/kernel/debug/kdb/Makefile b/kernel/debug/kdb/Makefile
new file mode 100644
index 00000000000..d4fc58f4b88
--- /dev/null
+++ b/kernel/debug/kdb/Makefile
@@ -0,0 +1,25 @@
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+# Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+#
+
+CCVERSION := $(shell $(CC) -v 2>&1 | sed -ne '$$p')
+obj-y := kdb_io.o kdb_main.o kdb_support.o kdb_bt.o gen-kdb_cmds.o kdb_bp.o kdb_debugger.o
+obj-$(CONFIG_KDB_KEYBOARD) += kdb_keyboard.o
+
+clean-files := gen-kdb_cmds.c
+
+quiet_cmd_gen-kdb = GENKDB $@
+ cmd_gen-kdb = $(AWK) 'BEGIN {print "\#include <linux/stddef.h>"; print "\#include <linux/init.h>"} \
+ /^\#/{next} \
+ /^[ \t]*$$/{next} \
+ {gsub(/"/, "\\\"", $$0); \
+ print "static __initdata char kdb_cmd" cmds++ "[] = \"" $$0 "\\n\";"} \
+ END {print "extern char *kdb_cmds[]; char __initdata *kdb_cmds[] = {"; for (i = 0; i < cmds; ++i) {print " kdb_cmd" i ","}; print(" NULL\n};");}' \
+ $(filter-out %/Makefile,$^) > $@#
+
+$(obj)/gen-kdb_cmds.c: $(src)/kdb_cmds $(src)/Makefile
+ $(call cmd,gen-kdb)
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
new file mode 100644
index 00000000000..75bd9b3ebbb
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -0,0 +1,564 @@
+/*
+ * Kernel Debugger Architecture Independent Breakpoint Handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kdb.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include "kdb_private.h"
+
+/*
+ * Table of kdb_breakpoints
+ */
+kdb_bp_t kdb_breakpoints[KDB_MAXBPT];
+
+static void kdb_setsinglestep(struct pt_regs *regs)
+{
+ KDB_STATE_SET(DOING_SS);
+}
+
+static char *kdb_rwtypes[] = {
+ "Instruction(i)",
+ "Instruction(Register)",
+ "Data Write",
+ "I/O",
+ "Data Access"
+};
+
+static char *kdb_bptype(kdb_bp_t *bp)
+{
+ if (bp->bp_type < 0 || bp->bp_type > 4)
+ return "";
+
+ return kdb_rwtypes[bp->bp_type];
+}
+
+static int kdb_parsebp(int argc, const char **argv, int *nextargp, kdb_bp_t *bp)
+{
+ int nextarg = *nextargp;
+ int diag;
+
+ bp->bph_length = 1;
+ if ((argc + 1) != nextarg) {
+ if (strnicmp(argv[nextarg], "datar", sizeof("datar")) == 0)
+ bp->bp_type = BP_ACCESS_WATCHPOINT;
+ else if (strnicmp(argv[nextarg], "dataw", sizeof("dataw")) == 0)
+ bp->bp_type = BP_WRITE_WATCHPOINT;
+ else if (strnicmp(argv[nextarg], "inst", sizeof("inst")) == 0)
+ bp->bp_type = BP_HARDWARE_BREAKPOINT;
+ else
+ return KDB_ARGCOUNT;
+
+ bp->bph_length = 1;
+
+ nextarg++;
+
+ if ((argc + 1) != nextarg) {
+ unsigned long len;
+
+ diag = kdbgetularg((char *)argv[nextarg],
+ &len);
+ if (diag)
+ return diag;
+
+
+ if (len > 8)
+ return KDB_BADLENGTH;
+
+ bp->bph_length = len;
+ nextarg++;
+ }
+
+ if ((argc + 1) != nextarg)
+ return KDB_ARGCOUNT;
+ }
+
+ *nextargp = nextarg;
+ return 0;
+}
+
+static int _kdb_bp_remove(kdb_bp_t *bp)
+{
+ int ret = 1;
+ if (!bp->bp_installed)
+ return ret;
+ if (!bp->bp_type)
+ ret = dbg_remove_sw_break(bp->bp_addr);
+ else
+ ret = arch_kgdb_ops.remove_hw_breakpoint(bp->bp_addr,
+ bp->bph_length,
+ bp->bp_type);
+ if (ret == 0)
+ bp->bp_installed = 0;
+ return ret;
+}
+
+static void kdb_handle_bp(struct pt_regs *regs, kdb_bp_t *bp)
+{
+ if (KDB_DEBUG(BP))
+ kdb_printf("regs->ip = 0x%lx\n", instruction_pointer(regs));
+
+ /*
+ * Setup single step
+ */
+ kdb_setsinglestep(regs);
+
+ /*
+ * Reset delay attribute
+ */
+ bp->bp_delay = 0;
+ bp->bp_delayed = 1;
+}
+
+static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
+{
+ int ret;
+ /*
+ * Install the breakpoint, if it is not already installed.
+ */
+
+ if (KDB_DEBUG(BP))
+ kdb_printf("%s: bp_installed %d\n",
+ __func__, bp->bp_installed);
+ if (!KDB_STATE(SSBPT))
+ bp->bp_delay = 0;
+ if (bp->bp_installed)
+ return 1;
+ if (bp->bp_delay || (bp->bp_delayed && KDB_STATE(DOING_SS))) {
+ if (KDB_DEBUG(BP))
+ kdb_printf("%s: delayed bp\n", __func__);
+ kdb_handle_bp(regs, bp);
+ return 0;
+ }
+ if (!bp->bp_type)
+ ret = dbg_set_sw_break(bp->bp_addr);
+ else
+ ret = arch_kgdb_ops.set_hw_breakpoint(bp->bp_addr,
+ bp->bph_length,
+ bp->bp_type);
+ if (ret == 0) {
+ bp->bp_installed = 1;
+ } else {
+ kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
+ __func__, bp->bp_addr);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * kdb_bp_install
+ *
+ * Install kdb_breakpoints prior to returning from the
+ * kernel debugger. This allows the kdb_breakpoints to be set
+ * upon functions that are used internally by kdb, such as
+ * printk(). This function is only called once per kdb session.
+ */
+void kdb_bp_install(struct pt_regs *regs)
+{
+ int i;
+
+ for (i = 0; i < KDB_MAXBPT; i++) {
+ kdb_bp_t *bp = &kdb_breakpoints[i];
+
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("%s: bp %d bp_enabled %d\n",
+ __func__, i, bp->bp_enabled);
+ }
+ if (bp->bp_enabled)
+ _kdb_bp_install(regs, bp);
+ }
+}
+
+/*
+ * kdb_bp_remove
+ *
+ * Remove kdb_breakpoints upon entry to the kernel debugger.
+ *
+ * Parameters:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ */
+void kdb_bp_remove(void)
+{
+ int i;
+
+ for (i = KDB_MAXBPT - 1; i >= 0; i--) {
+ kdb_bp_t *bp = &kdb_breakpoints[i];
+
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("%s: bp %d bp_enabled %d\n",
+ __func__, i, bp->bp_enabled);
+ }
+ if (bp->bp_enabled)
+ _kdb_bp_remove(bp);
+ }
+}
+
+
+/*
+ * kdb_printbp
+ *
+ * Internal function to format and print a breakpoint entry.
+ *
+ * Parameters:
+ * None.
+ * Outputs:
+ * None.
+ * Returns:
+ * None.
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+static void kdb_printbp(kdb_bp_t *bp, int i)
+{
+ kdb_printf("%s ", kdb_bptype(bp));
+ kdb_printf("BP #%d at ", i);
+ kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
+
+ if (bp->bp_enabled)
+ kdb_printf("\n is enabled");
+ else
+ kdb_printf("\n is disabled");
+
+ kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
+ bp->bp_addr, bp->bp_type, bp->bp_installed);
+
+ kdb_printf("\n");
+}
+
+/*
+ * kdb_bp
+ *
+ * Handle the bp commands.
+ *
+ * [bp|bph] <addr-expression> [DATAR|DATAW]
+ *
+ * Parameters:
+ * argc Count of arguments in argv
+ * argv Space delimited command line arguments
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero for success, a kdb diagnostic if failure.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ * bp Set breakpoint on all cpus. Only use hardware assist if need.
+ * bph Set breakpoint on all cpus. Force hardware register
+ */
+
+static int kdb_bp(int argc, const char **argv)
+{
+ int i, bpno;
+ kdb_bp_t *bp, *bp_check;
+ int diag;
+ int free;
+ char *symname = NULL;
+ long offset = 0ul;
+ int nextarg;
+ kdb_bp_t template = {0};
+
+ if (argc == 0) {
+ /*
+ * Display breakpoint table
+ */
+ for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT;
+ bpno++, bp++) {
+ if (bp->bp_free)
+ continue;
+ kdb_printbp(bp, bpno);
+ }
+
+ return 0;
+ }
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &template.bp_addr,
+ &offset, &symname);
+ if (diag)
+ return diag;
+ if (!template.bp_addr)
+ return KDB_BADINT;
+
+ /*
+ * Find an empty bp structure to allocate
+ */
+ free = KDB_MAXBPT;
+ for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) {
+ if (bp->bp_free)
+ break;
+ }
+
+ if (bpno == KDB_MAXBPT)
+ return KDB_TOOMANYBPT;
+
+ if (strcmp(argv[0], "bph") == 0) {
+ template.bp_type = BP_HARDWARE_BREAKPOINT;
+ diag = kdb_parsebp(argc, argv, &nextarg, &template);
+ if (diag)
+ return diag;
+ } else {
+ template.bp_type = BP_BREAKPOINT;
+ }
+
+ /*
+ * Check for clashing breakpoints.
+ *
+ * Note, in this design we can't have hardware breakpoints
+ * enabled for both read and write on the same address.
+ */
+ for (i = 0, bp_check = kdb_breakpoints; i < KDB_MAXBPT;
+ i++, bp_check++) {
+ if (!bp_check->bp_free &&
+ bp_check->bp_addr == template.bp_addr) {
+ kdb_printf("You already have a breakpoint at "
+ kdb_bfd_vma_fmt0 "\n", template.bp_addr);
+ return KDB_DUPBPT;
+ }
+ }
+
+ template.bp_enabled = 1;
+
+ /*
+ * Actually allocate the breakpoint found earlier
+ */
+ *bp = template;
+ bp->bp_free = 0;
+
+ kdb_printbp(bp, bpno);
+
+ return 0;
+}
+
+/*
+ * kdb_bc
+ *
+ * Handles the 'bc', 'be', and 'bd' commands
+ *
+ * [bd|bc|be] <breakpoint-number>
+ * [bd|bc|be] *
+ *
+ * Parameters:
+ * argc Count of arguments in argv
+ * argv Space delimited command line arguments
+ * Outputs:
+ * None.
+ * Returns:
+ * Zero for success, a kdb diagnostic for failure
+ * Locking:
+ * None.
+ * Remarks:
+ */
+static int kdb_bc(int argc, const char **argv)
+{
+ unsigned long addr;
+ kdb_bp_t *bp = NULL;
+ int lowbp = KDB_MAXBPT;
+ int highbp = 0;
+ int done = 0;
+ int i;
+ int diag = 0;
+
+ int cmd; /* KDBCMD_B? */
+#define KDBCMD_BC 0
+#define KDBCMD_BE 1
+#define KDBCMD_BD 2
+
+ if (strcmp(argv[0], "be") == 0)
+ cmd = KDBCMD_BE;
+ else if (strcmp(argv[0], "bd") == 0)
+ cmd = KDBCMD_BD;
+ else
+ cmd = KDBCMD_BC;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ if (strcmp(argv[1], "*") == 0) {
+ lowbp = 0;
+ highbp = KDB_MAXBPT;
+ } else {
+ diag = kdbgetularg(argv[1], &addr);
+ if (diag)
+ return diag;
+
+ /*
+ * For addresses less than the maximum breakpoint number,
+ * assume that the breakpoint number is desired.
+ */
+ if (addr < KDB_MAXBPT) {
+ bp = &kdb_breakpoints[addr];
+ lowbp = highbp = addr;
+ highbp++;
+ } else {
+ for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT;
+ i++, bp++) {
+ if (bp->bp_addr == addr) {
+ lowbp = highbp = i;
+ highbp++;
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Now operate on the set of breakpoints matching the input
+ * criteria (either '*' for all, or an individual breakpoint).
+ */
+ for (bp = &kdb_breakpoints[lowbp], i = lowbp;
+ i < highbp;
+ i++, bp++) {
+ if (bp->bp_free)
+ continue;
+
+ done++;
+
+ switch (cmd) {
+ case KDBCMD_BC:
+ bp->bp_enabled = 0;
+
+ kdb_printf("Breakpoint %d at "
+ kdb_bfd_vma_fmt " cleared\n",
+ i, bp->bp_addr);
+
+ bp->bp_addr = 0;
+ bp->bp_free = 1;
+
+ break;
+ case KDBCMD_BE:
+ bp->bp_enabled = 1;
+
+ kdb_printf("Breakpoint %d at "
+ kdb_bfd_vma_fmt " enabled",
+ i, bp->bp_addr);
+
+ kdb_printf("\n");
+ break;
+ case KDBCMD_BD:
+ if (!bp->bp_enabled)
+ break;
+
+ bp->bp_enabled = 0;
+
+ kdb_printf("Breakpoint %d at "
+ kdb_bfd_vma_fmt " disabled\n",
+ i, bp->bp_addr);
+
+ break;
+ }
+ if (bp->bp_delay && (cmd == KDBCMD_BC || cmd == KDBCMD_BD)) {
+ bp->bp_delay = 0;
+ KDB_STATE_CLEAR(SSBPT);
+ }
+ }
+
+ return (!done) ? KDB_BPTNOTFOUND : 0;
+}
+
+/*
+ * kdb_ss
+ *
+ * Process the 'ss' (Single Step) and 'ssb' (Single Step to Branch)
+ * commands.
+ *
+ * ss
+ * ssb
+ *
+ * Parameters:
+ * argc Argument count
+ * argv Argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * KDB_CMD_SS[B] for success, a kdb error if failure.
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ * Set the arch specific option to trigger a debug trap after the next
+ * instruction.
+ *
+ * For 'ssb', set the trace flag in the debug trap handler
+ * after printing the current insn and return directly without
+ * invoking the kdb command processor, until a branch instruction
+ * is encountered.
+ */
+
+static int kdb_ss(int argc, const char **argv)
+{
+ int ssb = 0;
+
+ ssb = (strcmp(argv[0], "ssb") == 0);
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+ /*
+ * Set trace flag and go.
+ */
+ KDB_STATE_SET(DOING_SS);
+ if (ssb) {
+ KDB_STATE_SET(DOING_SSB);
+ return KDB_CMD_SSB;
+ }
+ return KDB_CMD_SS;
+}
+
+/* Initialize the breakpoint table and register breakpoint commands. */
+
+void __init kdb_initbptab(void)
+{
+ int i;
+ kdb_bp_t *bp;
+
+ /*
+ * First time initialization.
+ */
+ memset(&kdb_breakpoints, '\0', sizeof(kdb_breakpoints));
+
+ for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++)
+ bp->bp_free = 1;
+
+ kdb_register_repeat("bp", kdb_bp, "[<vaddr>]",
+ "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("bl", kdb_bp, "[<vaddr>]",
+ "Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
+ if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT)
+ kdb_register_repeat("bph", kdb_bp, "[<vaddr>]",
+ "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("bc", kdb_bc, "<bpnum>",
+ "Clear Breakpoint", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("be", kdb_bc, "<bpnum>",
+ "Enable Breakpoint", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bd", kdb_bc, "<bpnum>",
+ "Disable Breakpoint", 0, KDB_REPEAT_NONE);
+
+ kdb_register_repeat("ss", kdb_ss, "",
+ "Single Step", 1, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("ssb", kdb_ss, "",
+ "Single step to branch/call", 0, KDB_REPEAT_NO_ARGS);
+ /*
+ * Architecture dependent initialization.
+ */
+}
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
new file mode 100644
index 00000000000..2f62fe85f16
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -0,0 +1,210 @@
+/*
+ * Kernel Debugger Architecture Independent Stack Traceback
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/kdb.h>
+#include <linux/nmi.h>
+#include <asm/system.h>
+#include "kdb_private.h"
+
+
+static void kdb_show_stack(struct task_struct *p, void *addr)
+{
+ int old_lvl = console_loglevel;
+ console_loglevel = 15;
+ kdb_trap_printk++;
+ kdb_set_current_task(p);
+ if (addr) {
+ show_stack((struct task_struct *)p, addr);
+ } else if (kdb_current_regs) {
+#ifdef CONFIG_X86
+ show_stack(p, &kdb_current_regs->sp);
+#else
+ show_stack(p, NULL);
+#endif
+ } else {
+ show_stack(p, NULL);
+ }
+ console_loglevel = old_lvl;
+ kdb_trap_printk--;
+}
+
+/*
+ * kdb_bt
+ *
+ * This function implements the 'bt' command. Print a stack
+ * traceback.
+ *
+ * bt [<address-expression>] (addr-exp is for alternate stacks)
+ * btp <pid> Kernel stack for <pid>
+ * btt <address-expression> Kernel stack for task structure at
+ * <address-expression>
+ * bta [DRSTCZEUIMA] All useful processes, optionally
+ * filtered by state
+ * btc [<cpu>] The current process on one cpu,
+ * default is all cpus
+ *
+ * bt <address-expression> refers to a address on the stack, that location
+ * is assumed to contain a return address.
+ *
+ * btt <address-expression> refers to the address of a struct task.
+ *
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Outputs:
+ * None.
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ * Locking:
+ * none.
+ * Remarks:
+ * Backtrack works best when the code uses frame pointers. But even
+ * without frame pointers we should get a reasonable trace.
+ *
+ * mds comes in handy when examining the stack to do a manual traceback or
+ * to get a starting point for bt <address-expression>.
+ */
+
+static int
+kdb_bt1(struct task_struct *p, unsigned long mask,
+ int argcount, int btaprompt)
+{
+ char buffer[2];
+ if (kdb_getarea(buffer[0], (unsigned long)p) ||
+ kdb_getarea(buffer[0], (unsigned long)(p+1)-1))
+ return KDB_BADADDR;
+ if (!kdb_task_state(p, mask))
+ return 0;
+ kdb_printf("Stack traceback for pid %d\n", p->pid);
+ kdb_ps1(p);
+ kdb_show_stack(p, NULL);
+ if (btaprompt) {
+ kdb_getstr(buffer, sizeof(buffer),
+ "Enter <q> to end, <cr> to continue:");
+ if (buffer[0] == 'q') {
+ kdb_printf("\n");
+ return 1;
+ }
+ }
+ touch_nmi_watchdog();
+ return 0;
+}
+
+int
+kdb_bt(int argc, const char **argv)
+{
+ int diag;
+ int argcount = 5;
+ int btaprompt = 1;
+ int nextarg;
+ unsigned long addr;
+ long offset;
+
+ kdbgetintenv("BTARGS", &argcount); /* Arguments to print */
+ kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each
+ * proc in bta */
+
+ if (strcmp(argv[0], "bta") == 0) {
+ struct task_struct *g, *p;
+ unsigned long cpu;
+ unsigned long mask = kdb_task_state_string(argc ? argv[1] :
+ NULL);
+ if (argc == 0)
+ kdb_ps_suppressed();
+ /* Run the active tasks first */
+ for_each_online_cpu(cpu) {
+ p = kdb_curr_task(cpu);
+ if (kdb_bt1(p, mask, argcount, btaprompt))
+ return 0;
+ }
+ /* Now the inactive tasks */
+ kdb_do_each_thread(g, p) {
+ if (task_curr(p))
+ continue;
+ if (kdb_bt1(p, mask, argcount, btaprompt))
+ return 0;
+ } kdb_while_each_thread(g, p);
+ } else if (strcmp(argv[0], "btp") == 0) {
+ struct task_struct *p;
+ unsigned long pid;
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ diag = kdbgetularg((char *)argv[1], &pid);
+ if (diag)
+ return diag;
+ p = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (p) {
+ kdb_set_current_task(p);
+ return kdb_bt1(p, ~0UL, argcount, 0);
+ }
+ kdb_printf("No process with pid == %ld found\n", pid);
+ return 0;
+ } else if (strcmp(argv[0], "btt") == 0) {
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ diag = kdbgetularg((char *)argv[1], &addr);
+ if (diag)
+ return diag;
+ kdb_set_current_task((struct task_struct *)addr);
+ return kdb_bt1((struct task_struct *)addr, ~0UL, argcount, 0);
+ } else if (strcmp(argv[0], "btc") == 0) {
+ unsigned long cpu = ~0;
+ struct task_struct *save_current_task = kdb_current_task;
+ char buf[80];
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+ if (argc == 1) {
+ diag = kdbgetularg((char *)argv[1], &cpu);
+ if (diag)
+ return diag;
+ }
+ /* Recursive use of kdb_parse, do not use argv after
+ * this point */
+ argv = NULL;
+ if (cpu != ~0) {
+ if (cpu >= num_possible_cpus() || !cpu_online(cpu)) {
+ kdb_printf("no process for cpu %ld\n", cpu);
+ return 0;
+ }
+ sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+ kdb_parse(buf);
+ return 0;
+ }
+ kdb_printf("btc: cpu status: ");
+ kdb_parse("cpu\n");
+ for_each_online_cpu(cpu) {
+ sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+ kdb_parse(buf);
+ touch_nmi_watchdog();
+ }
+ kdb_set_current_task(save_current_task);
+ return 0;
+ } else {
+ if (argc) {
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
+ &offset, NULL);
+ if (diag)
+ return diag;
+ kdb_show_stack(kdb_current_task, (void *)addr);
+ return 0;
+ } else {
+ return kdb_bt1(kdb_current_task, ~0UL, argcount, 0);
+ }
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
new file mode 100644
index 00000000000..56c88e4db30
--- /dev/null
+++ b/kernel/debug/kdb/kdb_cmds
@@ -0,0 +1,35 @@
+# Initial commands for kdb, alter to suit your needs.
+# These commands are executed in kdb_init() context, no SMP, no
+# processes. Commands that require process data (including stack or
+# registers) are not reliable this early. set and bp commands should
+# be safe. Global breakpoint commands affect each cpu as it is booted.
+
+# Standard debugging information for first level support, just type archkdb
+# or archkdbcpu or archkdbshort at the kdb prompt.
+
+defcmd dumpcommon "" "Common kdb debugging"
+ set BTAPROMPT 0
+ set LINES 10000
+ -summary
+ -cpu
+ -ps
+ -dmesg 600
+ -bt
+endefcmd
+
+defcmd dumpall "" "First line debugging"
+ set BTSYMARG 1
+ set BTARGS 9
+ pid R
+ -dumpcommon
+ -bta
+endefcmd
+
+defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
+ set BTSYMARG 1
+ set BTARGS 9
+ pid R
+ -dumpcommon
+ -btc
+endefcmd
+
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
new file mode 100644
index 00000000000..bf6e8270e95
--- /dev/null
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -0,0 +1,169 @@
+/*
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/kdebug.h>
+#include "kdb_private.h"
+#include "../debug_core.h"
+
+/*
+ * KDB interface to KGDB internals
+ */
+get_char_func kdb_poll_funcs[] = {
+ dbg_io_get_char,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+};
+EXPORT_SYMBOL_GPL(kdb_poll_funcs);
+
+int kdb_poll_idx = 1;
+EXPORT_SYMBOL_GPL(kdb_poll_idx);
+
+int kdb_stub(struct kgdb_state *ks)
+{
+ int error = 0;
+ kdb_bp_t *bp;
+ unsigned long addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
+ kdb_reason_t reason = KDB_REASON_OOPS;
+ kdb_dbtrap_t db_result = KDB_DB_NOBPT;
+ int i;
+
+ if (KDB_STATE(REENTRY)) {
+ reason = KDB_REASON_SWITCH;
+ KDB_STATE_CLEAR(REENTRY);
+ addr = instruction_pointer(ks->linux_regs);
+ }
+ ks->pass_exception = 0;
+ if (atomic_read(&kgdb_setting_breakpoint))
+ reason = KDB_REASON_KEYBOARD;
+
+ for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
+ if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
+ reason = KDB_REASON_BREAK;
+ db_result = KDB_DB_BPT;
+ if (addr != instruction_pointer(ks->linux_regs))
+ kgdb_arch_set_pc(ks->linux_regs, addr);
+ break;
+ }
+ }
+ if (reason == KDB_REASON_BREAK || reason == KDB_REASON_SWITCH) {
+ for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
+ if (bp->bp_free)
+ continue;
+ if (bp->bp_addr == addr) {
+ bp->bp_delay = 1;
+ bp->bp_delayed = 1;
+ /*
+ * SSBPT is set when the kernel debugger must single step a
+ * task in order to re-establish an instruction breakpoint
+ * which uses the instruction replacement mechanism. It is
+ * cleared by any action that removes the need to single-step
+ * the breakpoint.
+ */
+ reason = KDB_REASON_BREAK;
+ db_result = KDB_DB_BPT;
+ KDB_STATE_SET(SSBPT);
+ break;
+ }
+ }
+ }
+
+ if (reason != KDB_REASON_BREAK && ks->ex_vector == 0 &&
+ ks->signo == SIGTRAP) {
+ reason = KDB_REASON_SSTEP;
+ db_result = KDB_DB_BPT;
+ }
+ /* Set initial kdb state variables */
+ KDB_STATE_CLEAR(KGDB_TRANS);
+ kdb_initial_cpu = ks->cpu;
+ kdb_current_task = kgdb_info[ks->cpu].task;
+ kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo;
+ /* Remove any breakpoints as needed by kdb and clear single step */
+ kdb_bp_remove();
+ KDB_STATE_CLEAR(DOING_SS);
+ KDB_STATE_CLEAR(DOING_SSB);
+ KDB_STATE_SET(PAGER);
+ /* zero out any offline cpu data */
+ for_each_present_cpu(i) {
+ if (!cpu_online(i)) {
+ kgdb_info[i].debuggerinfo = NULL;
+ kgdb_info[i].task = NULL;
+ }
+ }
+ if (ks->err_code == DIE_OOPS || reason == KDB_REASON_OOPS) {
+ ks->pass_exception = 1;
+ KDB_FLAG_SET(CATASTROPHIC);
+ }
+ kdb_initial_cpu = ks->cpu;
+ if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) {
+ KDB_STATE_CLEAR(SSBPT);
+ KDB_STATE_CLEAR(DOING_SS);
+ } else {
+ /* Start kdb main loop */
+ error = kdb_main_loop(KDB_REASON_ENTER, reason,
+ ks->err_code, db_result, ks->linux_regs);
+ }
+ /*
+ * Upon exit from the kdb main loop setup break points and restart
+ * the system based on the requested continue state
+ */
+ kdb_initial_cpu = -1;
+ kdb_current_task = NULL;
+ kdb_current_regs = NULL;
+ KDB_STATE_CLEAR(PAGER);
+ kdbnearsym_cleanup();
+ if (error == KDB_CMD_KGDB) {
+ if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
+ /*
+ * This inteface glue which allows kdb to transition in into
+ * the gdb stub. In order to do this the '?' or '' gdb serial
+ * packet response is processed here. And then control is
+ * passed to the gdbstub.
+ */
+ if (KDB_STATE(DOING_KGDB))
+ gdbstub_state(ks, "?");
+ else
+ gdbstub_state(ks, "");
+ KDB_STATE_CLEAR(DOING_KGDB);
+ KDB_STATE_CLEAR(DOING_KGDB2);
+ }
+ return DBG_PASS_EVENT;
+ }
+ kdb_bp_install(ks->linux_regs);
+ dbg_activate_sw_breakpoints();
+ /* Set the exit state to a single step or a continue */
+ if (KDB_STATE(DOING_SS))
+ gdbstub_state(ks, "s");
+ else
+ gdbstub_state(ks, "c");
+
+ KDB_FLAG_CLEAR(CATASTROPHIC);
+
+ /* Invoke arch specific exception handling prior to system resume */
+ kgdb_info[ks->cpu].ret_state = gdbstub_state(ks, "e");
+ if (ks->pass_exception)
+ kgdb_info[ks->cpu].ret_state = 1;
+ if (error == KDB_CMD_CPU) {
+ KDB_STATE_SET(REENTRY);
+ /*
+ * Force clear the single step bit because kdb emulates this
+ * differently vs the gdbstub
+ */
+ kgdb_single_step = 0;
+ dbg_deactivate_sw_breakpoints();
+ return DBG_SWITCH_CPU_EVENT;
+ }
+ return kgdb_info[ks->cpu].ret_state;
+}
+
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
new file mode 100644
index 00000000000..c9b7f4f90bb
--- /dev/null
+++ b/kernel/debug/kdb/kdb_io.c
@@ -0,0 +1,826 @@
+/*
+ * Kernel Debugger Architecture Independent Console I/O handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/kallsyms.h>
+#include "kdb_private.h"
+
+#define CMD_BUFLEN 256
+char kdb_prompt_str[CMD_BUFLEN];
+
+int kdb_trap_printk;
+
+static void kgdb_transition_check(char *buffer)
+{
+ int slen = strlen(buffer);
+ if (strncmp(buffer, "$?#3f", slen) != 0 &&
+ strncmp(buffer, "$qSupported#37", slen) != 0 &&
+ strncmp(buffer, "+$qSupported#37", slen) != 0) {
+ KDB_STATE_SET(KGDB_TRANS);
+ kdb_printf("%s", buffer);
+ }
+}
+
+static int kdb_read_get_key(char *buffer, size_t bufsize)
+{
+#define ESCAPE_UDELAY 1000
+#define ESCAPE_DELAY (2*1000000/ESCAPE_UDELAY) /* 2 seconds worth of udelays */
+ char escape_data[5]; /* longest vt100 escape sequence is 4 bytes */
+ char *ped = escape_data;
+ int escape_delay = 0;
+ get_char_func *f, *f_escape = NULL;
+ int key;
+
+ for (f = &kdb_poll_funcs[0]; ; ++f) {
+ if (*f == NULL) {
+ /* Reset NMI watchdog once per poll loop */
+ touch_nmi_watchdog();
+ f = &kdb_poll_funcs[0];
+ }
+ if (escape_delay == 2) {
+ *ped = '\0';
+ ped = escape_data;
+ --escape_delay;
+ }
+ if (escape_delay == 1) {
+ key = *ped++;
+ if (!*ped)
+ --escape_delay;
+ break;
+ }
+ key = (*f)();
+ if (key == -1) {
+ if (escape_delay) {
+ udelay(ESCAPE_UDELAY);
+ --escape_delay;
+ }
+ continue;
+ }
+ if (bufsize <= 2) {
+ if (key == '\r')
+ key = '\n';
+ *buffer++ = key;
+ *buffer = '\0';
+ return -1;
+ }
+ if (escape_delay == 0 && key == '\e') {
+ escape_delay = ESCAPE_DELAY;
+ ped = escape_data;
+ f_escape = f;
+ }
+ if (escape_delay) {
+ *ped++ = key;
+ if (f_escape != f) {
+ escape_delay = 2;
+ continue;
+ }
+ if (ped - escape_data == 1) {
+ /* \e */
+ continue;
+ } else if (ped - escape_data == 2) {
+ /* \e<something> */
+ if (key != '[')
+ escape_delay = 2;
+ continue;
+ } else if (ped - escape_data == 3) {
+ /* \e[<something> */
+ int mapkey = 0;
+ switch (key) {
+ case 'A': /* \e[A, up arrow */
+ mapkey = 16;
+ break;
+ case 'B': /* \e[B, down arrow */
+ mapkey = 14;
+ break;
+ case 'C': /* \e[C, right arrow */
+ mapkey = 6;
+ break;
+ case 'D': /* \e[D, left arrow */
+ mapkey = 2;
+ break;
+ case '1': /* dropthrough */
+ case '3': /* dropthrough */
+ /* \e[<1,3,4>], may be home, del, end */
+ case '4':
+ mapkey = -1;
+ break;
+ }
+ if (mapkey != -1) {
+ if (mapkey > 0) {
+ escape_data[0] = mapkey;
+ escape_data[1] = '\0';
+ }
+ escape_delay = 2;
+ }
+ continue;
+ } else if (ped - escape_data == 4) {
+ /* \e[<1,3,4><something> */
+ int mapkey = 0;
+ if (key == '~') {
+ switch (escape_data[2]) {
+ case '1': /* \e[1~, home */
+ mapkey = 1;
+ break;
+ case '3': /* \e[3~, del */
+ mapkey = 4;
+ break;
+ case '4': /* \e[4~, end */
+ mapkey = 5;
+ break;
+ }
+ }
+ if (mapkey > 0) {
+ escape_data[0] = mapkey;
+ escape_data[1] = '\0';
+ }
+ escape_delay = 2;
+ continue;
+ }
+ }
+ break; /* A key to process */
+ }
+ return key;
+}
+
+/*
+ * kdb_read
+ *
+ * This function reads a string of characters, terminated by
+ * a newline, or by reaching the end of the supplied buffer,
+ * from the current kernel debugger console device.
+ * Parameters:
+ * buffer - Address of character buffer to receive input characters.
+ * bufsize - size, in bytes, of the character buffer
+ * Returns:
+ * Returns a pointer to the buffer containing the received
+ * character string. This string will be terminated by a
+ * newline character.
+ * Locking:
+ * No locks are required to be held upon entry to this
+ * function. It is not reentrant - it relies on the fact
+ * that while kdb is running on only one "master debug" cpu.
+ * Remarks:
+ *
+ * The buffer size must be >= 2. A buffer size of 2 means that the caller only
+ * wants a single key.
+ *
+ * An escape key could be the start of a vt100 control sequence such as \e[D
+ * (left arrow) or it could be a character in its own right. The standard
+ * method for detecting the difference is to wait for 2 seconds to see if there
+ * are any other characters. kdb is complicated by the lack of a timer service
+ * (interrupts are off), by multiple input sources and by the need to sometimes
+ * return after just one key. Escape sequence processing has to be done as
+ * states in the polling loop.
+ */
+
+static char *kdb_read(char *buffer, size_t bufsize)
+{
+ char *cp = buffer;
+ char *bufend = buffer+bufsize-2; /* Reserve space for newline
+ * and null byte */
+ char *lastchar;
+ char *p_tmp;
+ char tmp;
+ static char tmpbuffer[CMD_BUFLEN];
+ int len = strlen(buffer);
+ int len_tmp;
+ int tab = 0;
+ int count;
+ int i;
+ int diag, dtab_count;
+ int key;
+
+
+ diag = kdbgetintenv("DTABCOUNT", &dtab_count);
+ if (diag)
+ dtab_count = 30;
+
+ if (len > 0) {
+ cp += len;
+ if (*(buffer+len-1) == '\n')
+ cp--;
+ }
+
+ lastchar = cp;
+ *cp = '\0';
+ kdb_printf("%s", buffer);
+poll_again:
+ key = kdb_read_get_key(buffer, bufsize);
+ if (key == -1)
+ return buffer;
+ if (key != 9)
+ tab = 0;
+ switch (key) {
+ case 8: /* backspace */
+ if (cp > buffer) {
+ if (cp < lastchar) {
+ memcpy(tmpbuffer, cp, lastchar - cp);
+ memcpy(cp-1, tmpbuffer, lastchar - cp);
+ }
+ *(--lastchar) = '\0';
+ --cp;
+ kdb_printf("\b%s \r", cp);
+ tmp = *cp;
+ *cp = '\0';
+ kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", buffer);
+ *cp = tmp;
+ }
+ break;
+ case 13: /* enter */
+ *lastchar++ = '\n';
+ *lastchar++ = '\0';
+ kdb_printf("\n");
+ return buffer;
+ case 4: /* Del */
+ if (cp < lastchar) {
+ memcpy(tmpbuffer, cp+1, lastchar - cp - 1);
+ memcpy(cp, tmpbuffer, lastchar - cp - 1);
+ *(--lastchar) = '\0';
+ kdb_printf("%s \r", cp);
+ tmp = *cp;
+ *cp = '\0';
+ kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", buffer);
+ *cp = tmp;
+ }
+ break;
+ case 1: /* Home */
+ if (cp > buffer) {
+ kdb_printf("\r");
+ kdb_printf(kdb_prompt_str);
+ cp = buffer;
+ }
+ break;
+ case 5: /* End */
+ if (cp < lastchar) {
+ kdb_printf("%s", cp);
+ cp = lastchar;
+ }
+ break;
+ case 2: /* Left */
+ if (cp > buffer) {
+ kdb_printf("\b");
+ --cp;
+ }
+ break;
+ case 14: /* Down */
+ memset(tmpbuffer, ' ',
+ strlen(kdb_prompt_str) + (lastchar-buffer));
+ *(tmpbuffer+strlen(kdb_prompt_str) +
+ (lastchar-buffer)) = '\0';
+ kdb_printf("\r%s\r", tmpbuffer);
+ *lastchar = (char)key;
+ *(lastchar+1) = '\0';
+ return lastchar;
+ case 6: /* Right */
+ if (cp < lastchar) {
+ kdb_printf("%c", *cp);
+ ++cp;
+ }
+ break;
+ case 16: /* Up */
+ memset(tmpbuffer, ' ',
+ strlen(kdb_prompt_str) + (lastchar-buffer));
+ *(tmpbuffer+strlen(kdb_prompt_str) +
+ (lastchar-buffer)) = '\0';
+ kdb_printf("\r%s\r", tmpbuffer);
+ *lastchar = (char)key;
+ *(lastchar+1) = '\0';
+ return lastchar;
+ case 9: /* Tab */
+ if (tab < 2)
+ ++tab;
+ p_tmp = buffer;
+ while (*p_tmp == ' ')
+ p_tmp++;
+ if (p_tmp > cp)
+ break;
+ memcpy(tmpbuffer, p_tmp, cp-p_tmp);
+ *(tmpbuffer + (cp-p_tmp)) = '\0';
+ p_tmp = strrchr(tmpbuffer, ' ');
+ if (p_tmp)
+ ++p_tmp;
+ else
+ p_tmp = tmpbuffer;
+ len = strlen(p_tmp);
+ count = kallsyms_symbol_complete(p_tmp,
+ sizeof(tmpbuffer) -
+ (p_tmp - tmpbuffer));
+ if (tab == 2 && count > 0) {
+ kdb_printf("\n%d symbols are found.", count);
+ if (count > dtab_count) {
+ count = dtab_count;
+ kdb_printf(" But only first %d symbols will"
+ " be printed.\nYou can change the"
+ " environment variable DTABCOUNT.",
+ count);
+ }
+ kdb_printf("\n");
+ for (i = 0; i < count; i++) {
+ if (kallsyms_symbol_next(p_tmp, i) < 0)
+ break;
+ kdb_printf("%s ", p_tmp);
+ *(p_tmp + len) = '\0';
+ }
+ if (i >= dtab_count)
+ kdb_printf("...");
+ kdb_printf("\n");
+ kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", buffer);
+ } else if (tab != 2 && count > 0) {
+ len_tmp = strlen(p_tmp);
+ strncpy(p_tmp+len_tmp, cp, lastchar-cp+1);
+ len_tmp = strlen(p_tmp);
+ strncpy(cp, p_tmp+len, len_tmp-len + 1);
+ len = len_tmp - len;
+ kdb_printf("%s", cp);
+ cp += len;
+ lastchar += len;
+ }
+ kdb_nextline = 1; /* reset output line number */
+ break;
+ default:
+ if (key >= 32 && lastchar < bufend) {
+ if (cp < lastchar) {
+ memcpy(tmpbuffer, cp, lastchar - cp);
+ memcpy(cp+1, tmpbuffer, lastchar - cp);
+ *++lastchar = '\0';
+ *cp = key;
+ kdb_printf("%s\r", cp);
+ ++cp;
+ tmp = *cp;
+ *cp = '\0';
+ kdb_printf(kdb_prompt_str);
+ kdb_printf("%s", buffer);
+ *cp = tmp;
+ } else {
+ *++lastchar = '\0';
+ *cp++ = key;
+ /* The kgdb transition check will hide
+ * printed characters if we think that
+ * kgdb is connecting, until the check
+ * fails */
+ if (!KDB_STATE(KGDB_TRANS))
+ kgdb_transition_check(buffer);
+ else
+ kdb_printf("%c", key);
+ }
+ /* Special escape to kgdb */
+ if (lastchar - buffer >= 5 &&
+ strcmp(lastchar - 5, "$?#3f") == 0) {
+ strcpy(buffer, "kgdb");
+ KDB_STATE_SET(DOING_KGDB);
+ return buffer;
+ }
+ if (lastchar - buffer >= 14 &&
+ strcmp(lastchar - 14, "$qSupported#37") == 0) {
+ strcpy(buffer, "kgdb");
+ KDB_STATE_SET(DOING_KGDB2);
+ return buffer;
+ }
+ }
+ break;
+ }
+ goto poll_again;
+}
+
+/*
+ * kdb_getstr
+ *
+ * Print the prompt string and read a command from the
+ * input device.
+ *
+ * Parameters:
+ * buffer Address of buffer to receive command
+ * bufsize Size of buffer in bytes
+ * prompt Pointer to string to use as prompt string
+ * Returns:
+ * Pointer to command buffer.
+ * Locking:
+ * None.
+ * Remarks:
+ * For SMP kernels, the processor number will be
+ * substituted for %d, %x or %o in the prompt.
+ */
+
+char *kdb_getstr(char *buffer, size_t bufsize, char *prompt)
+{
+ if (prompt && kdb_prompt_str != prompt)
+ strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
+ kdb_printf(kdb_prompt_str);
+ kdb_nextline = 1; /* Prompt and input resets line number */
+ return kdb_read(buffer, bufsize);
+}
+
+/*
+ * kdb_input_flush
+ *
+ * Get rid of any buffered console input.
+ *
+ * Parameters:
+ * none
+ * Returns:
+ * nothing
+ * Locking:
+ * none
+ * Remarks:
+ * Call this function whenever you want to flush input. If there is any
+ * outstanding input, it ignores all characters until there has been no
+ * data for approximately 1ms.
+ */
+
+static void kdb_input_flush(void)
+{
+ get_char_func *f;
+ int res;
+ int flush_delay = 1;
+ while (flush_delay) {
+ flush_delay--;
+empty:
+ touch_nmi_watchdog();
+ for (f = &kdb_poll_funcs[0]; *f; ++f) {
+ res = (*f)();
+ if (res != -1) {
+ flush_delay = 1;
+ goto empty;
+ }
+ }
+ if (flush_delay)
+ mdelay(1);
+ }
+}
+
+/*
+ * kdb_printf
+ *
+ * Print a string to the output device(s).
+ *
+ * Parameters:
+ * printf-like format and optional args.
+ * Returns:
+ * 0
+ * Locking:
+ * None.
+ * Remarks:
+ * use 'kdbcons->write()' to avoid polluting 'log_buf' with
+ * kdb output.
+ *
+ * If the user is doing a cmd args | grep srch
+ * then kdb_grepping_flag is set.
+ * In that case we need to accumulate full lines (ending in \n) before
+ * searching for the pattern.
+ */
+
+static char kdb_buffer[256]; /* A bit too big to go on stack */
+static char *next_avail = kdb_buffer;
+static int size_avail;
+static int suspend_grep;
+
+/*
+ * search arg1 to see if it contains arg2
+ * (kdmain.c provides flags for ^pat and pat$)
+ *
+ * return 1 for found, 0 for not found
+ */
+static int kdb_search_string(char *searched, char *searchfor)
+{
+ char firstchar, *cp;
+ int len1, len2;
+
+ /* not counting the newline at the end of "searched" */
+ len1 = strlen(searched)-1;
+ len2 = strlen(searchfor);
+ if (len1 < len2)
+ return 0;
+ if (kdb_grep_leading && kdb_grep_trailing && len1 != len2)
+ return 0;
+ if (kdb_grep_leading) {
+ if (!strncmp(searched, searchfor, len2))
+ return 1;
+ } else if (kdb_grep_trailing) {
+ if (!strncmp(searched+len1-len2, searchfor, len2))
+ return 1;
+ } else {
+ firstchar = *searchfor;
+ cp = searched;
+ while ((cp = strchr(cp, firstchar))) {
+ if (!strncmp(cp, searchfor, len2))
+ return 1;
+ cp++;
+ }
+ }
+ return 0;
+}
+
+int vkdb_printf(const char *fmt, va_list ap)
+{
+ int diag;
+ int linecount;
+ int logging, saved_loglevel = 0;
+ int saved_trap_printk;
+ int got_printf_lock = 0;
+ int retlen = 0;
+ int fnd, len;
+ char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
+ char *moreprompt = "more> ";
+ struct console *c = console_drivers;
+ static DEFINE_SPINLOCK(kdb_printf_lock);
+ unsigned long uninitialized_var(flags);
+
+ preempt_disable();
+ saved_trap_printk = kdb_trap_printk;
+ kdb_trap_printk = 0;
+
+ /* Serialize kdb_printf if multiple cpus try to write at once.
+ * But if any cpu goes recursive in kdb, just print the output,
+ * even if it is interleaved with any other text.
+ */
+ if (!KDB_STATE(PRINTF_LOCK)) {
+ KDB_STATE_SET(PRINTF_LOCK);
+ spin_lock_irqsave(&kdb_printf_lock, flags);
+ got_printf_lock = 1;
+ atomic_inc(&kdb_event);
+ } else {
+ __acquire(kdb_printf_lock);
+ }
+
+ diag = kdbgetintenv("LINES", &linecount);
+ if (diag || linecount <= 1)
+ linecount = 24;
+
+ diag = kdbgetintenv("LOGGING", &logging);
+ if (diag)
+ logging = 0;
+
+ if (!kdb_grepping_flag || suspend_grep) {
+ /* normally, every vsnprintf starts a new buffer */
+ next_avail = kdb_buffer;
+ size_avail = sizeof(kdb_buffer);
+ }
+ vsnprintf(next_avail, size_avail, fmt, ap);
+
+ /*
+ * If kdb_parse() found that the command was cmd xxx | grep yyy
+ * then kdb_grepping_flag is set, and kdb_grep_string contains yyy
+ *
+ * Accumulate the print data up to a newline before searching it.
+ * (vsnprintf does null-terminate the string that it generates)
+ */
+
+ /* skip the search if prints are temporarily unconditional */
+ if (!suspend_grep && kdb_grepping_flag) {
+ cp = strchr(kdb_buffer, '\n');
+ if (!cp) {
+ /*
+ * Special cases that don't end with newlines
+ * but should be written without one:
+ * The "[nn]kdb> " prompt should
+ * appear at the front of the buffer.
+ *
+ * The "[nn]more " prompt should also be
+ * (MOREPROMPT -> moreprompt)
+ * written * but we print that ourselves,
+ * we set the suspend_grep flag to make
+ * it unconditional.
+ *
+ */
+ if (next_avail == kdb_buffer) {
+ /*
+ * these should occur after a newline,
+ * so they will be at the front of the
+ * buffer
+ */
+ cp2 = kdb_buffer;
+ len = strlen(kdb_prompt_str);
+ if (!strncmp(cp2, kdb_prompt_str, len)) {
+ /*
+ * We're about to start a new
+ * command, so we can go back
+ * to normal mode.
+ */
+ kdb_grepping_flag = 0;
+ goto kdb_printit;
+ }
+ }
+ /* no newline; don't search/write the buffer
+ until one is there */
+ len = strlen(kdb_buffer);
+ next_avail = kdb_buffer + len;
+ size_avail = sizeof(kdb_buffer) - len;
+ goto kdb_print_out;
+ }
+
+ /*
+ * The newline is present; print through it or discard
+ * it, depending on the results of the search.
+ */
+ cp++; /* to byte after the newline */
+ replaced_byte = *cp; /* remember what/where it was */
+ cphold = cp;
+ *cp = '\0'; /* end the string for our search */
+
+ /*
+ * We now have a newline at the end of the string
+ * Only continue with this output if it contains the
+ * search string.
+ */
+ fnd = kdb_search_string(kdb_buffer, kdb_grep_string);
+ if (!fnd) {
+ /*
+ * At this point the complete line at the start
+ * of kdb_buffer can be discarded, as it does
+ * not contain what the user is looking for.
+ * Shift the buffer left.
+ */
+ *cphold = replaced_byte;
+ strcpy(kdb_buffer, cphold);
+ len = strlen(kdb_buffer);
+ next_avail = kdb_buffer + len;
+ size_avail = sizeof(kdb_buffer) - len;
+ goto kdb_print_out;
+ }
+ /*
+ * at this point the string is a full line and
+ * should be printed, up to the null.
+ */
+ }
+kdb_printit:
+
+ /*
+ * Write to all consoles.
+ */
+ retlen = strlen(kdb_buffer);
+ if (!dbg_kdb_mode && kgdb_connected) {
+ gdbstub_msg_write(kdb_buffer, retlen);
+ } else {
+ if (!dbg_io_ops->is_console) {
+ len = strlen(kdb_buffer);
+ cp = kdb_buffer;
+ while (len--) {
+ dbg_io_ops->write_char(*cp);
+ cp++;
+ }
+ }
+ while (c) {
+ c->write(c, kdb_buffer, retlen);
+ touch_nmi_watchdog();
+ c = c->next;
+ }
+ }
+ if (logging) {
+ saved_loglevel = console_loglevel;
+ console_loglevel = 0;
+ printk(KERN_INFO "%s", kdb_buffer);
+ }
+
+ if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n'))
+ kdb_nextline++;
+
+ /* check for having reached the LINES number of printed lines */
+ if (kdb_nextline == linecount) {
+ char buf1[16] = "";
+#if defined(CONFIG_SMP)
+ char buf2[32];
+#endif
+
+ /* Watch out for recursion here. Any routine that calls
+ * kdb_printf will come back through here. And kdb_read
+ * uses kdb_printf to echo on serial consoles ...
+ */
+ kdb_nextline = 1; /* In case of recursion */
+
+ /*
+ * Pause until cr.
+ */
+ moreprompt = kdbgetenv("MOREPROMPT");
+ if (moreprompt == NULL)
+ moreprompt = "more> ";
+
+#if defined(CONFIG_SMP)
+ if (strchr(moreprompt, '%')) {
+ sprintf(buf2, moreprompt, get_cpu());
+ put_cpu();
+ moreprompt = buf2;
+ }
+#endif
+
+ kdb_input_flush();
+ c = console_drivers;
+
+ if (!dbg_io_ops->is_console) {
+ len = strlen(moreprompt);
+ cp = moreprompt;
+ while (len--) {
+ dbg_io_ops->write_char(*cp);
+ cp++;
+ }
+ }
+ while (c) {
+ c->write(c, moreprompt, strlen(moreprompt));
+ touch_nmi_watchdog();
+ c = c->next;
+ }
+
+ if (logging)
+ printk("%s", moreprompt);
+
+ kdb_read(buf1, 2); /* '2' indicates to return
+ * immediately after getting one key. */
+ kdb_nextline = 1; /* Really set output line 1 */
+
+ /* empty and reset the buffer: */
+ kdb_buffer[0] = '\0';
+ next_avail = kdb_buffer;
+ size_avail = sizeof(kdb_buffer);
+ if ((buf1[0] == 'q') || (buf1[0] == 'Q')) {
+ /* user hit q or Q */
+ KDB_FLAG_SET(CMD_INTERRUPT); /* command interrupted */
+ KDB_STATE_CLEAR(PAGER);
+ /* end of command output; back to normal mode */
+ kdb_grepping_flag = 0;
+ kdb_printf("\n");
+ } else if (buf1[0] == ' ') {
+ kdb_printf("\n");
+ suspend_grep = 1; /* for this recursion */
+ } else if (buf1[0] == '\n') {
+ kdb_nextline = linecount - 1;
+ kdb_printf("\r");
+ suspend_grep = 1; /* for this recursion */
+ } else if (buf1[0] && buf1[0] != '\n') {
+ /* user hit something other than enter */
+ suspend_grep = 1; /* for this recursion */
+ kdb_printf("\nOnly 'q' or 'Q' are processed at more "
+ "prompt, input ignored\n");
+ } else if (kdb_grepping_flag) {
+ /* user hit enter */
+ suspend_grep = 1; /* for this recursion */
+ kdb_printf("\n");
+ }
+ kdb_input_flush();
+ }
+
+ /*
+ * For grep searches, shift the printed string left.
+ * replaced_byte contains the character that was overwritten with
+ * the terminating null, and cphold points to the null.
+ * Then adjust the notion of available space in the buffer.
+ */
+ if (kdb_grepping_flag && !suspend_grep) {
+ *cphold = replaced_byte;
+ strcpy(kdb_buffer, cphold);
+ len = strlen(kdb_buffer);
+ next_avail = kdb_buffer + len;
+ size_avail = sizeof(kdb_buffer) - len;
+ }
+
+kdb_print_out:
+ suspend_grep = 0; /* end of what may have been a recursive call */
+ if (logging)
+ console_loglevel = saved_loglevel;
+ if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
+ got_printf_lock = 0;
+ spin_unlock_irqrestore(&kdb_printf_lock, flags);
+ KDB_STATE_CLEAR(PRINTF_LOCK);
+ atomic_dec(&kdb_event);
+ } else {
+ __release(kdb_printf_lock);
+ }
+ kdb_trap_printk = saved_trap_printk;
+ preempt_enable();
+ return retlen;
+}
+
+int kdb_printf(const char *fmt, ...)
+{
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = vkdb_printf(fmt, ap);
+ va_end(ap);
+
+ return r;
+}
+
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
new file mode 100644
index 00000000000..4bca634975c
--- /dev/null
+++ b/kernel/debug/kdb/kdb_keyboard.c
@@ -0,0 +1,212 @@
+/*
+ * Kernel Debugger Architecture Dependent Console I/O handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/kdb.h>
+#include <linux/keyboard.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/io.h>
+
+/* Keyboard Controller Registers on normal PCs. */
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/* Status Register Bits */
+
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+
+static int kbd_exists;
+
+/*
+ * Check if the keyboard controller has a keypress for us.
+ * Some parts (Enter Release, LED change) are still blocking polled here,
+ * but hopefully they are all short.
+ */
+int kdb_get_kbd_char(void)
+{
+ int scancode, scanstatus;
+ static int shift_lock; /* CAPS LOCK state (0-off, 1-on) */
+ static int shift_key; /* Shift next keypress */
+ static int ctrl_key;
+ u_short keychar;
+
+ if (KDB_FLAG(NO_I8042) || KDB_FLAG(NO_VT_CONSOLE) ||
+ (inb(KBD_STATUS_REG) == 0xff && inb(KBD_DATA_REG) == 0xff)) {
+ kbd_exists = 0;
+ return -1;
+ }
+ kbd_exists = 1;
+
+ if ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
+ return -1;
+
+ /*
+ * Fetch the scancode
+ */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+
+ /*
+ * Ignore mouse events.
+ */
+ if (scanstatus & KBD_STAT_MOUSE_OBF)
+ return -1;
+
+ /*
+ * Ignore release, trigger on make
+ * (except for shift keys, where we want to
+ * keep the shift state so long as the key is
+ * held down).
+ */
+
+ if (((scancode&0x7f) == 0x2a) || ((scancode&0x7f) == 0x36)) {
+ /*
+ * Next key may use shift table
+ */
+ if ((scancode & 0x80) == 0)
+ shift_key = 1;
+ else
+ shift_key = 0;
+ return -1;
+ }
+
+ if ((scancode&0x7f) == 0x1d) {
+ /*
+ * Left ctrl key
+ */
+ if ((scancode & 0x80) == 0)
+ ctrl_key = 1;
+ else
+ ctrl_key = 0;
+ return -1;
+ }
+
+ if ((scancode & 0x80) != 0)
+ return -1;
+
+ scancode &= 0x7f;
+
+ /*
+ * Translate scancode
+ */
+
+ if (scancode == 0x3a) {
+ /*
+ * Toggle caps lock
+ */
+ shift_lock ^= 1;
+
+#ifdef KDB_BLINK_LED
+ kdb_toggleled(0x4);
+#endif
+ return -1;
+ }
+
+ if (scancode == 0x0e) {
+ /*
+ * Backspace
+ */
+ return 8;
+ }
+
+ /* Special Key */
+ switch (scancode) {
+ case 0xF: /* Tab */
+ return 9;
+ case 0x53: /* Del */
+ return 4;
+ case 0x47: /* Home */
+ return 1;
+ case 0x4F: /* End */
+ return 5;
+ case 0x4B: /* Left */
+ return 2;
+ case 0x48: /* Up */
+ return 16;
+ case 0x50: /* Down */
+ return 14;
+ case 0x4D: /* Right */
+ return 6;
+ }
+
+ if (scancode == 0xe0)
+ return -1;
+
+ /*
+ * For Japanese 86/106 keyboards
+ * See comment in drivers/char/pc_keyb.c.
+ * - Masahiro Adegawa
+ */
+ if (scancode == 0x73)
+ scancode = 0x59;
+ else if (scancode == 0x7d)
+ scancode = 0x7c;
+
+ if (!shift_lock && !shift_key && !ctrl_key) {
+ keychar = plain_map[scancode];
+ } else if ((shift_lock || shift_key) && key_maps[1]) {
+ keychar = key_maps[1][scancode];
+ } else if (ctrl_key && key_maps[4]) {
+ keychar = key_maps[4][scancode];
+ } else {
+ keychar = 0x0020;
+ kdb_printf("Unknown state/scancode (%d)\n", scancode);
+ }
+ keychar &= 0x0fff;
+ if (keychar == '\t')
+ keychar = ' ';
+ switch (KTYP(keychar)) {
+ case KT_LETTER:
+ case KT_LATIN:
+ if (isprint(keychar))
+ break; /* printable characters */
+ /* drop through */
+ case KT_SPEC:
+ if (keychar == K_ENTER)
+ break;
+ /* drop through */
+ default:
+ return -1; /* ignore unprintables */
+ }
+
+ if ((scancode & 0x7f) == 0x1c) {
+ /*
+ * enter key. All done. Absorb the release scancode.
+ */
+ while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
+ ;
+
+ /*
+ * Fetch the scancode
+ */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+
+ while (scanstatus & KBD_STAT_MOUSE_OBF) {
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+ }
+
+ if (scancode != 0x9c) {
+ /*
+ * Wasn't an enter-release, why not?
+ */
+ kdb_printf("kdb: expected enter got 0x%x status 0x%x\n",
+ scancode, scanstatus);
+ }
+
+ return 13;
+ }
+
+ return keychar & 0xff;
+}
+EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
new file mode 100644
index 00000000000..b724c791b6d
--- /dev/null
+++ b/kernel/debug/kdb/kdb_main.c
@@ -0,0 +1,2849 @@
+/*
+ * Kernel Debugger Architecture Independent Main Code
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sysrq.h>
+#include <linux/smp.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nmi.h>
+#include <linux/time.h>
+#include <linux/ptrace.h>
+#include <linux/sysctl.h>
+#include <linux/cpu.h>
+#include <linux/kdebug.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include "kdb_private.h"
+
+#define GREP_LEN 256
+char kdb_grep_string[GREP_LEN];
+int kdb_grepping_flag;
+EXPORT_SYMBOL(kdb_grepping_flag);
+int kdb_grep_leading;
+int kdb_grep_trailing;
+
+/*
+ * Kernel debugger state flags
+ */
+int kdb_flags;
+atomic_t kdb_event;
+
+/*
+ * kdb_lock protects updates to kdb_initial_cpu. Used to
+ * single thread processors through the kernel debugger.
+ */
+int kdb_initial_cpu = -1; /* cpu number that owns kdb */
+int kdb_nextline = 1;
+int kdb_state; /* General KDB state */
+
+struct task_struct *kdb_current_task;
+EXPORT_SYMBOL(kdb_current_task);
+struct pt_regs *kdb_current_regs;
+
+const char *kdb_diemsg;
+static int kdb_go_count;
+#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
+static unsigned int kdb_continue_catastrophic =
+ CONFIG_KDB_CONTINUE_CATASTROPHIC;
+#else
+static unsigned int kdb_continue_catastrophic;
+#endif
+
+/* kdb_commands describes the available commands. */
+static kdbtab_t *kdb_commands;
+#define KDB_BASE_CMD_MAX 50
+static int kdb_max_commands = KDB_BASE_CMD_MAX;
+static kdbtab_t kdb_base_commands[50];
+#define for_each_kdbcmd(cmd, num) \
+ for ((cmd) = kdb_base_commands, (num) = 0; \
+ num < kdb_max_commands; \
+ num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+
+typedef struct _kdbmsg {
+ int km_diag; /* kdb diagnostic */
+ char *km_msg; /* Corresponding message text */
+} kdbmsg_t;
+
+#define KDBMSG(msgnum, text) \
+ { KDB_##msgnum, text }
+
+static kdbmsg_t kdbmsgs[] = {
+ KDBMSG(NOTFOUND, "Command Not Found"),
+ KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
+ KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD use 1, 2, 4 or 8, "
+ "8 is only allowed on 64 bit systems"),
+ KDBMSG(BADRADIX, "Illegal value for RADIX use 8, 10 or 16"),
+ KDBMSG(NOTENV, "Cannot find environment variable"),
+ KDBMSG(NOENVVALUE, "Environment variable should have value"),
+ KDBMSG(NOTIMP, "Command not implemented"),
+ KDBMSG(ENVFULL, "Environment full"),
+ KDBMSG(ENVBUFFULL, "Environment buffer full"),
+ KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
+#ifdef CONFIG_CPU_XSCALE
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
+#else
+ KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
+#endif
+ KDBMSG(DUPBPT, "Duplicate breakpoint address"),
+ KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
+ KDBMSG(BADMODE, "Invalid IDMODE"),
+ KDBMSG(BADINT, "Illegal numeric value"),
+ KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
+ KDBMSG(BADREG, "Invalid register name"),
+ KDBMSG(BADCPUNUM, "Invalid cpu number"),
+ KDBMSG(BADLENGTH, "Invalid length field"),
+ KDBMSG(NOBP, "No Breakpoint exists"),
+ KDBMSG(BADADDR, "Invalid address"),
+};
+#undef KDBMSG
+
+static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
+
+
+/*
+ * Initial environment. This is all kept static and local to
+ * this file. We don't want to rely on the memory allocation
+ * mechanisms in the kernel, so we use a very limited allocate-only
+ * heap for new and altered environment variables. The entire
+ * environment is limited to a fixed number of entries (add more
+ * to __env[] if required) and a fixed amount of heap (add more to
+ * KDB_ENVBUFSIZE if required).
+ */
+
+static char *__env[] = {
+#if defined(CONFIG_SMP)
+ "PROMPT=[%d]kdb> ",
+ "MOREPROMPT=[%d]more> ",
+#else
+ "PROMPT=kdb> ",
+ "MOREPROMPT=more> ",
+#endif
+ "RADIX=16",
+ "MDCOUNT=8", /* lines of md output */
+ "BTARGS=9", /* 9 possible args in bt */
+ KDB_PLATFORM_ENV,
+ "DTABCOUNT=30",
+ "NOSECT=1",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+};
+
+static const int __nenv = (sizeof(__env) / sizeof(char *));
+
+struct task_struct *kdb_curr_task(int cpu)
+{
+ struct task_struct *p = curr_task(cpu);
+#ifdef _TIF_MCA_INIT
+ if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && KDB_TSK(cpu))
+ p = krp->p;
+#endif
+ return p;
+}
+
+/*
+ * kdbgetenv - This function will return the character string value of
+ * an environment variable.
+ * Parameters:
+ * match A character string representing an environment variable.
+ * Returns:
+ * NULL No environment variable matches 'match'
+ * char* Pointer to string value of environment variable.
+ */
+char *kdbgetenv(const char *match)
+{
+ char **ep = __env;
+ int matchlen = strlen(match);
+ int i;
+
+ for (i = 0; i < __nenv; i++) {
+ char *e = *ep++;
+
+ if (!e)
+ continue;
+
+ if ((strncmp(match, e, matchlen) == 0)
+ && ((e[matchlen] == '\0')
+ || (e[matchlen] == '='))) {
+ char *cp = strchr(e, '=');
+ return cp ? ++cp : "";
+ }
+ }
+ return NULL;
+}
+
+/*
+ * kdballocenv - This function is used to allocate bytes for
+ * environment entries.
+ * Parameters:
+ * match A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long representation of the env variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ * Remarks:
+ * We use a static environment buffer (envbuffer) to hold the values
+ * of dynamically generated environment variables (see kdb_set). Buffer
+ * space once allocated is never free'd, so over time, the amount of space
+ * (currently 512 bytes) will be exhausted if env variables are changed
+ * frequently.
+ */
+static char *kdballocenv(size_t bytes)
+{
+#define KDB_ENVBUFSIZE 512
+ static char envbuffer[KDB_ENVBUFSIZE];
+ static int envbufsize;
+ char *ep = NULL;
+
+ if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
+ ep = &envbuffer[envbufsize];
+ envbufsize += bytes;
+ }
+ return ep;
+}
+
+/*
+ * kdbgetulenv - This function will return the value of an unsigned
+ * long-valued environment variable.
+ * Parameters:
+ * match A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long represntation of the env variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ */
+static int kdbgetulenv(const char *match, unsigned long *value)
+{
+ char *ep;
+
+ ep = kdbgetenv(match);
+ if (!ep)
+ return KDB_NOTENV;
+ if (strlen(ep) == 0)
+ return KDB_NOENVVALUE;
+
+ *value = simple_strtoul(ep, NULL, 0);
+
+ return 0;
+}
+
+/*
+ * kdbgetintenv - This function will return the value of an
+ * integer-valued environment variable.
+ * Parameters:
+ * match A character string representing an integer-valued env variable
+ * Outputs:
+ * *value the integer representation of the environment variable 'match'
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ */
+int kdbgetintenv(const char *match, int *value)
+{
+ unsigned long val;
+ int diag;
+
+ diag = kdbgetulenv(match, &val);
+ if (!diag)
+ *value = (int) val;
+ return diag;
+}
+
+/*
+ * kdbgetularg - This function will convert a numeric string into an
+ * unsigned long value.
+ * Parameters:
+ * arg A character string representing a numeric value
+ * Outputs:
+ * *value the unsigned long represntation of arg.
+ * Returns:
+ * Zero on success, a kdb diagnostic on failure.
+ */
+int kdbgetularg(const char *arg, unsigned long *value)
+{
+ char *endp;
+ unsigned long val;
+
+ val = simple_strtoul(arg, &endp, 0);
+
+ if (endp == arg) {
+ /*
+ * Try base 16, for us folks too lazy to type the
+ * leading 0x...
+ */
+ val = simple_strtoul(arg, &endp, 16);
+ if (endp == arg)
+ return KDB_BADINT;
+ }
+
+ *value = val;
+
+ return 0;
+}
+
+/*
+ * kdb_set - This function implements the 'set' command. Alter an
+ * existing environment variable or create a new one.
+ */
+int kdb_set(int argc, const char **argv)
+{
+ int i;
+ char *ep;
+ size_t varlen, vallen;
+
+ /*
+ * we can be invoked two ways:
+ * set var=value argv[1]="var", argv[2]="value"
+ * set var = value argv[1]="var", argv[2]="=", argv[3]="value"
+ * - if the latter, shift 'em down.
+ */
+ if (argc == 3) {
+ argv[2] = argv[3];
+ argc--;
+ }
+
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+
+ /*
+ * Check for internal variables
+ */
+ if (strcmp(argv[1], "KDBDEBUG") == 0) {
+ unsigned int debugflags;
+ char *cp;
+
+ debugflags = simple_strtoul(argv[2], &cp, 0);
+ if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
+ kdb_printf("kdb: illegal debug flags '%s'\n",
+ argv[2]);
+ return 0;
+ }
+ kdb_flags = (kdb_flags &
+ ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+ | (debugflags << KDB_DEBUG_FLAG_SHIFT);
+
+ return 0;
+ }
+
+ /*
+ * Tokenizer squashed the '=' sign. argv[1] is variable
+ * name, argv[2] = value.
+ */
+ varlen = strlen(argv[1]);
+ vallen = strlen(argv[2]);
+ ep = kdballocenv(varlen + vallen + 2);
+ if (ep == (char *)0)
+ return KDB_ENVBUFFULL;
+
+ sprintf(ep, "%s=%s", argv[1], argv[2]);
+
+ ep[varlen+vallen+1] = '\0';
+
+ for (i = 0; i < __nenv; i++) {
+ if (__env[i]
+ && ((strncmp(__env[i], argv[1], varlen) == 0)
+ && ((__env[i][varlen] == '\0')
+ || (__env[i][varlen] == '=')))) {
+ __env[i] = ep;
+ return 0;
+ }
+ }
+
+ /*
+ * Wasn't existing variable. Fit into slot.
+ */
+ for (i = 0; i < __nenv-1; i++) {
+ if (__env[i] == (char *)0) {
+ __env[i] = ep;
+ return 0;
+ }
+ }
+
+ return KDB_ENVFULL;
+}
+
+static int kdb_check_regs(void)
+{
+ if (!kdb_current_regs) {
+ kdb_printf("No current kdb registers."
+ " You may need to select another task\n");
+ return KDB_BADREG;
+ }
+ return 0;
+}
+
+/*
+ * kdbgetaddrarg - This function is responsible for parsing an
+ * address-expression and returning the value of the expression,
+ * symbol name, and offset to the caller.
+ *
+ * The argument may consist of a numeric value (decimal or
+ * hexidecimal), a symbol name, a register name (preceeded by the
+ * percent sign), an environment variable with a numeric value
+ * (preceeded by a dollar sign) or a simple arithmetic expression
+ * consisting of a symbol name, +/-, and a numeric constant value
+ * (offset).
+ * Parameters:
+ * argc - count of arguments in argv
+ * argv - argument vector
+ * *nextarg - index to next unparsed argument in argv[]
+ * regs - Register state at time of KDB entry
+ * Outputs:
+ * *value - receives the value of the address-expression
+ * *offset - receives the offset specified, if any
+ * *name - receives the symbol name, if any
+ * *nextarg - index to next unparsed argument in argv[]
+ * Returns:
+ * zero is returned on success, a kdb diagnostic code is
+ * returned on error.
+ */
+int kdbgetaddrarg(int argc, const char **argv, int *nextarg,
+ unsigned long *value, long *offset,
+ char **name)
+{
+ unsigned long addr;
+ unsigned long off = 0;
+ int positive;
+ int diag;
+ int found = 0;
+ char *symname;
+ char symbol = '\0';
+ char *cp;
+ kdb_symtab_t symtab;
+
+ /*
+ * Process arguments which follow the following syntax:
+ *
+ * symbol | numeric-address [+/- numeric-offset]
+ * %register
+ * $environment-variable
+ */
+
+ if (*nextarg > argc)
+ return KDB_ARGCOUNT;
+
+ symname = (char *)argv[*nextarg];
+
+ /*
+ * If there is no whitespace between the symbol
+ * or address and the '+' or '-' symbols, we
+ * remember the character and replace it with a
+ * null so the symbol/value can be properly parsed
+ */
+ cp = strpbrk(symname, "+-");
+ if (cp != NULL) {
+ symbol = *cp;
+ *cp++ = '\0';
+ }
+
+ if (symname[0] == '$') {
+ diag = kdbgetulenv(&symname[1], &addr);
+ if (diag)
+ return diag;
+ } else if (symname[0] == '%') {
+ diag = kdb_check_regs();
+ if (diag)
+ return diag;
+ /* Implement register values with % at a later time as it is
+ * arch optional.
+ */
+ return KDB_NOTIMP;
+ } else {
+ found = kdbgetsymval(symname, &symtab);
+ if (found) {
+ addr = symtab.sym_start;
+ } else {
+ diag = kdbgetularg(argv[*nextarg], &addr);
+ if (diag)
+ return diag;
+ }
+ }
+
+ if (!found)
+ found = kdbnearsym(addr, &symtab);
+
+ (*nextarg)++;
+
+ if (name)
+ *name = symname;
+ if (value)
+ *value = addr;
+ if (offset && name && *name)
+ *offset = addr - symtab.sym_start;
+
+ if ((*nextarg > argc)
+ && (symbol == '\0'))
+ return 0;
+
+ /*
+ * check for +/- and offset
+ */
+
+ if (symbol == '\0') {
+ if ((argv[*nextarg][0] != '+')
+ && (argv[*nextarg][0] != '-')) {
+ /*
+ * Not our argument. Return.
+ */
+ return 0;
+ } else {
+ positive = (argv[*nextarg][0] == '+');
+ (*nextarg)++;
+ }
+ } else
+ positive = (symbol == '+');
+
+ /*
+ * Now there must be an offset!
+ */
+ if ((*nextarg > argc)
+ && (symbol == '\0')) {
+ return KDB_INVADDRFMT;
+ }
+
+ if (!symbol) {
+ cp = (char *)argv[*nextarg];
+ (*nextarg)++;
+ }
+
+ diag = kdbgetularg(cp, &off);
+ if (diag)
+ return diag;
+
+ if (!positive)
+ off = -off;
+
+ if (offset)
+ *offset += off;
+
+ if (value)
+ *value += off;
+
+ return 0;
+}
+
+static void kdb_cmderror(int diag)
+{
+ int i;
+
+ if (diag >= 0) {
+ kdb_printf("no error detected (diagnostic is %d)\n", diag);
+ return;
+ }
+
+ for (i = 0; i < __nkdb_err; i++) {
+ if (kdbmsgs[i].km_diag == diag) {
+ kdb_printf("diag: %d: %s\n", diag, kdbmsgs[i].km_msg);
+ return;
+ }
+ }
+
+ kdb_printf("Unknown diag %d\n", -diag);
+}
+
+/*
+ * kdb_defcmd, kdb_defcmd2 - This function implements the 'defcmd'
+ * command which defines one command as a set of other commands,
+ * terminated by endefcmd. kdb_defcmd processes the initial
+ * 'defcmd' command, kdb_defcmd2 is invoked from kdb_parse for
+ * the following commands until 'endefcmd'.
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ */
+struct defcmd_set {
+ int count;
+ int usable;
+ char *name;
+ char *usage;
+ char *help;
+ char **command;
+};
+static struct defcmd_set *defcmd_set;
+static int defcmd_set_count;
+static int defcmd_in_progress;
+
+/* Forward references */
+static int kdb_exec_defcmd(int argc, const char **argv);
+
+static int kdb_defcmd2(const char *cmdstr, const char *argv0)
+{
+ struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
+ char **save_command = s->command;
+ if (strcmp(argv0, "endefcmd") == 0) {
+ defcmd_in_progress = 0;
+ if (!s->count)
+ s->usable = 0;
+ if (s->usable)
+ kdb_register(s->name, kdb_exec_defcmd,
+ s->usage, s->help, 0);
+ return 0;
+ }
+ if (!s->usable)
+ return KDB_NOTIMP;
+ s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+ if (!s->command) {
+ kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
+ cmdstr);
+ s->usable = 0;
+ return KDB_NOTIMP;
+ }
+ memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
+ s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
+ kfree(save_command);
+ return 0;
+}
+
+static int kdb_defcmd(int argc, const char **argv)
+{
+ struct defcmd_set *save_defcmd_set = defcmd_set, *s;
+ if (defcmd_in_progress) {
+ kdb_printf("kdb: nested defcmd detected, assuming missing "
+ "endefcmd\n");
+ kdb_defcmd2("endefcmd", "endefcmd");
+ }
+ if (argc == 0) {
+ int i;
+ for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
+ kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
+ s->usage, s->help);
+ for (i = 0; i < s->count; ++i)
+ kdb_printf("%s", s->command[i]);
+ kdb_printf("endefcmd\n");
+ }
+ return 0;
+ }
+ if (argc != 3)
+ return KDB_ARGCOUNT;
+ defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set),
+ GFP_KDB);
+ if (!defcmd_set) {
+ kdb_printf("Could not allocate new defcmd_set entry for %s\n",
+ argv[1]);
+ defcmd_set = save_defcmd_set;
+ return KDB_NOTIMP;
+ }
+ memcpy(defcmd_set, save_defcmd_set,
+ defcmd_set_count * sizeof(*defcmd_set));
+ kfree(save_defcmd_set);
+ s = defcmd_set + defcmd_set_count;
+ memset(s, 0, sizeof(*s));
+ s->usable = 1;
+ s->name = kdb_strdup(argv[1], GFP_KDB);
+ s->usage = kdb_strdup(argv[2], GFP_KDB);
+ s->help = kdb_strdup(argv[3], GFP_KDB);
+ if (s->usage[0] == '"') {
+ strcpy(s->usage, s->usage+1);
+ s->usage[strlen(s->usage)-1] = '\0';
+ }
+ if (s->help[0] == '"') {
+ strcpy(s->help, s->help+1);
+ s->help[strlen(s->help)-1] = '\0';
+ }
+ ++defcmd_set_count;
+ defcmd_in_progress = 1;
+ return 0;
+}
+
+/*
+ * kdb_exec_defcmd - Execute the set of commands associated with this
+ * defcmd name.
+ * Inputs:
+ * argc argument count
+ * argv argument vector
+ * Returns:
+ * zero for success, a kdb diagnostic if error
+ */
+static int kdb_exec_defcmd(int argc, const char **argv)
+{
+ int i, ret;
+ struct defcmd_set *s;
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+ for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
+ if (strcmp(s->name, argv[0]) == 0)
+ break;
+ }
+ if (i == defcmd_set_count) {
+ kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
+ argv[0]);
+ return KDB_NOTIMP;
+ }
+ for (i = 0; i < s->count; ++i) {
+ /* Recursive use of kdb_parse, do not use argv after
+ * this point */
+ argv = NULL;
+ kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
+ ret = kdb_parse(s->command[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/* Command history */
+#define KDB_CMD_HISTORY_COUNT 32
+#define CMD_BUFLEN 200 /* kdb_printf: max printline
+ * size == 256 */
+static unsigned int cmd_head, cmd_tail;
+static unsigned int cmdptr;
+static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
+static char cmd_cur[CMD_BUFLEN];
+
+/*
+ * The "str" argument may point to something like | grep xyz
+ */
+static void parse_grep(const char *str)
+{
+ int len;
+ char *cp = (char *)str, *cp2;
+
+ /* sanity check: we should have been called with the \ first */
+ if (*cp != '|')
+ return;
+ cp++;
+ while (isspace(*cp))
+ cp++;
+ if (strncmp(cp, "grep ", 5)) {
+ kdb_printf("invalid 'pipe', see grephelp\n");
+ return;
+ }
+ cp += 5;
+ while (isspace(*cp))
+ cp++;
+ cp2 = strchr(cp, '\n');
+ if (cp2)
+ *cp2 = '\0'; /* remove the trailing newline */
+ len = strlen(cp);
+ if (len == 0) {
+ kdb_printf("invalid 'pipe', see grephelp\n");
+ return;
+ }
+ /* now cp points to a nonzero length search string */
+ if (*cp == '"') {
+ /* allow it be "x y z" by removing the "'s - there must
+ be two of them */
+ cp++;
+ cp2 = strchr(cp, '"');
+ if (!cp2) {
+ kdb_printf("invalid quoted string, see grephelp\n");
+ return;
+ }
+ *cp2 = '\0'; /* end the string where the 2nd " was */
+ }
+ kdb_grep_leading = 0;
+ if (*cp == '^') {
+ kdb_grep_leading = 1;
+ cp++;
+ }
+ len = strlen(cp);
+ kdb_grep_trailing = 0;
+ if (*(cp+len-1) == '$') {
+ kdb_grep_trailing = 1;
+ *(cp+len-1) = '\0';
+ }
+ len = strlen(cp);
+ if (!len)
+ return;
+ if (len >= GREP_LEN) {
+ kdb_printf("search string too long\n");
+ return;
+ }
+ strcpy(kdb_grep_string, cp);
+ kdb_grepping_flag++;
+ return;
+}
+
+/*
+ * kdb_parse - Parse the command line, search the command table for a
+ * matching command and invoke the command function. This
+ * function may be called recursively, if it is, the second call
+ * will overwrite argv and cbuf. It is the caller's
+ * responsibility to save their argv if they recursively call
+ * kdb_parse().
+ * Parameters:
+ * cmdstr The input command line to be parsed.
+ * regs The registers at the time kdb was entered.
+ * Returns:
+ * Zero for success, a kdb diagnostic if failure.
+ * Remarks:
+ * Limited to 20 tokens.
+ *
+ * Real rudimentary tokenization. Basically only whitespace
+ * is considered a token delimeter (but special consideration
+ * is taken of the '=' sign as used by the 'set' command).
+ *
+ * The algorithm used to tokenize the input string relies on
+ * there being at least one whitespace (or otherwise useless)
+ * character between tokens as the character immediately following
+ * the token is altered in-place to a null-byte to terminate the
+ * token string.
+ */
+
+#define MAXARGC 20
+
+int kdb_parse(const char *cmdstr)
+{
+ static char *argv[MAXARGC];
+ static int argc;
+ static char cbuf[CMD_BUFLEN+2];
+ char *cp;
+ char *cpp, quoted;
+ kdbtab_t *tp;
+ int i, escaped, ignore_errors = 0, check_grep;
+
+ /*
+ * First tokenize the command string.
+ */
+ cp = (char *)cmdstr;
+ kdb_grepping_flag = check_grep = 0;
+
+ if (KDB_FLAG(CMD_INTERRUPT)) {
+ /* Previous command was interrupted, newline must not
+ * repeat the command */
+ KDB_FLAG_CLEAR(CMD_INTERRUPT);
+ KDB_STATE_SET(PAGER);
+ argc = 0; /* no repeat */
+ }
+
+ if (*cp != '\n' && *cp != '\0') {
+ argc = 0;
+ cpp = cbuf;
+ while (*cp) {
+ /* skip whitespace */
+ while (isspace(*cp))
+ cp++;
+ if ((*cp == '\0') || (*cp == '\n') ||
+ (*cp == '#' && !defcmd_in_progress))
+ break;
+ /* special case: check for | grep pattern */
+ if (*cp == '|') {
+ check_grep++;
+ break;
+ }
+ if (cpp >= cbuf + CMD_BUFLEN) {
+ kdb_printf("kdb_parse: command buffer "
+ "overflow, command ignored\n%s\n",
+ cmdstr);
+ return KDB_NOTFOUND;
+ }
+ if (argc >= MAXARGC - 1) {
+ kdb_printf("kdb_parse: too many arguments, "
+ "command ignored\n%s\n", cmdstr);
+ return KDB_NOTFOUND;
+ }
+ argv[argc++] = cpp;
+ escaped = 0;
+ quoted = '\0';
+ /* Copy to next unquoted and unescaped
+ * whitespace or '=' */
+ while (*cp && *cp != '\n' &&
+ (escaped || quoted || !isspace(*cp))) {
+ if (cpp >= cbuf + CMD_BUFLEN)
+ break;
+ if (escaped) {
+ escaped = 0;
+ *cpp++ = *cp++;
+ continue;
+ }
+ if (*cp == '\\') {
+ escaped = 1;
+ ++cp;
+ continue;
+ }
+ if (*cp == quoted)
+ quoted = '\0';
+ else if (*cp == '\'' || *cp == '"')
+ quoted = *cp;
+ *cpp = *cp++;
+ if (*cpp == '=' && !quoted)
+ break;
+ ++cpp;
+ }
+ *cpp++ = '\0'; /* Squash a ws or '=' character */
+ }
+ }
+ if (!argc)
+ return 0;
+ if (check_grep)
+ parse_grep(cp);
+ if (defcmd_in_progress) {
+ int result = kdb_defcmd2(cmdstr, argv[0]);
+ if (!defcmd_in_progress) {
+ argc = 0; /* avoid repeat on endefcmd */
+ *(argv[0]) = '\0';
+ }
+ return result;
+ }
+ if (argv[0][0] == '-' && argv[0][1] &&
+ (argv[0][1] < '0' || argv[0][1] > '9')) {
+ ignore_errors = 1;
+ ++argv[0];
+ }
+
+ for_each_kdbcmd(tp, i) {
+ if (tp->cmd_name) {
+ /*
+ * If this command is allowed to be abbreviated,
+ * check to see if this is it.
+ */
+
+ if (tp->cmd_minlen
+ && (strlen(argv[0]) <= tp->cmd_minlen)) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ tp->cmd_minlen) == 0) {
+ break;
+ }
+ }
+
+ if (strcmp(argv[0], tp->cmd_name) == 0)
+ break;
+ }
+ }
+
+ /*
+ * If we don't find a command by this name, see if the first
+ * few characters of this match any of the known commands.
+ * e.g., md1c20 should match md.
+ */
+ if (i == kdb_max_commands) {
+ for_each_kdbcmd(tp, i) {
+ if (tp->cmd_name) {
+ if (strncmp(argv[0],
+ tp->cmd_name,
+ strlen(tp->cmd_name)) == 0) {
+ break;
+ }
+ }
+ }
+ }
+
+ if (i < kdb_max_commands) {
+ int result;
+ KDB_STATE_SET(CMD);
+ result = (*tp->cmd_func)(argc-1, (const char **)argv);
+ if (result && ignore_errors && result > KDB_CMD_GO)
+ result = 0;
+ KDB_STATE_CLEAR(CMD);
+ switch (tp->cmd_repeat) {
+ case KDB_REPEAT_NONE:
+ argc = 0;
+ if (argv[0])
+ *(argv[0]) = '\0';
+ break;
+ case KDB_REPEAT_NO_ARGS:
+ argc = 1;
+ if (argv[1])
+ *(argv[1]) = '\0';
+ break;
+ case KDB_REPEAT_WITH_ARGS:
+ break;
+ }
+ return result;
+ }
+
+ /*
+ * If the input with which we were presented does not
+ * map to an existing command, attempt to parse it as an
+ * address argument and display the result. Useful for
+ * obtaining the address of a variable, or the nearest symbol
+ * to an address contained in a register.
+ */
+ {
+ unsigned long value;
+ char *name = NULL;
+ long offset;
+ int nextarg = 0;
+
+ if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
+ &value, &offset, &name)) {
+ return KDB_NOTFOUND;
+ }
+
+ kdb_printf("%s = ", argv[0]);
+ kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
+ kdb_printf("\n");
+ return 0;
+ }
+}
+
+
+static int handle_ctrl_cmd(char *cmd)
+{
+#define CTRL_P 16
+#define CTRL_N 14
+
+ /* initial situation */
+ if (cmd_head == cmd_tail)
+ return 0;
+ switch (*cmd) {
+ case CTRL_P:
+ if (cmdptr != cmd_tail)
+ cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
+ strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+ return 1;
+ case CTRL_N:
+ if (cmdptr != cmd_head)
+ cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
+ strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * kdb_reboot - This function implements the 'reboot' command. Reboot
+ * the system immediately, or loop for ever on failure.
+ */
+static int kdb_reboot(int argc, const char **argv)
+{
+ emergency_restart();
+ kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
+ while (1)
+ cpu_relax();
+ /* NOTREACHED */
+ return 0;
+}
+
+static void kdb_dumpregs(struct pt_regs *regs)
+{
+ int old_lvl = console_loglevel;
+ console_loglevel = 15;
+ kdb_trap_printk++;
+ show_regs(regs);
+ kdb_trap_printk--;
+ kdb_printf("\n");
+ console_loglevel = old_lvl;
+}
+
+void kdb_set_current_task(struct task_struct *p)
+{
+ kdb_current_task = p;
+
+ if (kdb_task_has_cpu(p)) {
+ kdb_current_regs = KDB_TSKREGS(kdb_process_cpu(p));
+ return;
+ }
+ kdb_current_regs = NULL;
+}
+
+/*
+ * kdb_local - The main code for kdb. This routine is invoked on a
+ * specific processor, it is not global. The main kdb() routine
+ * ensures that only one processor at a time is in this routine.
+ * This code is called with the real reason code on the first
+ * entry to a kdb session, thereafter it is called with reason
+ * SWITCH, even if the user goes back to the original cpu.
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * regs The exception frame at time of fault/breakpoint.
+ * db_result Result code from the break or debug point.
+ * Returns:
+ * 0 KDB was invoked for an event which it wasn't responsible
+ * 1 KDB handled the event for which it was invoked.
+ * KDB_CMD_GO User typed 'go'.
+ * KDB_CMD_CPU User switched to another cpu.
+ * KDB_CMD_SS Single step.
+ * KDB_CMD_SSB Single step until branch.
+ */
+static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
+ kdb_dbtrap_t db_result)
+{
+ char *cmdbuf;
+ int diag;
+ struct task_struct *kdb_current =
+ kdb_curr_task(raw_smp_processor_id());
+
+ KDB_DEBUG_STATE("kdb_local 1", reason);
+ kdb_go_count = 0;
+ if (reason == KDB_REASON_DEBUG) {
+ /* special case below */
+ } else {
+ kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
+ kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+ kdb_printf("on processor %d ", raw_smp_processor_id());
+#endif
+ }
+
+ switch (reason) {
+ case KDB_REASON_DEBUG:
+ {
+ /*
+ * If re-entering kdb after a single step
+ * command, don't print the message.
+ */
+ switch (db_result) {
+ case KDB_DB_BPT:
+ kdb_printf("\nEntering kdb (0x%p, pid %d) ",
+ kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+ kdb_printf("on processor %d ", raw_smp_processor_id());
+#endif
+ kdb_printf("due to Debug @ " kdb_machreg_fmt "\n",
+ instruction_pointer(regs));
+ break;
+ case KDB_DB_SSB:
+ /*
+ * In the midst of ssb command. Just return.
+ */
+ KDB_DEBUG_STATE("kdb_local 3", reason);
+ return KDB_CMD_SSB; /* Continue with SSB command */
+
+ break;
+ case KDB_DB_SS:
+ break;
+ case KDB_DB_SSBPT:
+ KDB_DEBUG_STATE("kdb_local 4", reason);
+ return 1; /* kdba_db_trap did the work */
+ default:
+ kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
+ db_result);
+ break;
+ }
+
+ }
+ break;
+ case KDB_REASON_ENTER:
+ if (KDB_STATE(KEYBOARD))
+ kdb_printf("due to Keyboard Entry\n");
+ else
+ kdb_printf("due to KDB_ENTER()\n");
+ break;
+ case KDB_REASON_KEYBOARD:
+ KDB_STATE_SET(KEYBOARD);
+ kdb_printf("due to Keyboard Entry\n");
+ break;
+ case KDB_REASON_ENTER_SLAVE:
+ /* drop through, slaves only get released via cpu switch */
+ case KDB_REASON_SWITCH:
+ kdb_printf("due to cpu switch\n");
+ break;
+ case KDB_REASON_OOPS:
+ kdb_printf("Oops: %s\n", kdb_diemsg);
+ kdb_printf("due to oops @ " kdb_machreg_fmt "\n",
+ instruction_pointer(regs));
+ kdb_dumpregs(regs);
+ break;
+ case KDB_REASON_NMI:
+ kdb_printf("due to NonMaskable Interrupt @ "
+ kdb_machreg_fmt "\n",
+ instruction_pointer(regs));
+ kdb_dumpregs(regs);
+ break;
+ case KDB_REASON_SSTEP:
+ case KDB_REASON_BREAK:
+ kdb_printf("due to %s @ " kdb_machreg_fmt "\n",
+ reason == KDB_REASON_BREAK ?
+ "Breakpoint" : "SS trap", instruction_pointer(regs));
+ /*
+ * Determine if this breakpoint is one that we
+ * are interested in.
+ */
+ if (db_result != KDB_DB_BPT) {
+ kdb_printf("kdb: error return from kdba_bp_trap: %d\n",
+ db_result);
+ KDB_DEBUG_STATE("kdb_local 6", reason);
+ return 0; /* Not for us, dismiss it */
+ }
+ break;
+ case KDB_REASON_RECURSE:
+ kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n",
+ instruction_pointer(regs));
+ break;
+ default:
+ kdb_printf("kdb: unexpected reason code: %d\n", reason);
+ KDB_DEBUG_STATE("kdb_local 8", reason);
+ return 0; /* Not for us, dismiss it */
+ }
+
+ while (1) {
+ /*
+ * Initialize pager context.
+ */
+ kdb_nextline = 1;
+ KDB_STATE_CLEAR(SUPPRESS);
+
+ cmdbuf = cmd_cur;
+ *cmdbuf = '\0';
+ *(cmd_hist[cmd_head]) = '\0';
+
+ if (KDB_FLAG(ONLY_DO_DUMP)) {
+ /* kdb is off but a catastrophic error requires a dump.
+ * Take the dump and reboot.
+ * Turn on logging so the kdb output appears in the log
+ * buffer in the dump.
+ */
+ const char *setargs[] = { "set", "LOGGING", "1" };
+ kdb_set(2, setargs);
+ kdb_reboot(0, NULL);
+ /*NOTREACHED*/
+ }
+
+do_full_getstr:
+#if defined(CONFIG_SMP)
+ snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
+ raw_smp_processor_id());
+#else
+ snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
+#endif
+ if (defcmd_in_progress)
+ strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN);
+
+ /*
+ * Fetch command from keyboard
+ */
+ cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
+ if (*cmdbuf != '\n') {
+ if (*cmdbuf < 32) {
+ if (cmdptr == cmd_head) {
+ strncpy(cmd_hist[cmd_head], cmd_cur,
+ CMD_BUFLEN);
+ *(cmd_hist[cmd_head] +
+ strlen(cmd_hist[cmd_head])-1) = '\0';
+ }
+ if (!handle_ctrl_cmd(cmdbuf))
+ *(cmd_cur+strlen(cmd_cur)-1) = '\0';
+ cmdbuf = cmd_cur;
+ goto do_full_getstr;
+ } else {
+ strncpy(cmd_hist[cmd_head], cmd_cur,
+ CMD_BUFLEN);
+ }
+
+ cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
+ if (cmd_head == cmd_tail)
+ cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
+ }
+
+ cmdptr = cmd_head;
+ diag = kdb_parse(cmdbuf);
+ if (diag == KDB_NOTFOUND) {
+ kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
+ diag = 0;
+ }
+ if (diag == KDB_CMD_GO
+ || diag == KDB_CMD_CPU
+ || diag == KDB_CMD_SS
+ || diag == KDB_CMD_SSB
+ || diag == KDB_CMD_KGDB)
+ break;
+
+ if (diag)
+ kdb_cmderror(diag);
+ }
+ KDB_DEBUG_STATE("kdb_local 9", diag);
+ return diag;
+}
+
+
+/*
+ * kdb_print_state - Print the state data for the current processor
+ * for debugging.
+ * Inputs:
+ * text Identifies the debug point
+ * value Any integer value to be printed, e.g. reason code.
+ */
+void kdb_print_state(const char *text, int value)
+{
+ kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
+ text, raw_smp_processor_id(), value, kdb_initial_cpu,
+ kdb_state);
+}
+
+/*
+ * kdb_main_loop - After initial setup and assignment of the
+ * controlling cpu, all cpus are in this loop. One cpu is in
+ * control and will issue the kdb prompt, the others will spin
+ * until 'go' or cpu switch.
+ *
+ * To get a consistent view of the kernel stacks for all
+ * processes, this routine is invoked from the main kdb code via
+ * an architecture specific routine. kdba_main_loop is
+ * responsible for making the kernel stacks consistent for all
+ * processes, there should be no difference between a blocked
+ * process and a running process as far as kdb is concerned.
+ * Inputs:
+ * reason The reason KDB was invoked
+ * error The hardware-defined error code
+ * reason2 kdb's current reason code.
+ * Initially error but can change
+ * acording to kdb state.
+ * db_result Result code from break or debug point.
+ * regs The exception frame at time of fault/breakpoint.
+ * should always be valid.
+ * Returns:
+ * 0 KDB was invoked for an event which it wasn't responsible
+ * 1 KDB handled the event for which it was invoked.
+ */
+int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+ kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+ int result = 1;
+ /* Stay in kdb() until 'go', 'ss[b]' or an error */
+ while (1) {
+ /*
+ * All processors except the one that is in control
+ * will spin here.
+ */
+ KDB_DEBUG_STATE("kdb_main_loop 1", reason);
+ while (KDB_STATE(HOLD_CPU)) {
+ /* state KDB is turned off by kdb_cpu to see if the
+ * other cpus are still live, each cpu in this loop
+ * turns it back on.
+ */
+ if (!KDB_STATE(KDB))
+ KDB_STATE_SET(KDB);
+ }
+
+ KDB_STATE_CLEAR(SUPPRESS);
+ KDB_DEBUG_STATE("kdb_main_loop 2", reason);
+ if (KDB_STATE(LEAVING))
+ break; /* Another cpu said 'go' */
+ /* Still using kdb, this processor is in control */
+ result = kdb_local(reason2, error, regs, db_result);
+ KDB_DEBUG_STATE("kdb_main_loop 3", result);
+
+ if (result == KDB_CMD_CPU)
+ break;
+
+ if (result == KDB_CMD_SS) {
+ KDB_STATE_SET(DOING_SS);
+ break;
+ }
+
+ if (result == KDB_CMD_SSB) {
+ KDB_STATE_SET(DOING_SS);
+ KDB_STATE_SET(DOING_SSB);
+ break;
+ }
+
+ if (result == KDB_CMD_KGDB) {
+ if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)))
+ kdb_printf("Entering please attach debugger "
+ "or use $D#44+ or $3#33\n");
+ break;
+ }
+ if (result && result != 1 && result != KDB_CMD_GO)
+ kdb_printf("\nUnexpected kdb_local return code %d\n",
+ result);
+ KDB_DEBUG_STATE("kdb_main_loop 4", reason);
+ break;
+ }
+ if (KDB_STATE(DOING_SS))
+ KDB_STATE_CLEAR(SSBPT);
+
+ return result;
+}
+
+/*
+ * kdb_mdr - This function implements the guts of the 'mdr', memory
+ * read command.
+ * mdr <addr arg>,<byte count>
+ * Inputs:
+ * addr Start address
+ * count Number of bytes
+ * Returns:
+ * Always 0. Any errors are detected and printed by kdb_getarea.
+ */
+static int kdb_mdr(unsigned long addr, unsigned int count)
+{
+ unsigned char c;
+ while (count--) {
+ if (kdb_getarea(c, addr))
+ return 0;
+ kdb_printf("%02x", c);
+ addr++;
+ }
+ kdb_printf("\n");
+ return 0;
+}
+
+/*
+ * kdb_md - This function implements the 'md', 'md1', 'md2', 'md4',
+ * 'md8' 'mdr' and 'mds' commands.
+ *
+ * md|mds [<addr arg> [<line count> [<radix>]]]
+ * mdWcN [<addr arg> [<line count> [<radix>]]]
+ * where W = is the width (1, 2, 4 or 8) and N is the count.
+ * for eg., md1c20 reads 20 bytes, 1 at a time.
+ * mdr <addr arg>,<byte count>
+ */
+static void kdb_md_line(const char *fmtstr, unsigned long addr,
+ int symbolic, int nosect, int bytesperword,
+ int num, int repeat, int phys)
+{
+ /* print just one line of data */
+ kdb_symtab_t symtab;
+ char cbuf[32];
+ char *c = cbuf;
+ int i;
+ unsigned long word;
+
+ memset(cbuf, '\0', sizeof(cbuf));
+ if (phys)
+ kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
+ else
+ kdb_printf(kdb_machreg_fmt0 " ", addr);
+
+ for (i = 0; i < num && repeat--; i++) {
+ if (phys) {
+ if (kdb_getphysword(&word, addr, bytesperword))
+ break;
+ } else if (kdb_getword(&word, addr, bytesperword))
+ break;
+ kdb_printf(fmtstr, word);
+ if (symbolic)
+ kdbnearsym(word, &symtab);
+ else
+ memset(&symtab, 0, sizeof(symtab));
+ if (symtab.sym_name) {
+ kdb_symbol_print(word, &symtab, 0);
+ if (!nosect) {
+ kdb_printf("\n");
+ kdb_printf(" %s %s "
+ kdb_machreg_fmt " "
+ kdb_machreg_fmt " "
+ kdb_machreg_fmt, symtab.mod_name,
+ symtab.sec_name, symtab.sec_start,
+ symtab.sym_start, symtab.sym_end);
+ }
+ addr += bytesperword;
+ } else {
+ union {
+ u64 word;
+ unsigned char c[8];
+ } wc;
+ unsigned char *cp;
+#ifdef __BIG_ENDIAN
+ cp = wc.c + 8 - bytesperword;
+#else
+ cp = wc.c;
+#endif
+ wc.word = word;
+#define printable_char(c) \
+ ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; })
+ switch (bytesperword) {
+ case 8:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 4;
+ case 4:
+ *c++ = printable_char(*cp++);
+ *c++ = printable_char(*cp++);
+ addr += 2;
+ case 2:
+ *c++ = printable_char(*cp++);
+ addr++;
+ case 1:
+ *c++ = printable_char(*cp++);
+ addr++;
+ break;
+ }
+#undef printable_char
+ }
+ }
+ kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1),
+ " ", cbuf);
+}
+
+static int kdb_md(int argc, const char **argv)
+{
+ static unsigned long last_addr;
+ static int last_radix, last_bytesperword, last_repeat;
+ int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
+ int nosect = 0;
+ char fmtchar, fmtstr[64];
+ unsigned long addr;
+ unsigned long word;
+ long offset = 0;
+ int symbolic = 0;
+ int valid = 0;
+ int phys = 0;
+
+ kdbgetintenv("MDCOUNT", &mdcount);
+ kdbgetintenv("RADIX", &radix);
+ kdbgetintenv("BYTESPERWORD", &bytesperword);
+
+ /* Assume 'md <addr>' and start with environment values */
+ repeat = mdcount * 16 / bytesperword;
+
+ if (strcmp(argv[0], "mdr") == 0) {
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+ valid = 1;
+ } else if (isdigit(argv[0][2])) {
+ bytesperword = (int)(argv[0][2] - '0');
+ if (bytesperword == 0) {
+ bytesperword = last_bytesperword;
+ if (bytesperword == 0)
+ bytesperword = 4;
+ }
+ last_bytesperword = bytesperword;
+ repeat = mdcount * 16 / bytesperword;
+ if (!argv[0][3])
+ valid = 1;
+ else if (argv[0][3] == 'c' && argv[0][4]) {
+ char *p;
+ repeat = simple_strtoul(argv[0] + 4, &p, 10);
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ valid = !*p;
+ }
+ last_repeat = repeat;
+ } else if (strcmp(argv[0], "md") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mds") == 0)
+ valid = 1;
+ else if (strcmp(argv[0], "mdp") == 0) {
+ phys = valid = 1;
+ }
+ if (!valid)
+ return KDB_NOTFOUND;
+
+ if (argc == 0) {
+ if (last_addr == 0)
+ return KDB_ARGCOUNT;
+ addr = last_addr;
+ radix = last_radix;
+ bytesperword = last_bytesperword;
+ repeat = last_repeat;
+ mdcount = ((repeat * bytesperword) + 15) / 16;
+ }
+
+ if (argc) {
+ unsigned long val;
+ int diag, nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
+ &offset, NULL);
+ if (diag)
+ return diag;
+ if (argc > nextarg+2)
+ return KDB_ARGCOUNT;
+
+ if (argc >= nextarg) {
+ diag = kdbgetularg(argv[nextarg], &val);
+ if (!diag) {
+ mdcount = (int) val;
+ repeat = mdcount * 16 / bytesperword;
+ }
+ }
+ if (argc >= nextarg+1) {
+ diag = kdbgetularg(argv[nextarg+1], &val);
+ if (!diag)
+ radix = (int) val;
+ }
+ }
+
+ if (strcmp(argv[0], "mdr") == 0)
+ return kdb_mdr(addr, mdcount);
+
+ switch (radix) {
+ case 10:
+ fmtchar = 'd';
+ break;
+ case 16:
+ fmtchar = 'x';
+ break;
+ case 8:
+ fmtchar = 'o';
+ break;
+ default:
+ return KDB_BADRADIX;
+ }
+
+ last_radix = radix;
+
+ if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+
+ switch (bytesperword) {
+ case 8:
+ sprintf(fmtstr, "%%16.16l%c ", fmtchar);
+ break;
+ case 4:
+ sprintf(fmtstr, "%%8.8l%c ", fmtchar);
+ break;
+ case 2:
+ sprintf(fmtstr, "%%4.4l%c ", fmtchar);
+ break;
+ case 1:
+ sprintf(fmtstr, "%%2.2l%c ", fmtchar);
+ break;
+ default:
+ return KDB_BADWIDTH;
+ }
+
+ last_repeat = repeat;
+ last_bytesperword = bytesperword;
+
+ if (strcmp(argv[0], "mds") == 0) {
+ symbolic = 1;
+ /* Do not save these changes as last_*, they are temporary mds
+ * overrides.
+ */
+ bytesperword = KDB_WORD_SIZE;
+ repeat = mdcount;
+ kdbgetintenv("NOSECT", &nosect);
+ }
+
+ /* Round address down modulo BYTESPERWORD */
+
+ addr &= ~(bytesperword-1);
+
+ while (repeat > 0) {
+ unsigned long a;
+ int n, z, num = (symbolic ? 1 : (16 / bytesperword));
+
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+ for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) {
+ if (phys) {
+ if (kdb_getphysword(&word, a, bytesperword)
+ || word)
+ break;
+ } else if (kdb_getword(&word, a, bytesperword) || word)
+ break;
+ }
+ n = min(num, repeat);
+ kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword,
+ num, repeat, phys);
+ addr += bytesperword * n;
+ repeat -= n;
+ z = (z + num - 1) / num;
+ if (z > 2) {
+ int s = num * (z-2);
+ kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0
+ " zero suppressed\n",
+ addr, addr + bytesperword * s - 1);
+ addr += bytesperword * s;
+ repeat -= s;
+ }
+ }
+ last_addr = addr;
+
+ return 0;
+}
+
+/*
+ * kdb_mm - This function implements the 'mm' command.
+ * mm address-expression new-value
+ * Remarks:
+ * mm works on machine words, mmW works on bytes.
+ */
+static int kdb_mm(int argc, const char **argv)
+{
+ int diag;
+ unsigned long addr;
+ long offset = 0;
+ unsigned long contents;
+ int nextarg;
+ int width;
+
+ if (argv[0][2] && !isdigit(argv[0][2]))
+ return KDB_NOTFOUND;
+
+ if (argc < 2)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ if (nextarg > argc)
+ return KDB_ARGCOUNT;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL);
+ if (diag)
+ return diag;
+
+ if (nextarg != argc + 1)
+ return KDB_ARGCOUNT;
+
+ width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
+ diag = kdb_putword(addr, contents, width);
+ if (diag)
+ return diag;
+
+ kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
+
+ return 0;
+}
+
+/*
+ * kdb_go - This function implements the 'go' command.
+ * go [address-expression]
+ */
+static int kdb_go(int argc, const char **argv)
+{
+ unsigned long addr;
+ int diag;
+ int nextarg;
+ long offset;
+
+ if (argc == 1) {
+ if (raw_smp_processor_id() != kdb_initial_cpu) {
+ kdb_printf("go <address> must be issued from the "
+ "initial cpu, do cpu %d first\n",
+ kdb_initial_cpu);
+ return KDB_ARGCOUNT;
+ }
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg,
+ &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ } else if (argc) {
+ return KDB_ARGCOUNT;
+ }
+
+ diag = KDB_CMD_GO;
+ if (KDB_FLAG(CATASTROPHIC)) {
+ kdb_printf("Catastrophic error detected\n");
+ kdb_printf("kdb_continue_catastrophic=%d, ",
+ kdb_continue_catastrophic);
+ if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) {
+ kdb_printf("type go a second time if you really want "
+ "to continue\n");
+ return 0;
+ }
+ if (kdb_continue_catastrophic == 2) {
+ kdb_printf("forcing reboot\n");
+ kdb_reboot(0, NULL);
+ }
+ kdb_printf("attempting to continue\n");
+ }
+ return diag;
+}
+
+/*
+ * kdb_rd - This function implements the 'rd' command.
+ */
+static int kdb_rd(int argc, const char **argv)
+{
+ int diag = kdb_check_regs();
+ if (diag)
+ return diag;
+
+ kdb_dumpregs(kdb_current_regs);
+ return 0;
+}
+
+/*
+ * kdb_rm - This function implements the 'rm' (register modify) command.
+ * rm register-name new-contents
+ * Remarks:
+ * Currently doesn't allow modification of control or
+ * debug registers.
+ */
+static int kdb_rm(int argc, const char **argv)
+{
+ int diag;
+ int ind = 0;
+ unsigned long contents;
+
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+ /*
+ * Allow presence or absence of leading '%' symbol.
+ */
+ if (argv[1][0] == '%')
+ ind = 1;
+
+ diag = kdbgetularg(argv[2], &contents);
+ if (diag)
+ return diag;
+
+ diag = kdb_check_regs();
+ if (diag)
+ return diag;
+ kdb_printf("ERROR: Register set currently not implemented\n");
+ return 0;
+}
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+/*
+ * kdb_sr - This function implements the 'sr' (SYSRQ key) command
+ * which interfaces to the soi-disant MAGIC SYSRQ functionality.
+ * sr <magic-sysrq-code>
+ */
+static int kdb_sr(int argc, const char **argv)
+{
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+ sysrq_toggle_support(1);
+ kdb_trap_printk++;
+ handle_sysrq(*argv[1], NULL);
+ kdb_trap_printk--;
+
+ return 0;
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * kdb_ef - This function implements the 'regs' (display exception
+ * frame) command. This command takes an address and expects to
+ * find an exception frame at that address, formats and prints
+ * it.
+ * regs address-expression
+ * Remarks:
+ * Not done yet.
+ */
+static int kdb_ef(int argc, const char **argv)
+{
+ int diag;
+ unsigned long addr;
+ long offset;
+ int nextarg;
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+ show_regs((struct pt_regs *)addr);
+ return 0;
+}
+
+#if defined(CONFIG_MODULES)
+/* modules using other modules */
+struct module_use {
+ struct list_head list;
+ struct module *module_which_uses;
+};
+
+/*
+ * kdb_lsmod - This function implements the 'lsmod' command. Lists
+ * currently loaded kernel modules.
+ * Mostly taken from userland lsmod.
+ */
+static int kdb_lsmod(int argc, const char **argv)
+{
+ struct module *mod;
+
+ if (argc != 0)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("Module Size modstruct Used by\n");
+ list_for_each_entry(mod, kdb_modules, list) {
+
+ kdb_printf("%-20s%8u 0x%p ", mod->name,
+ mod->core_size, (void *)mod);
+#ifdef CONFIG_MODULE_UNLOAD
+ kdb_printf("%4d ", module_refcount(mod));
+#endif
+ if (mod->state == MODULE_STATE_GOING)
+ kdb_printf(" (Unloading)");
+ else if (mod->state == MODULE_STATE_COMING)
+ kdb_printf(" (Loading)");
+ else
+ kdb_printf(" (Live)");
+
+#ifdef CONFIG_MODULE_UNLOAD
+ {
+ struct module_use *use;
+ kdb_printf(" [ ");
+ list_for_each_entry(use, &mod->modules_which_use_me,
+ list)
+ kdb_printf("%s ", use->module_which_uses->name);
+ kdb_printf("]\n");
+ }
+#endif
+ }
+
+ return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
+/*
+ * kdb_env - This function implements the 'env' command. Display the
+ * current environment variables.
+ */
+
+static int kdb_env(int argc, const char **argv)
+{
+ int i;
+
+ for (i = 0; i < __nenv; i++) {
+ if (__env[i])
+ kdb_printf("%s\n", __env[i]);
+ }
+
+ if (KDB_DEBUG(MASK))
+ kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+
+ return 0;
+}
+
+#ifdef CONFIG_PRINTK
+/*
+ * kdb_dmesg - This function implements the 'dmesg' command to display
+ * the contents of the syslog buffer.
+ * dmesg [lines] [adjust]
+ */
+static int kdb_dmesg(int argc, const char **argv)
+{
+ char *syslog_data[4], *start, *end, c = '\0', *p;
+ int diag, logging, logsize, lines = 0, adjust = 0, n;
+
+ if (argc > 2)
+ return KDB_ARGCOUNT;
+ if (argc) {
+ char *cp;
+ lines = simple_strtol(argv[1], &cp, 0);
+ if (*cp)
+ lines = 0;
+ if (argc > 1) {
+ adjust = simple_strtoul(argv[2], &cp, 0);
+ if (*cp || adjust < 0)
+ adjust = 0;
+ }
+ }
+
+ /* disable LOGGING if set */
+ diag = kdbgetintenv("LOGGING", &logging);
+ if (!diag && logging) {
+ const char *setargs[] = { "set", "LOGGING", "0" };
+ kdb_set(2, setargs);
+ }
+
+ /* syslog_data[0,1] physical start, end+1. syslog_data[2,3]
+ * logical start, end+1. */
+ kdb_syslog_data(syslog_data);
+ if (syslog_data[2] == syslog_data[3])
+ return 0;
+ logsize = syslog_data[1] - syslog_data[0];
+ start = syslog_data[2];
+ end = syslog_data[3];
+#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
+ for (n = 0, p = start; p < end; ++p) {
+ c = *KDB_WRAP(p);
+ if (c == '\n')
+ ++n;
+ }
+ if (c != '\n')
+ ++n;
+ if (lines < 0) {
+ if (adjust >= n)
+ kdb_printf("buffer only contains %d lines, nothing "
+ "printed\n", n);
+ else if (adjust - lines >= n)
+ kdb_printf("buffer only contains %d lines, last %d "
+ "lines printed\n", n, n - adjust);
+ if (adjust) {
+ for (; start < end && adjust; ++start) {
+ if (*KDB_WRAP(start) == '\n')
+ --adjust;
+ }
+ if (start < end)
+ ++start;
+ }
+ for (p = start; p < end && lines; ++p) {
+ if (*KDB_WRAP(p) == '\n')
+ ++lines;
+ }
+ end = p;
+ } else if (lines > 0) {
+ int skip = n - (adjust + lines);
+ if (adjust >= n) {
+ kdb_printf("buffer only contains %d lines, "
+ "nothing printed\n", n);
+ skip = n;
+ } else if (skip < 0) {
+ lines += skip;
+ skip = 0;
+ kdb_printf("buffer only contains %d lines, first "
+ "%d lines printed\n", n, lines);
+ }
+ for (; start < end && skip; ++start) {
+ if (*KDB_WRAP(start) == '\n')
+ --skip;
+ }
+ for (p = start; p < end && lines; ++p) {
+ if (*KDB_WRAP(p) == '\n')
+ --lines;
+ }
+ end = p;
+ }
+ /* Do a line at a time (max 200 chars) to reduce protocol overhead */
+ c = '\n';
+ while (start != end) {
+ char buf[201];
+ p = buf;
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+ while (start < end && (c = *KDB_WRAP(start)) &&
+ (p - buf) < sizeof(buf)-1) {
+ ++start;
+ *p++ = c;
+ if (c == '\n')
+ break;
+ }
+ *p = '\0';
+ kdb_printf("%s", buf);
+ }
+ if (c != '\n')
+ kdb_printf("\n");
+
+ return 0;
+}
+#endif /* CONFIG_PRINTK */
+/*
+ * kdb_cpu - This function implements the 'cpu' command.
+ * cpu [<cpunum>]
+ * Returns:
+ * KDB_CMD_CPU for success, a kdb diagnostic if error
+ */
+static void kdb_cpu_status(void)
+{
+ int i, start_cpu, first_print = 1;
+ char state, prev_state = '?';
+
+ kdb_printf("Currently on cpu %d\n", raw_smp_processor_id());
+ kdb_printf("Available cpus: ");
+ for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i)) {
+ state = 'F'; /* cpu is offline */
+ } else {
+ state = ' '; /* cpu is responding to kdb */
+ if (kdb_task_state_char(KDB_TSK(i)) == 'I')
+ state = 'I'; /* idle task */
+ }
+ if (state != prev_state) {
+ if (prev_state != '?') {
+ if (!first_print)
+ kdb_printf(", ");
+ first_print = 0;
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ prev_state = state;
+ start_cpu = i;
+ }
+ }
+ /* print the trailing cpus, ignoring them if they are all offline */
+ if (prev_state != 'F') {
+ if (!first_print)
+ kdb_printf(", ");
+ kdb_printf("%d", start_cpu);
+ if (start_cpu < i-1)
+ kdb_printf("-%d", i-1);
+ if (prev_state != ' ')
+ kdb_printf("(%c)", prev_state);
+ }
+ kdb_printf("\n");
+}
+
+static int kdb_cpu(int argc, const char **argv)
+{
+ unsigned long cpunum;
+ int diag;
+
+ if (argc == 0) {
+ kdb_cpu_status();
+ return 0;
+ }
+
+ if (argc != 1)
+ return KDB_ARGCOUNT;
+
+ diag = kdbgetularg(argv[1], &cpunum);
+ if (diag)
+ return diag;
+
+ /*
+ * Validate cpunum
+ */
+ if ((cpunum > NR_CPUS) || !cpu_online(cpunum))
+ return KDB_BADCPUNUM;
+
+ dbg_switch_cpu = cpunum;
+
+ /*
+ * Switch to other cpu
+ */
+ return KDB_CMD_CPU;
+}
+
+/* The user may not realize that ps/bta with no parameters does not print idle
+ * or sleeping system daemon processes, so tell them how many were suppressed.
+ */
+void kdb_ps_suppressed(void)
+{
+ int idle = 0, daemon = 0;
+ unsigned long mask_I = kdb_task_state_string("I"),
+ mask_M = kdb_task_state_string("M");
+ unsigned long cpu;
+ const struct task_struct *p, *g;
+ for_each_online_cpu(cpu) {
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask_I))
+ ++idle;
+ }
+ kdb_do_each_thread(g, p) {
+ if (kdb_task_state(p, mask_M))
+ ++daemon;
+ } kdb_while_each_thread(g, p);
+ if (idle || daemon) {
+ if (idle)
+ kdb_printf("%d idle process%s (state I)%s\n",
+ idle, idle == 1 ? "" : "es",
+ daemon ? " and " : "");
+ if (daemon)
+ kdb_printf("%d sleeping system daemon (state M) "
+ "process%s", daemon,
+ daemon == 1 ? "" : "es");
+ kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
+ }
+}
+
+/*
+ * kdb_ps - This function implements the 'ps' command which shows a
+ * list of the active processes.
+ * ps [DRSTCZEUIMA] All processes, optionally filtered by state
+ */
+void kdb_ps1(const struct task_struct *p)
+{
+ int cpu;
+ unsigned long tmp;
+
+ if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ return;
+
+ cpu = kdb_process_cpu(p);
+ kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n",
+ (void *)p, p->pid, p->parent->pid,
+ kdb_task_has_cpu(p), kdb_process_cpu(p),
+ kdb_task_state_char(p),
+ (void *)(&p->thread),
+ p == kdb_curr_task(raw_smp_processor_id()) ? '*' : ' ',
+ p->comm);
+ if (kdb_task_has_cpu(p)) {
+ if (!KDB_TSK(cpu)) {
+ kdb_printf(" Error: no saved data for this cpu\n");
+ } else {
+ if (KDB_TSK(cpu) != p)
+ kdb_printf(" Error: does not match running "
+ "process table (0x%p)\n", KDB_TSK(cpu));
+ }
+ }
+}
+
+static int kdb_ps(int argc, const char **argv)
+{
+ struct task_struct *g, *p;
+ unsigned long mask, cpu;
+
+ if (argc == 0)
+ kdb_ps_suppressed();
+ kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
+ (int)(2*sizeof(void *))+2, "Task Addr",
+ (int)(2*sizeof(void *))+2, "Thread");
+ mask = kdb_task_state_string(argc ? argv[1] : NULL);
+ /* Run the active tasks first */
+ for_each_online_cpu(cpu) {
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+ p = kdb_curr_task(cpu);
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ }
+ kdb_printf("\n");
+ /* Now the real tasks */
+ kdb_do_each_thread(g, p) {
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+ if (kdb_task_state(p, mask))
+ kdb_ps1(p);
+ } kdb_while_each_thread(g, p);
+
+ return 0;
+}
+
+/*
+ * kdb_pid - This function implements the 'pid' command which switches
+ * the currently active process.
+ * pid [<pid> | R]
+ */
+static int kdb_pid(int argc, const char **argv)
+{
+ struct task_struct *p;
+ unsigned long val;
+ int diag;
+
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+
+ if (argc) {
+ if (strcmp(argv[1], "R") == 0) {
+ p = KDB_TSK(kdb_initial_cpu);
+ } else {
+ diag = kdbgetularg(argv[1], &val);
+ if (diag)
+ return KDB_BADINT;
+
+ p = find_task_by_pid_ns((pid_t)val, &init_pid_ns);
+ if (!p) {
+ kdb_printf("No task with pid=%d\n", (pid_t)val);
+ return 0;
+ }
+ }
+ kdb_set_current_task(p);
+ }
+ kdb_printf("KDB current process is %s(pid=%d)\n",
+ kdb_current_task->comm,
+ kdb_current_task->pid);
+
+ return 0;
+}
+
+/*
+ * kdb_ll - This function implements the 'll' command which follows a
+ * linked list and executes an arbitrary command for each
+ * element.
+ */
+static int kdb_ll(int argc, const char **argv)
+{
+ int diag;
+ unsigned long addr;
+ long offset = 0;
+ unsigned long va;
+ unsigned long linkoffset;
+ int nextarg;
+ const char *command;
+
+ if (argc != 3)
+ return KDB_ARGCOUNT;
+
+ nextarg = 1;
+ diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+ if (diag)
+ return diag;
+
+ diag = kdbgetularg(argv[2], &linkoffset);
+ if (diag)
+ return diag;
+
+ /*
+ * Using the starting address as
+ * the first element in the list, and assuming that
+ * the list ends with a null pointer.
+ */
+
+ va = addr;
+ command = kdb_strdup(argv[3], GFP_KDB);
+ if (!command) {
+ kdb_printf("%s: cannot duplicate command\n", __func__);
+ return 0;
+ }
+ /* Recursive use of kdb_parse, do not use argv after this point */
+ argv = NULL;
+
+ while (va) {
+ char buf[80];
+
+ sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
+ diag = kdb_parse(buf);
+ if (diag)
+ return diag;
+
+ addr = va + linkoffset;
+ if (kdb_getword(&va, addr, sizeof(va)))
+ return 0;
+ }
+ kfree(command);
+
+ return 0;
+}
+
+static int kdb_kgdb(int argc, const char **argv)
+{
+ return KDB_CMD_KGDB;
+}
+
+/*
+ * kdb_help - This function implements the 'help' and '?' commands.
+ */
+static int kdb_help(int argc, const char **argv)
+{
+ kdbtab_t *kt;
+ int i;
+
+ kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
+ kdb_printf("-----------------------------"
+ "-----------------------------\n");
+ for_each_kdbcmd(kt, i) {
+ if (kt->cmd_name)
+ kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
+ kt->cmd_usage, kt->cmd_help);
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * kdb_kill - This function implements the 'kill' commands.
+ */
+static int kdb_kill(int argc, const char **argv)
+{
+ long sig, pid;
+ char *endp;
+ struct task_struct *p;
+ struct siginfo info;
+
+ if (argc != 2)
+ return KDB_ARGCOUNT;
+
+ sig = simple_strtol(argv[1], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (sig >= 0) {
+ kdb_printf("Invalid signal parameter.<-signal>\n");
+ return 0;
+ }
+ sig = -sig;
+
+ pid = simple_strtol(argv[2], &endp, 0);
+ if (*endp)
+ return KDB_BADINT;
+ if (pid <= 0) {
+ kdb_printf("Process ID must be large than 0.\n");
+ return 0;
+ }
+
+ /* Find the process. */
+ p = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (!p) {
+ kdb_printf("The specified process isn't found.\n");
+ return 0;
+ }
+ p = p->group_leader;
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = pid; /* same capabilities as process being signalled */
+ info.si_uid = 0; /* kdb has root authority */
+ kdb_send_sig_info(p, &info);
+ return 0;
+}
+
+struct kdb_tm {
+ int tm_sec; /* seconds */
+ int tm_min; /* minutes */
+ int tm_hour; /* hours */
+ int tm_mday; /* day of the month */
+ int tm_mon; /* month */
+ int tm_year; /* year */
+};
+
+static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
+{
+ /* This will work from 1970-2099, 2100 is not a leap year */
+ static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
+ 31, 30, 31, 30, 31 };
+ memset(tm, 0, sizeof(*tm));
+ tm->tm_sec = tv->tv_sec % (24 * 60 * 60);
+ tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
+ (2 * 365 + 1); /* shift base from 1970 to 1968 */
+ tm->tm_min = tm->tm_sec / 60 % 60;
+ tm->tm_hour = tm->tm_sec / 60 / 60;
+ tm->tm_sec = tm->tm_sec % 60;
+ tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
+ tm->tm_mday %= (4*365+1);
+ mon_day[1] = 29;
+ while (tm->tm_mday >= mon_day[tm->tm_mon]) {
+ tm->tm_mday -= mon_day[tm->tm_mon];
+ if (++tm->tm_mon == 12) {
+ tm->tm_mon = 0;
+ ++tm->tm_year;
+ mon_day[1] = 28;
+ }
+ }
+ ++tm->tm_mday;
+}
+
+/*
+ * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
+ * I cannot call that code directly from kdb, it has an unconditional
+ * cli()/sti() and calls routines that take locks which can stop the debugger.
+ */
+static void kdb_sysinfo(struct sysinfo *val)
+{
+ struct timespec uptime;
+ do_posix_clock_monotonic_gettime(&uptime);
+ memset(val, 0, sizeof(*val));
+ val->uptime = uptime.tv_sec;
+ val->loads[0] = avenrun[0];
+ val->loads[1] = avenrun[1];
+ val->loads[2] = avenrun[2];
+ val->procs = nr_threads-1;
+ si_meminfo(val);
+
+ return;
+}
+
+/*
+ * kdb_summary - This function implements the 'summary' command.
+ */
+static int kdb_summary(int argc, const char **argv)
+{
+ struct kdb_tm tm;
+ struct sysinfo val;
+
+ if (argc)
+ return KDB_ARGCOUNT;
+
+ kdb_printf("sysname %s\n", init_uts_ns.name.sysname);
+ kdb_printf("release %s\n", init_uts_ns.name.release);
+ kdb_printf("version %s\n", init_uts_ns.name.version);
+ kdb_printf("machine %s\n", init_uts_ns.name.machine);
+ kdb_printf("nodename %s\n", init_uts_ns.name.nodename);
+ kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
+ kdb_printf("ccversion %s\n", __stringify(CCVERSION));
+
+ kdb_gmtime(&xtime, &tm);
+ kdb_printf("date %04d-%02d-%02d %02d:%02d:%02d "
+ "tz_minuteswest %d\n",
+ 1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec,
+ sys_tz.tz_minuteswest);
+
+ kdb_sysinfo(&val);
+ kdb_printf("uptime ");
+ if (val.uptime > (24*60*60)) {
+ int days = val.uptime / (24*60*60);
+ val.uptime %= (24*60*60);
+ kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
+ }
+ kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
+
+ /* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+ kdb_printf("load avg %ld.%02ld %ld.%02ld %ld.%02ld\n",
+ LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
+ LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
+ LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
+#undef LOAD_INT
+#undef LOAD_FRAC
+ /* Display in kilobytes */
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+ kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
+ "Buffers: %8lu kB\n",
+ val.totalram, val.freeram, val.bufferram);
+ return 0;
+}
+
+/*
+ * kdb_per_cpu - This function implements the 'per_cpu' command.
+ */
+static int kdb_per_cpu(int argc, const char **argv)
+{
+ char buf[256], fmtstr[64];
+ kdb_symtab_t symtab;
+ cpumask_t suppress = CPU_MASK_NONE;
+ int cpu, diag;
+ unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
+
+ if (argc < 1 || argc > 3)
+ return KDB_ARGCOUNT;
+
+ snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
+ if (!kdbgetsymval(buf, &symtab)) {
+ kdb_printf("%s is not a per_cpu variable\n", argv[1]);
+ return KDB_BADADDR;
+ }
+ if (argc >= 2) {
+ diag = kdbgetularg(argv[2], &bytesperword);
+ if (diag)
+ return diag;
+ }
+ if (!bytesperword)
+ bytesperword = KDB_WORD_SIZE;
+ else if (bytesperword > KDB_WORD_SIZE)
+ return KDB_BADWIDTH;
+ sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
+ if (argc >= 3) {
+ diag = kdbgetularg(argv[3], &whichcpu);
+ if (diag)
+ return diag;
+ if (!cpu_online(whichcpu)) {
+ kdb_printf("cpu %ld is not online\n", whichcpu);
+ return KDB_BADCPUNUM;
+ }
+ }
+
+ /* Most architectures use __per_cpu_offset[cpu], some use
+ * __per_cpu_offset(cpu), smp has no __per_cpu_offset.
+ */
+#ifdef __per_cpu_offset
+#define KDB_PCU(cpu) __per_cpu_offset(cpu)
+#else
+#ifdef CONFIG_SMP
+#define KDB_PCU(cpu) __per_cpu_offset[cpu]
+#else
+#define KDB_PCU(cpu) 0
+#endif
+#endif
+
+ for_each_online_cpu(cpu) {
+ if (whichcpu != ~0UL && whichcpu != cpu)
+ continue;
+ addr = symtab.sym_start + KDB_PCU(cpu);
+ diag = kdb_getword(&val, addr, bytesperword);
+ if (diag) {
+ kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to "
+ "read, diag=%d\n", cpu, addr, diag);
+ continue;
+ }
+#ifdef CONFIG_SMP
+ if (!val) {
+ cpu_set(cpu, suppress);
+ continue;
+ }
+#endif /* CONFIG_SMP */
+ kdb_printf("%5d ", cpu);
+ kdb_md_line(fmtstr, addr,
+ bytesperword == KDB_WORD_SIZE,
+ 1, bytesperword, 1, 1, 0);
+ }
+ if (cpus_weight(suppress) == 0)
+ return 0;
+ kdb_printf("Zero suppressed cpu(s):");
+ for (cpu = first_cpu(suppress); cpu < num_possible_cpus();
+ cpu = next_cpu(cpu, suppress)) {
+ kdb_printf(" %d", cpu);
+ if (cpu == num_possible_cpus() - 1 ||
+ next_cpu(cpu, suppress) != cpu + 1)
+ continue;
+ while (cpu < num_possible_cpus() &&
+ next_cpu(cpu, suppress) == cpu + 1)
+ ++cpu;
+ kdb_printf("-%d", cpu);
+ }
+ kdb_printf("\n");
+
+#undef KDB_PCU
+
+ return 0;
+}
+
+/*
+ * display help for the use of cmd | grep pattern
+ */
+static int kdb_grep_help(int argc, const char **argv)
+{
+ kdb_printf("Usage of cmd args | grep pattern:\n");
+ kdb_printf(" Any command's output may be filtered through an ");
+ kdb_printf("emulated 'pipe'.\n");
+ kdb_printf(" 'grep' is just a key word.\n");
+ kdb_printf(" The pattern may include a very limited set of "
+ "metacharacters:\n");
+ kdb_printf(" pattern or ^pattern or pattern$ or ^pattern$\n");
+ kdb_printf(" And if there are spaces in the pattern, you may "
+ "quote it:\n");
+ kdb_printf(" \"pat tern\" or \"^pat tern\" or \"pat tern$\""
+ " or \"^pat tern$\"\n");
+ return 0;
+}
+
+/*
+ * kdb_register_repeat - This function is used to register a kernel
+ * debugger command.
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * repeat Does the command auto repeat on enter?
+ * Returns:
+ * zero for success, one if a duplicate command.
+ */
+#define kdb_command_extend 50 /* arbitrary */
+int kdb_register_repeat(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen,
+ kdb_repeat_t repeat)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /*
+ * Brute force method to determine duplicates
+ */
+ for_each_kdbcmd(kp, i) {
+ if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
+ kdb_printf("Duplicate kdb command registered: "
+ "%s, func %p help %s\n", cmd, func, help);
+ return 1;
+ }
+ }
+
+ /*
+ * Insert command into first available location in table
+ */
+ for_each_kdbcmd(kp, i) {
+ if (kp->cmd_name == NULL)
+ break;
+ }
+
+ if (i >= kdb_max_commands) {
+ kdbtab_t *new = kmalloc((kdb_max_commands - KDB_BASE_CMD_MAX +
+ kdb_command_extend) * sizeof(*new), GFP_KDB);
+ if (!new) {
+ kdb_printf("Could not allocate new kdb_command "
+ "table\n");
+ return 1;
+ }
+ if (kdb_commands) {
+ memcpy(new, kdb_commands,
+ kdb_max_commands * sizeof(*new));
+ kfree(kdb_commands);
+ }
+ memset(new + kdb_max_commands, 0,
+ kdb_command_extend * sizeof(*new));
+ kdb_commands = new;
+ kp = kdb_commands + kdb_max_commands;
+ kdb_max_commands += kdb_command_extend;
+ }
+
+ kp->cmd_name = cmd;
+ kp->cmd_func = func;
+ kp->cmd_usage = usage;
+ kp->cmd_help = help;
+ kp->cmd_flags = 0;
+ kp->cmd_minlen = minlen;
+ kp->cmd_repeat = repeat;
+
+ return 0;
+}
+
+/*
+ * kdb_register - Compatibility register function for commands that do
+ * not need to specify a repeat state. Equivalent to
+ * kdb_register_repeat with KDB_REPEAT_NONE.
+ * Inputs:
+ * cmd Command name
+ * func Function to execute the command
+ * usage A simple usage string showing arguments
+ * help A simple help string describing command
+ * Returns:
+ * zero for success, one if a duplicate command.
+ */
+int kdb_register(char *cmd,
+ kdb_func_t func,
+ char *usage,
+ char *help,
+ short minlen)
+{
+ return kdb_register_repeat(cmd, func, usage, help, minlen,
+ KDB_REPEAT_NONE);
+}
+
+/*
+ * kdb_unregister - This function is used to unregister a kernel
+ * debugger command. It is generally called when a module which
+ * implements kdb commands is unloaded.
+ * Inputs:
+ * cmd Command name
+ * Returns:
+ * zero for success, one command not registered.
+ */
+int kdb_unregister(char *cmd)
+{
+ int i;
+ kdbtab_t *kp;
+
+ /*
+ * find the command.
+ */
+ for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) {
+ if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
+ kp->cmd_name = NULL;
+ return 0;
+ }
+ }
+
+ /* Couldn't find it. */
+ return 1;
+}
+
+/* Initialize the kdb command table. */
+static void __init kdb_inittab(void)
+{
+ int i;
+ kdbtab_t *kp;
+
+ for_each_kdbcmd(kp, i)
+ kp->cmd_name = NULL;
+
+ kdb_register_repeat("md", kdb_md, "<vaddr>",
+ "Display Memory Contents, also mdWcN, e.g. md8c1", 1,
+ KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>",
+ "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>",
+ "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mds", kdb_md, "<vaddr>",
+ "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>",
+ "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
+ kdb_register_repeat("go", kdb_go, "[<vaddr>]",
+ "Continue Execution", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("rd", kdb_rd, "",
+ "Display Registers", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("rm", kdb_rm, "<reg> <contents>",
+ "Modify Registers", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ef", kdb_ef, "<vaddr>",
+ "Display exception frame", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bt", kdb_bt, "[<vaddr>]",
+ "Stack traceback", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("btp", kdb_bt, "<pid>",
+ "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]",
+ "Display stack all processes", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btc", kdb_bt, "",
+ "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("btt", kdb_bt, "<vaddr>",
+ "Backtrace process given its struct task address", 0,
+ KDB_REPEAT_NONE);
+ kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>",
+ "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("env", kdb_env, "",
+ "Show environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("set", kdb_set, "",
+ "Set environment variables", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("help", kdb_help, "",
+ "Display Help Message", 1, KDB_REPEAT_NONE);
+ kdb_register_repeat("?", kdb_help, "",
+ "Display Help Message", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("cpu", kdb_cpu, "<cpunum>",
+ "Switch to new cpu", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("kgdb", kdb_kgdb, "",
+ "Enter kgdb mode", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("ps", kdb_ps, "[<flags>|A]",
+ "Display active task list", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("pid", kdb_pid, "<pidnum>",
+ "Switch to another task", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("reboot", kdb_reboot, "",
+ "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
+#if defined(CONFIG_MODULES)
+ kdb_register_repeat("lsmod", kdb_lsmod, "",
+ "List loaded kernel modules", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MAGIC_SYSRQ)
+ kdb_register_repeat("sr", kdb_sr, "<key>",
+ "Magic SysRq key", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_PRINTK)
+ kdb_register_repeat("dmesg", kdb_dmesg, "[lines]",
+ "Display syslog buffer", 0, KDB_REPEAT_NONE);
+#endif
+ kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"",
+ "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>",
+ "Send a signal to a process", 0, KDB_REPEAT_NONE);
+ kdb_register_repeat("summary", kdb_summary, "",
+ "Summarize the system", 4, KDB_REPEAT_NONE);
+ kdb_register_repeat("per_cpu", kdb_per_cpu, "",
+ "Display per_cpu variables", 3, KDB_REPEAT_NONE);
+ kdb_register_repeat("grephelp", kdb_grep_help, "",
+ "Display help on | grep", 0, KDB_REPEAT_NONE);
+}
+
+/* Execute any commands defined in kdb_cmds. */
+static void __init kdb_cmd_init(void)
+{
+ int i, diag;
+ for (i = 0; kdb_cmds[i]; ++i) {
+ diag = kdb_parse(kdb_cmds[i]);
+ if (diag)
+ kdb_printf("kdb command %s failed, kdb diag %d\n",
+ kdb_cmds[i], diag);
+ }
+ if (defcmd_in_progress) {
+ kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
+ kdb_parse("endefcmd");
+ }
+}
+
+/* Intialize kdb_printf, breakpoint tables and kdb state */
+void __init kdb_init(int lvl)
+{
+ static int kdb_init_lvl = KDB_NOT_INITIALIZED;
+ int i;
+
+ if (kdb_init_lvl == KDB_INIT_FULL || lvl <= kdb_init_lvl)
+ return;
+ for (i = kdb_init_lvl; i < lvl; i++) {
+ switch (i) {
+ case KDB_NOT_INITIALIZED:
+ kdb_inittab(); /* Initialize Command Table */
+ kdb_initbptab(); /* Initialize Breakpoints */
+ break;
+ case KDB_INIT_EARLY:
+ kdb_cmd_init(); /* Build kdb_cmds tables */
+ break;
+ }
+ }
+ kdb_init_lvl = lvl;
+}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
new file mode 100644
index 00000000000..97d3ba69775
--- /dev/null
+++ b/kernel/debug/kdb/kdb_private.h
@@ -0,0 +1,300 @@
+#ifndef _KDBPRIVATE_H
+#define _KDBPRIVATE_H
+
+/*
+ * Kernel Debugger Architecture Independent Private Headers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ */
+
+#include <linux/kgdb.h>
+#include "../debug_core.h"
+
+/* Kernel Debugger Error codes. Must not overlap with command codes. */
+#define KDB_NOTFOUND (-1)
+#define KDB_ARGCOUNT (-2)
+#define KDB_BADWIDTH (-3)
+#define KDB_BADRADIX (-4)
+#define KDB_NOTENV (-5)
+#define KDB_NOENVVALUE (-6)
+#define KDB_NOTIMP (-7)
+#define KDB_ENVFULL (-8)
+#define KDB_ENVBUFFULL (-9)
+#define KDB_TOOMANYBPT (-10)
+#define KDB_TOOMANYDBREGS (-11)
+#define KDB_DUPBPT (-12)
+#define KDB_BPTNOTFOUND (-13)
+#define KDB_BADMODE (-14)
+#define KDB_BADINT (-15)
+#define KDB_INVADDRFMT (-16)
+#define KDB_BADREG (-17)
+#define KDB_BADCPUNUM (-18)
+#define KDB_BADLENGTH (-19)
+#define KDB_NOBP (-20)
+#define KDB_BADADDR (-21)
+
+/* Kernel Debugger Command codes. Must not overlap with error codes. */
+#define KDB_CMD_GO (-1001)
+#define KDB_CMD_CPU (-1002)
+#define KDB_CMD_SS (-1003)
+#define KDB_CMD_SSB (-1004)
+#define KDB_CMD_KGDB (-1005)
+#define KDB_CMD_KGDB2 (-1006)
+
+/* Internal debug flags */
+#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */
+#define KDB_DEBUG_FLAG_BB_SUMM 0x0004 /* Basic block analysis, summary only */
+#define KDB_DEBUG_FLAG_AR 0x0008 /* Activation record, generic */
+#define KDB_DEBUG_FLAG_ARA 0x0010 /* Activation record, arch specific */
+#define KDB_DEBUG_FLAG_BB 0x0020 /* All basic block analysis */
+#define KDB_DEBUG_FLAG_STATE 0x0040 /* State flags */
+#define KDB_DEBUG_FLAG_MASK 0xffff /* All debug flags */
+#define KDB_DEBUG_FLAG_SHIFT 16 /* Shift factor for dbflags */
+
+#define KDB_DEBUG(flag) (kdb_flags & \
+ (KDB_DEBUG_FLAG_##flag << KDB_DEBUG_FLAG_SHIFT))
+#define KDB_DEBUG_STATE(text, value) if (KDB_DEBUG(STATE)) \
+ kdb_print_state(text, value)
+
+#if BITS_PER_LONG == 32
+
+#define KDB_PLATFORM_ENV "BYTESPERWORD=4"
+
+#define kdb_machreg_fmt "0x%lx"
+#define kdb_machreg_fmt0 "0x%08lx"
+#define kdb_bfd_vma_fmt "0x%lx"
+#define kdb_bfd_vma_fmt0 "0x%08lx"
+#define kdb_elfw_addr_fmt "0x%x"
+#define kdb_elfw_addr_fmt0 "0x%08x"
+#define kdb_f_count_fmt "%d"
+
+#elif BITS_PER_LONG == 64
+
+#define KDB_PLATFORM_ENV "BYTESPERWORD=8"
+
+#define kdb_machreg_fmt "0x%lx"
+#define kdb_machreg_fmt0 "0x%016lx"
+#define kdb_bfd_vma_fmt "0x%lx"
+#define kdb_bfd_vma_fmt0 "0x%016lx"
+#define kdb_elfw_addr_fmt "0x%x"
+#define kdb_elfw_addr_fmt0 "0x%016x"
+#define kdb_f_count_fmt "%ld"
+
+#endif
+
+/*
+ * KDB_MAXBPT describes the total number of breakpoints
+ * supported by this architecure.
+ */
+#define KDB_MAXBPT 16
+
+/* Maximum number of arguments to a function */
+#define KDB_MAXARGS 16
+
+typedef enum {
+ KDB_REPEAT_NONE = 0, /* Do not repeat this command */
+ KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */
+ KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */
+} kdb_repeat_t;
+
+typedef int (*kdb_func_t)(int, const char **);
+
+/* Symbol table format returned by kallsyms. */
+typedef struct __ksymtab {
+ unsigned long value; /* Address of symbol */
+ const char *mod_name; /* Module containing symbol or
+ * "kernel" */
+ unsigned long mod_start;
+ unsigned long mod_end;
+ const char *sec_name; /* Section containing symbol */
+ unsigned long sec_start;
+ unsigned long sec_end;
+ const char *sym_name; /* Full symbol name, including
+ * any version */
+ unsigned long sym_start;
+ unsigned long sym_end;
+ } kdb_symtab_t;
+extern int kallsyms_symbol_next(char *prefix_name, int flag);
+extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
+
+/* Exported Symbols for kernel loadable modules to use. */
+extern int kdb_register(char *, kdb_func_t, char *, char *, short);
+extern int kdb_register_repeat(char *, kdb_func_t, char *, char *,
+ short, kdb_repeat_t);
+extern int kdb_unregister(char *);
+
+extern int kdb_getarea_size(void *, unsigned long, size_t);
+extern int kdb_putarea_size(unsigned long, void *, size_t);
+
+/*
+ * Like get_user and put_user, kdb_getarea and kdb_putarea take variable
+ * names, not pointers. The underlying *_size functions take pointers.
+ */
+#define kdb_getarea(x, addr) kdb_getarea_size(&(x), addr, sizeof((x)))
+#define kdb_putarea(addr, x) kdb_putarea_size(addr, &(x), sizeof((x)))
+
+extern int kdb_getphysword(unsigned long *word,
+ unsigned long addr, size_t size);
+extern int kdb_getword(unsigned long *, unsigned long, size_t);
+extern int kdb_putword(unsigned long, unsigned long, size_t);
+
+extern int kdbgetularg(const char *, unsigned long *);
+extern int kdb_set(int, const char **);
+extern char *kdbgetenv(const char *);
+extern int kdbgetintenv(const char *, int *);
+extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
+ long *, char **);
+extern int kdbgetsymval(const char *, kdb_symtab_t *);
+extern int kdbnearsym(unsigned long, kdb_symtab_t *);
+extern void kdbnearsym_cleanup(void);
+extern char *kdb_strdup(const char *str, gfp_t type);
+extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
+
+/* Routine for debugging the debugger state. */
+extern void kdb_print_state(const char *, int);
+
+extern int kdb_state;
+#define KDB_STATE_KDB 0x00000001 /* Cpu is inside kdb */
+#define KDB_STATE_LEAVING 0x00000002 /* Cpu is leaving kdb */
+#define KDB_STATE_CMD 0x00000004 /* Running a kdb command */
+#define KDB_STATE_KDB_CONTROL 0x00000008 /* This cpu is under
+ * kdb control */
+#define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */
+#define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */
+#define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command,
+ * DOING_SS is also set */
+#define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint
+ * after one ss, independent of
+ * DOING_SS */
+#define KDB_STATE_REENTRY 0x00000100 /* Valid re-entry into kdb */
+#define KDB_STATE_SUPPRESS 0x00000200 /* Suppress error messages */
+#define KDB_STATE_PAGER 0x00000400 /* pager is available */
+#define KDB_STATE_GO_SWITCH 0x00000800 /* go is switching
+ * back to initial cpu */
+#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */
+#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */
+#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */
+#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been
+ * adjusted */
+#define KDB_STATE_GO1 0x00010000 /* go only releases one cpu */
+#define KDB_STATE_KEYBOARD 0x00020000 /* kdb entered via
+ * keyboard on this cpu */
+#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */
+#define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */
+#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */
+#define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */
+#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch
+ * specific use */
+
+#define KDB_STATE(flag) (kdb_state & KDB_STATE_##flag)
+#define KDB_STATE_SET(flag) ((void)(kdb_state |= KDB_STATE_##flag))
+#define KDB_STATE_CLEAR(flag) ((void)(kdb_state &= ~KDB_STATE_##flag))
+
+extern int kdb_nextline; /* Current number of lines displayed */
+
+typedef struct _kdb_bp {
+ unsigned long bp_addr; /* Address breakpoint is present at */
+ unsigned int bp_free:1; /* This entry is available */
+ unsigned int bp_enabled:1; /* Breakpoint is active in register */
+ unsigned int bp_type:4; /* Uses hardware register */
+ unsigned int bp_installed:1; /* Breakpoint is installed */
+ unsigned int bp_delay:1; /* Do delayed bp handling */
+ unsigned int bp_delayed:1; /* Delayed breakpoint */
+ unsigned int bph_length; /* HW break length */
+} kdb_bp_t;
+
+#ifdef CONFIG_KGDB_KDB
+extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
+
+/* The KDB shell command table */
+typedef struct _kdbtab {
+ char *cmd_name; /* Command name */
+ kdb_func_t cmd_func; /* Function to execute command */
+ char *cmd_usage; /* Usage String for this command */
+ char *cmd_help; /* Help message for this command */
+ short cmd_flags; /* Parsing flags */
+ short cmd_minlen; /* Minimum legal # command
+ * chars required */
+ kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */
+} kdbtab_t;
+
+extern int kdb_bt(int, const char **); /* KDB display back trace */
+
+/* KDB breakpoint management functions */
+extern void kdb_initbptab(void);
+extern void kdb_bp_install(struct pt_regs *);
+extern void kdb_bp_remove(void);
+
+typedef enum {
+ KDB_DB_BPT, /* Breakpoint */
+ KDB_DB_SS, /* Single-step trap */
+ KDB_DB_SSB, /* Single step to branch */
+ KDB_DB_SSBPT, /* Single step over breakpoint */
+ KDB_DB_NOBPT /* Spurious breakpoint */
+} kdb_dbtrap_t;
+
+extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
+ int, kdb_dbtrap_t, struct pt_regs *);
+
+/* Miscellaneous functions and data areas */
+extern int kdb_grepping_flag;
+extern char kdb_grep_string[];
+extern int kdb_grep_leading;
+extern int kdb_grep_trailing;
+extern char *kdb_cmds[];
+extern void kdb_syslog_data(char *syslog_data[]);
+extern unsigned long kdb_task_state_string(const char *);
+extern char kdb_task_state_char (const struct task_struct *);
+extern unsigned long kdb_task_state(const struct task_struct *p,
+ unsigned long mask);
+extern void kdb_ps_suppressed(void);
+extern void kdb_ps1(const struct task_struct *p);
+extern void kdb_print_nameval(const char *name, unsigned long val);
+extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
+extern void kdb_meminfo_proc_show(void);
+extern const char *kdb_walk_kallsyms(loff_t *pos);
+extern char *kdb_getstr(char *, size_t, char *);
+
+/* Defines for kdb_symbol_print */
+#define KDB_SP_SPACEB 0x0001 /* Space before string */
+#define KDB_SP_SPACEA 0x0002 /* Space after string */
+#define KDB_SP_PAREN 0x0004 /* Parenthesis around string */
+#define KDB_SP_VALUE 0x0008 /* Print the value of the address */
+#define KDB_SP_SYMSIZE 0x0010 /* Print the size of the symbol */
+#define KDB_SP_NEWLINE 0x0020 /* Newline after string */
+#define KDB_SP_DEFAULT (KDB_SP_VALUE|KDB_SP_PAREN)
+
+#define KDB_TSK(cpu) kgdb_info[cpu].task
+#define KDB_TSKREGS(cpu) kgdb_info[cpu].debuggerinfo
+
+extern struct task_struct *kdb_curr_task(int);
+
+#define kdb_task_has_cpu(p) (task_curr(p))
+
+/* Simplify coexistence with NPTL */
+#define kdb_do_each_thread(g, p) do_each_thread(g, p)
+#define kdb_while_each_thread(g, p) while_each_thread(g, p)
+
+#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
+
+extern void *debug_kmalloc(size_t size, gfp_t flags);
+extern void debug_kfree(void *);
+extern void debug_kusage(void);
+
+extern void kdb_set_current_task(struct task_struct *);
+extern struct task_struct *kdb_current_task;
+#ifdef CONFIG_MODULES
+extern struct list_head *kdb_modules;
+#endif /* CONFIG_MODULES */
+
+extern char kdb_prompt_str[];
+
+#define KDB_WORD_SIZE ((int)sizeof(unsigned long))
+
+#endif /* CONFIG_KGDB_KDB */
+#endif /* !_KDBPRIVATE_H */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
new file mode 100644
index 00000000000..45344d5c53d
--- /dev/null
+++ b/kernel/debug/kdb/kdb_support.c
@@ -0,0 +1,927 @@
+/*
+ * Kernel Debugger Architecture Independent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+ * 03/02/13 added new 2.5 kallsyms <xavier.bru@bull.net>
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kallsyms.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/kdb.h>
+#include <linux/slab.h>
+#include "kdb_private.h"
+
+/*
+ * kdbgetsymval - Return the address of the given symbol.
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * symtab Structure to receive results
+ * Returns:
+ * 0 Symbol not found, symtab zero filled
+ * 1 Symbol mapped to module/symbol/section, data in symtab
+ */
+int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
+{
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname,
+ symtab);
+ memset(symtab, 0, sizeof(*symtab));
+ symtab->sym_start = kallsyms_lookup_name(symname);
+ if (symtab->sym_start) {
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: returns 1, "
+ "symtab->sym_start=0x%lx\n",
+ symtab->sym_start);
+ return 1;
+ }
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbgetsymval: returns 0\n");
+ return 0;
+}
+EXPORT_SYMBOL(kdbgetsymval);
+
+static char *kdb_name_table[100]; /* arbitrary size */
+
+/*
+ * kdbnearsym - Return the name of the symbol with the nearest address
+ * less than 'addr'.
+ *
+ * Parameters:
+ * addr Address to check for symbol near
+ * symtab Structure to receive results
+ * Returns:
+ * 0 No sections contain this address, symtab zero filled
+ * 1 Address mapped to module/symbol/section, data in symtab
+ * Remarks:
+ * 2.6 kallsyms has a "feature" where it unpacks the name into a
+ * string. If that string is reused before the caller expects it
+ * then the caller sees its string change without warning. To
+ * avoid cluttering up the main kdb code with lots of kdb_strdup,
+ * tests and kfree calls, kdbnearsym maintains an LRU list of the
+ * last few unique strings. The list is sized large enough to
+ * hold active strings, no kdb caller of kdbnearsym makes more
+ * than ~20 later calls before using a saved value.
+ */
+int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
+{
+ int ret = 0;
+ unsigned long symbolsize;
+ unsigned long offset;
+#define knt1_size 128 /* must be >= kallsyms table size */
+ char *knt1 = NULL;
+
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
+ memset(symtab, 0, sizeof(*symtab));
+
+ if (addr < 4096)
+ goto out;
+ knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
+ if (!knt1) {
+ kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n",
+ addr);
+ goto out;
+ }
+ symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset,
+ (char **)(&symtab->mod_name), knt1);
+ if (offset > 8*1024*1024) {
+ symtab->sym_name = NULL;
+ addr = offset = symbolsize = 0;
+ }
+ symtab->sym_start = addr - offset;
+ symtab->sym_end = symtab->sym_start + symbolsize;
+ ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
+
+ if (ret) {
+ int i;
+ /* Another 2.6 kallsyms "feature". Sometimes the sym_name is
+ * set but the buffer passed into kallsyms_lookup is not used,
+ * so it contains garbage. The caller has to work out which
+ * buffer needs to be saved.
+ *
+ * What was Rusty smoking when he wrote that code?
+ */
+ if (symtab->sym_name != knt1) {
+ strncpy(knt1, symtab->sym_name, knt1_size);
+ knt1[knt1_size-1] = '\0';
+ }
+ for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+ if (kdb_name_table[i] &&
+ strcmp(kdb_name_table[i], knt1) == 0)
+ break;
+ }
+ if (i >= ARRAY_SIZE(kdb_name_table)) {
+ debug_kfree(kdb_name_table[0]);
+ memcpy(kdb_name_table, kdb_name_table+1,
+ sizeof(kdb_name_table[0]) *
+ (ARRAY_SIZE(kdb_name_table)-1));
+ } else {
+ debug_kfree(knt1);
+ knt1 = kdb_name_table[i];
+ memcpy(kdb_name_table+i, kdb_name_table+i+1,
+ sizeof(kdb_name_table[0]) *
+ (ARRAY_SIZE(kdb_name_table)-i-1));
+ }
+ i = ARRAY_SIZE(kdb_name_table) - 1;
+ kdb_name_table[i] = knt1;
+ symtab->sym_name = kdb_name_table[i];
+ knt1 = NULL;
+ }
+
+ if (symtab->mod_name == NULL)
+ symtab->mod_name = "kernel";
+ if (KDB_DEBUG(AR))
+ kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, "
+ "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret,
+ symtab->sym_start, symtab->mod_name, symtab->sym_name,
+ symtab->sym_name);
+
+out:
+ debug_kfree(knt1);
+ return ret;
+}
+
+void kdbnearsym_cleanup(void)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+ if (kdb_name_table[i]) {
+ debug_kfree(kdb_name_table[i]);
+ kdb_name_table[i] = NULL;
+ }
+ }
+}
+
+static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
+
+/*
+ * kallsyms_symbol_complete
+ *
+ * Parameters:
+ * prefix_name prefix of a symbol name to lookup
+ * max_len maximum length that can be returned
+ * Returns:
+ * Number of symbols which match the given prefix.
+ * Notes:
+ * prefix_name is changed to contain the longest unique prefix that
+ * starts with this prefix (tab completion).
+ */
+int kallsyms_symbol_complete(char *prefix_name, int max_len)
+{
+ loff_t pos = 0;
+ int prefix_len = strlen(prefix_name), prev_len = 0;
+ int i, number = 0;
+ const char *name;
+
+ while ((name = kdb_walk_kallsyms(&pos))) {
+ if (strncmp(name, prefix_name, prefix_len) == 0) {
+ strcpy(ks_namebuf, name);
+ /* Work out the longest name that matches the prefix */
+ if (++number == 1) {
+ prev_len = min_t(int, max_len-1,
+ strlen(ks_namebuf));
+ memcpy(ks_namebuf_prev, ks_namebuf, prev_len);
+ ks_namebuf_prev[prev_len] = '\0';
+ continue;
+ }
+ for (i = 0; i < prev_len; i++) {
+ if (ks_namebuf[i] != ks_namebuf_prev[i]) {
+ prev_len = i;
+ ks_namebuf_prev[i] = '\0';
+ break;
+ }
+ }
+ }
+ }
+ if (prev_len > prefix_len)
+ memcpy(prefix_name, ks_namebuf_prev, prev_len+1);
+ return number;
+}
+
+/*
+ * kallsyms_symbol_next
+ *
+ * Parameters:
+ * prefix_name prefix of a symbol name to lookup
+ * flag 0 means search from the head, 1 means continue search.
+ * Returns:
+ * 1 if a symbol matches the given prefix.
+ * 0 if no string found
+ */
+int kallsyms_symbol_next(char *prefix_name, int flag)
+{
+ int prefix_len = strlen(prefix_name);
+ static loff_t pos;
+ const char *name;
+
+ if (!flag)
+ pos = 0;
+
+ while ((name = kdb_walk_kallsyms(&pos))) {
+ if (strncmp(name, prefix_name, prefix_len) == 0) {
+ strncpy(prefix_name, name, strlen(name)+1);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * kdb_symbol_print - Standard method for printing a symbol name and offset.
+ * Inputs:
+ * addr Address to be printed.
+ * symtab Address of symbol data, if NULL this routine does its
+ * own lookup.
+ * punc Punctuation for string, bit field.
+ * Remarks:
+ * The string and its punctuation is only printed if the address
+ * is inside the kernel, except that the value is always printed
+ * when requested.
+ */
+void kdb_symbol_print(unsigned long addr, const kdb_symtab_t *symtab_p,
+ unsigned int punc)
+{
+ kdb_symtab_t symtab, *symtab_p2;
+ if (symtab_p) {
+ symtab_p2 = (kdb_symtab_t *)symtab_p;
+ } else {
+ symtab_p2 = &symtab;
+ kdbnearsym(addr, symtab_p2);
+ }
+ if (!(symtab_p2->sym_name || (punc & KDB_SP_VALUE)))
+ return;
+ if (punc & KDB_SP_SPACEB)
+ kdb_printf(" ");
+ if (punc & KDB_SP_VALUE)
+ kdb_printf(kdb_machreg_fmt0, addr);
+ if (symtab_p2->sym_name) {
+ if (punc & KDB_SP_VALUE)
+ kdb_printf(" ");
+ if (punc & KDB_SP_PAREN)
+ kdb_printf("(");
+ if (strcmp(symtab_p2->mod_name, "kernel"))
+ kdb_printf("[%s]", symtab_p2->mod_name);
+ kdb_printf("%s", symtab_p2->sym_name);
+ if (addr != symtab_p2->sym_start)
+ kdb_printf("+0x%lx", addr - symtab_p2->sym_start);
+ if (punc & KDB_SP_SYMSIZE)
+ kdb_printf("/0x%lx",
+ symtab_p2->sym_end - symtab_p2->sym_start);
+ if (punc & KDB_SP_PAREN)
+ kdb_printf(")");
+ }
+ if (punc & KDB_SP_SPACEA)
+ kdb_printf(" ");
+ if (punc & KDB_SP_NEWLINE)
+ kdb_printf("\n");
+}
+
+/*
+ * kdb_strdup - kdb equivalent of strdup, for disasm code.
+ * Inputs:
+ * str The string to duplicate.
+ * type Flags to kmalloc for the new string.
+ * Returns:
+ * Address of the new string, NULL if storage could not be allocated.
+ * Remarks:
+ * This is not in lib/string.c because it uses kmalloc which is not
+ * available when string.o is used in boot loaders.
+ */
+char *kdb_strdup(const char *str, gfp_t type)
+{
+ int n = strlen(str)+1;
+ char *s = kmalloc(n, type);
+ if (!s)
+ return NULL;
+ return strcpy(s, str);
+}
+
+/*
+ * kdb_getarea_size - Read an area of data. The kdb equivalent of
+ * copy_from_user, with kdb messages for invalid addresses.
+ * Inputs:
+ * res Pointer to the area to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+int kdb_getarea_size(void *res, unsigned long addr, size_t size)
+{
+ int ret = probe_kernel_read((char *)res, (char *)addr, size);
+ if (ret) {
+ if (!KDB_STATE(SUPPRESS)) {
+ kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
+ KDB_STATE_SET(SUPPRESS);
+ }
+ ret = KDB_BADADDR;
+ } else {
+ KDB_STATE_CLEAR(SUPPRESS);
+ }
+ return ret;
+}
+
+/*
+ * kdb_putarea_size - Write an area of data. The kdb equivalent of
+ * copy_to_user, with kdb messages for invalid addresses.
+ * Inputs:
+ * addr Address of the area to write to.
+ * res Pointer to the area holding the data.
+ * size Size of the area.
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+int kdb_putarea_size(unsigned long addr, void *res, size_t size)
+{
+ int ret = probe_kernel_read((char *)addr, (char *)res, size);
+ if (ret) {
+ if (!KDB_STATE(SUPPRESS)) {
+ kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
+ KDB_STATE_SET(SUPPRESS);
+ }
+ ret = KDB_BADADDR;
+ } else {
+ KDB_STATE_CLEAR(SUPPRESS);
+ }
+ return ret;
+}
+
+/*
+ * kdb_getphys - Read data from a physical address. Validate the
+ * address is in range, use kmap_atomic() to get data
+ * similar to kdb_getarea() - but for phys addresses
+ * Inputs:
+ * res Pointer to the word to receive the result
+ * addr Physical address of the area to copy
+ * size Size of the area
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+static int kdb_getphys(void *res, unsigned long addr, size_t size)
+{
+ unsigned long pfn;
+ void *vaddr;
+ struct page *page;
+
+ pfn = (addr >> PAGE_SHIFT);
+ if (!pfn_valid(pfn))
+ return 1;
+ page = pfn_to_page(pfn);
+ vaddr = kmap_atomic(page, KM_KDB);
+ memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size);
+ kunmap_atomic(vaddr, KM_KDB);
+
+ return 0;
+}
+
+/*
+ * kdb_getphysword
+ * Inputs:
+ * word Pointer to the word to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ *word = 0; /* Default value if addr or size is invalid */
+
+ switch (size) {
+ case 1:
+ diag = kdb_getphys(&w1, addr, sizeof(w1));
+ if (!diag)
+ *word = w1;
+ break;
+ case 2:
+ diag = kdb_getphys(&w2, addr, sizeof(w2));
+ if (!diag)
+ *word = w2;
+ break;
+ case 4:
+ diag = kdb_getphys(&w4, addr, sizeof(w4));
+ if (!diag)
+ *word = w4;
+ break;
+ case 8:
+ if (size <= sizeof(*word)) {
+ diag = kdb_getphys(&w8, addr, sizeof(w8));
+ if (!diag)
+ *word = w8;
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
+ }
+ return diag;
+}
+
+/*
+ * kdb_getword - Read a binary value. Unlike kdb_getarea, this treats
+ * data as numbers.
+ * Inputs:
+ * word Pointer to the word to receive the result.
+ * addr Address of the area to copy.
+ * size Size of the area.
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ *word = 0; /* Default value if addr or size is invalid */
+ switch (size) {
+ case 1:
+ diag = kdb_getarea(w1, addr);
+ if (!diag)
+ *word = w1;
+ break;
+ case 2:
+ diag = kdb_getarea(w2, addr);
+ if (!diag)
+ *word = w2;
+ break;
+ case 4:
+ diag = kdb_getarea(w4, addr);
+ if (!diag)
+ *word = w4;
+ break;
+ case 8:
+ if (size <= sizeof(*word)) {
+ diag = kdb_getarea(w8, addr);
+ if (!diag)
+ *word = w8;
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_getword: bad width %ld\n", (long) size);
+ }
+ return diag;
+}
+
+/*
+ * kdb_putword - Write a binary value. Unlike kdb_putarea, this
+ * treats data as numbers.
+ * Inputs:
+ * addr Address of the area to write to..
+ * word The value to set.
+ * size Size of the area.
+ * Returns:
+ * 0 for success, < 0 for error.
+ */
+int kdb_putword(unsigned long addr, unsigned long word, size_t size)
+{
+ int diag;
+ __u8 w1;
+ __u16 w2;
+ __u32 w4;
+ __u64 w8;
+ switch (size) {
+ case 1:
+ w1 = word;
+ diag = kdb_putarea(addr, w1);
+ break;
+ case 2:
+ w2 = word;
+ diag = kdb_putarea(addr, w2);
+ break;
+ case 4:
+ w4 = word;
+ diag = kdb_putarea(addr, w4);
+ break;
+ case 8:
+ if (size <= sizeof(word)) {
+ w8 = word;
+ diag = kdb_putarea(addr, w8);
+ break;
+ }
+ /* drop through */
+ default:
+ diag = KDB_BADWIDTH;
+ kdb_printf("kdb_putword: bad width %ld\n", (long) size);
+ }
+ return diag;
+}
+
+/*
+ * kdb_task_state_string - Convert a string containing any of the
+ * letters DRSTCZEUIMA to a mask for the process state field and
+ * return the value. If no argument is supplied, return the mask
+ * that corresponds to environment variable PS, DRSTCZEU by
+ * default.
+ * Inputs:
+ * s String to convert
+ * Returns:
+ * Mask for process state.
+ * Notes:
+ * The mask folds data from several sources into a single long value, so
+ * be carefull not to overlap the bits. TASK_* bits are in the LSB,
+ * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there
+ * is no overlap between TASK_* and EXIT_* but that may not always be
+ * true, so EXIT_* bits are shifted left 16 bits before being stored in
+ * the mask.
+ */
+
+/* unrunnable is < 0 */
+#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1))
+#define RUNNING (1UL << (8*sizeof(unsigned long) - 2))
+#define IDLE (1UL << (8*sizeof(unsigned long) - 3))
+#define DAEMON (1UL << (8*sizeof(unsigned long) - 4))
+
+unsigned long kdb_task_state_string(const char *s)
+{
+ long res = 0;
+ if (!s) {
+ s = kdbgetenv("PS");
+ if (!s)
+ s = "DRSTCZEU"; /* default value for ps */
+ }
+ while (*s) {
+ switch (*s) {
+ case 'D':
+ res |= TASK_UNINTERRUPTIBLE;
+ break;
+ case 'R':
+ res |= RUNNING;
+ break;
+ case 'S':
+ res |= TASK_INTERRUPTIBLE;
+ break;
+ case 'T':
+ res |= TASK_STOPPED;
+ break;
+ case 'C':
+ res |= TASK_TRACED;
+ break;
+ case 'Z':
+ res |= EXIT_ZOMBIE << 16;
+ break;
+ case 'E':
+ res |= EXIT_DEAD << 16;
+ break;
+ case 'U':
+ res |= UNRUNNABLE;
+ break;
+ case 'I':
+ res |= IDLE;
+ break;
+ case 'M':
+ res |= DAEMON;
+ break;
+ case 'A':
+ res = ~0UL;
+ break;
+ default:
+ kdb_printf("%s: unknown flag '%c' ignored\n",
+ __func__, *s);
+ break;
+ }
+ ++s;
+ }
+ return res;
+}
+
+/*
+ * kdb_task_state_char - Return the character that represents the task state.
+ * Inputs:
+ * p struct task for the process
+ * Returns:
+ * One character to represent the task state.
+ */
+char kdb_task_state_char (const struct task_struct *p)
+{
+ int cpu;
+ char state;
+ unsigned long tmp;
+
+ if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ return 'E';
+
+ cpu = kdb_process_cpu(p);
+ state = (p->state == 0) ? 'R' :
+ (p->state < 0) ? 'U' :
+ (p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (p->state & TASK_STOPPED) ? 'T' :
+ (p->state & TASK_TRACED) ? 'C' :
+ (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
+ (p->exit_state & EXIT_DEAD) ? 'E' :
+ (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+ if (p->pid == 0) {
+ /* Idle task. Is it really idle, apart from the kdb
+ * interrupt? */
+ if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
+ if (cpu != kdb_initial_cpu)
+ state = 'I'; /* idle task */
+ }
+ } else if (!p->mm && state == 'S') {
+ state = 'M'; /* sleeping system daemon */
+ }
+ return state;
+}
+
+/*
+ * kdb_task_state - Return true if a process has the desired state
+ * given by the mask.
+ * Inputs:
+ * p struct task for the process
+ * mask mask from kdb_task_state_string to select processes
+ * Returns:
+ * True if the process matches at least one criteria defined by the mask.
+ */
+unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
+{
+ char state[] = { kdb_task_state_char(p), '\0' };
+ return (mask & kdb_task_state_string(state)) != 0;
+}
+
+/*
+ * kdb_print_nameval - Print a name and its value, converting the
+ * value to a symbol lookup if possible.
+ * Inputs:
+ * name field name to print
+ * val value of field
+ */
+void kdb_print_nameval(const char *name, unsigned long val)
+{
+ kdb_symtab_t symtab;
+ kdb_printf(" %-11.11s ", name);
+ if (kdbnearsym(val, &symtab))
+ kdb_symbol_print(val, &symtab,
+ KDB_SP_VALUE|KDB_SP_SYMSIZE|KDB_SP_NEWLINE);
+ else
+ kdb_printf("0x%lx\n", val);
+}
+
+/* Last ditch allocator for debugging, so we can still debug even when
+ * the GFP_ATOMIC pool has been exhausted. The algorithms are tuned
+ * for space usage, not for speed. One smallish memory pool, the free
+ * chain is always in ascending address order to allow coalescing,
+ * allocations are done in brute force best fit.
+ */
+
+struct debug_alloc_header {
+ u32 next; /* offset of next header from start of pool */
+ u32 size;
+ void *caller;
+};
+
+/* The memory returned by this allocator must be aligned, which means
+ * so must the header size. Do not assume that sizeof(struct
+ * debug_alloc_header) is a multiple of the alignment, explicitly
+ * calculate the overhead of this header, including the alignment.
+ * The rest of this code must not use sizeof() on any header or
+ * pointer to a header.
+ */
+#define dah_align 8
+#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
+
+static u64 debug_alloc_pool_aligned[256*1024/dah_align]; /* 256K pool */
+static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
+static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
+
+/* Locking is awkward. The debug code is called from all contexts,
+ * including non maskable interrupts. A normal spinlock is not safe
+ * in NMI context. Try to get the debug allocator lock, if it cannot
+ * be obtained after a second then give up. If the lock could not be
+ * previously obtained on this cpu then only try once.
+ *
+ * sparse has no annotation for "this function _sometimes_ acquires a
+ * lock", so fudge the acquire/release notation.
+ */
+static DEFINE_SPINLOCK(dap_lock);
+static int get_dap_lock(void)
+ __acquires(dap_lock)
+{
+ static int dap_locked = -1;
+ int count;
+ if (dap_locked == smp_processor_id())
+ count = 1;
+ else
+ count = 1000;
+ while (1) {
+ if (spin_trylock(&dap_lock)) {
+ dap_locked = -1;
+ return 1;
+ }
+ if (!count--)
+ break;
+ udelay(1000);
+ }
+ dap_locked = smp_processor_id();
+ __acquire(dap_lock);
+ return 0;
+}
+
+void *debug_kmalloc(size_t size, gfp_t flags)
+{
+ unsigned int rem, h_offset;
+ struct debug_alloc_header *best, *bestprev, *prev, *h;
+ void *p = NULL;
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return NULL;
+ }
+ h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+ if (dah_first_call) {
+ h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
+ dah_first_call = 0;
+ }
+ size = ALIGN(size, dah_align);
+ prev = best = bestprev = NULL;
+ while (1) {
+ if (h->size >= size && (!best || h->size < best->size)) {
+ best = h;
+ bestprev = prev;
+ if (h->size == size)
+ break;
+ }
+ if (!h->next)
+ break;
+ prev = h;
+ h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
+ }
+ if (!best)
+ goto out;
+ rem = best->size - size;
+ /* The pool must always contain at least one header */
+ if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
+ goto out;
+ if (rem >= dah_overhead) {
+ best->size = size;
+ h_offset = ((char *)best - debug_alloc_pool) +
+ dah_overhead + best->size;
+ h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
+ h->size = rem - dah_overhead;
+ h->next = best->next;
+ } else
+ h_offset = best->next;
+ best->caller = __builtin_return_address(0);
+ dah_used += best->size;
+ dah_used_max = max(dah_used, dah_used_max);
+ if (bestprev)
+ bestprev->next = h_offset;
+ else
+ dah_first = h_offset;
+ p = (char *)best + dah_overhead;
+ memset(p, POISON_INUSE, best->size - 1);
+ *((char *)p + best->size - 1) = POISON_END;
+out:
+ spin_unlock(&dap_lock);
+ return p;
+}
+
+void debug_kfree(void *p)
+{
+ struct debug_alloc_header *h;
+ unsigned int h_offset;
+ if (!p)
+ return;
+ if ((char *)p < debug_alloc_pool ||
+ (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
+ kfree(p);
+ return;
+ }
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return; /* memory leak, cannot be helped */
+ }
+ h = (struct debug_alloc_header *)((char *)p - dah_overhead);
+ memset(p, POISON_FREE, h->size - 1);
+ *((char *)p + h->size - 1) = POISON_END;
+ h->caller = NULL;
+ dah_used -= h->size;
+ h_offset = (char *)h - debug_alloc_pool;
+ if (h_offset < dah_first) {
+ h->next = dah_first;
+ dah_first = h_offset;
+ } else {
+ struct debug_alloc_header *prev;
+ unsigned int prev_offset;
+ prev = (struct debug_alloc_header *)(debug_alloc_pool +
+ dah_first);
+ while (1) {
+ if (!prev->next || prev->next > h_offset)
+ break;
+ prev = (struct debug_alloc_header *)
+ (debug_alloc_pool + prev->next);
+ }
+ prev_offset = (char *)prev - debug_alloc_pool;
+ if (prev_offset + dah_overhead + prev->size == h_offset) {
+ prev->size += dah_overhead + h->size;
+ memset(h, POISON_FREE, dah_overhead - 1);
+ *((char *)h + dah_overhead - 1) = POISON_END;
+ h = prev;
+ h_offset = prev_offset;
+ } else {
+ h->next = prev->next;
+ prev->next = h_offset;
+ }
+ }
+ if (h_offset + dah_overhead + h->size == h->next) {
+ struct debug_alloc_header *next;
+ next = (struct debug_alloc_header *)
+ (debug_alloc_pool + h->next);
+ h->size += dah_overhead + next->size;
+ h->next = next->next;
+ memset(next, POISON_FREE, dah_overhead - 1);
+ *((char *)next + dah_overhead - 1) = POISON_END;
+ }
+ spin_unlock(&dap_lock);
+}
+
+void debug_kusage(void)
+{
+ struct debug_alloc_header *h_free, *h_used;
+#ifdef CONFIG_IA64
+ /* FIXME: using dah for ia64 unwind always results in a memory leak.
+ * Fix that memory leak first, then set debug_kusage_one_time = 1 for
+ * all architectures.
+ */
+ static int debug_kusage_one_time;
+#else
+ static int debug_kusage_one_time = 1;
+#endif
+ if (!get_dap_lock()) {
+ __release(dap_lock); /* we never actually got it */
+ return;
+ }
+ h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+ if (dah_first == 0 &&
+ (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
+ dah_first_call))
+ goto out;
+ if (!debug_kusage_one_time)
+ goto out;
+ debug_kusage_one_time = 0;
+ kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n",
+ __func__, dah_first);
+ if (dah_first) {
+ h_used = (struct debug_alloc_header *)debug_alloc_pool;
+ kdb_printf("%s: h_used %p size %d\n", __func__, h_used,
+ h_used->size);
+ }
+ do {
+ h_used = (struct debug_alloc_header *)
+ ((char *)h_free + dah_overhead + h_free->size);
+ kdb_printf("%s: h_used %p size %d caller %p\n",
+ __func__, h_used, h_used->size, h_used->caller);
+ h_free = (struct debug_alloc_header *)
+ (debug_alloc_pool + h_free->next);
+ } while (h_free->next);
+ h_used = (struct debug_alloc_header *)
+ ((char *)h_free + dah_overhead + h_free->size);
+ if ((char *)h_used - debug_alloc_pool !=
+ sizeof(debug_alloc_pool_aligned))
+ kdb_printf("%s: h_used %p size %d caller %p\n",
+ __func__, h_used, h_used->size, h_used->caller);
+out:
+ spin_unlock(&dap_lock);
+}
+
+/* Maintain a small stack of kdb_flags to allow recursion without disturbing
+ * the global kdb state.
+ */
+
+static int kdb_flags_stack[4], kdb_flags_index;
+
+void kdb_save_flags(void)
+{
+ BUG_ON(kdb_flags_index >= ARRAY_SIZE(kdb_flags_stack));
+ kdb_flags_stack[kdb_flags_index++] = kdb_flags;
+}
+
+void kdb_restore_flags(void)
+{
+ BUG_ON(kdb_flags_index <= 0);
+ kdb_flags = kdb_flags_stack[--kdb_flags_index];
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f2683a10ac..ceffc67b564 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -55,15 +55,14 @@
#include <asm/unistd.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
-#include "cred-internals.h"
static void exit_mm(struct task_struct * tsk);
-static void __unhash_process(struct task_struct *p)
+static void __unhash_process(struct task_struct *p, bool group_dead)
{
nr_threads--;
detach_pid(p, PIDTYPE_PID);
- if (thread_group_leader(p)) {
+ if (group_dead) {
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
@@ -80,10 +79,9 @@ static void __unhash_process(struct task_struct *p)
static void __exit_signal(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
+ bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
-
- BUG_ON(!sig);
- BUG_ON(!atomic_read(&sig->count));
+ struct tty_struct *uninitialized_var(tty);
sighand = rcu_dereference_check(tsk->sighand,
rcu_read_lock_held() ||
@@ -91,14 +89,16 @@ static void __exit_signal(struct task_struct *tsk)
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
- if (atomic_dec_and_test(&sig->count))
+ if (group_dead) {
posix_cpu_timers_exit_group(tsk);
- else {
+ tty = sig->tty;
+ sig->tty = NULL;
+ } else {
/*
* If there is any task waiting for the group exit
* then notify it:
*/
- if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
+ if (sig->notify_count > 0 && !--sig->notify_count)
wake_up_process(sig->group_exit_task);
if (tsk == sig->curr_target)
@@ -124,32 +124,24 @@ static void __exit_signal(struct task_struct *tsk)
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
- sig = NULL; /* Marker for below. */
}
- __unhash_process(tsk);
+ sig->nr_threads--;
+ __unhash_process(tsk, group_dead);
/*
* Do this under ->siglock, we can race with another thread
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
*/
flush_sigqueue(&tsk->pending);
-
- tsk->signal = NULL;
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
- if (sig) {
+ if (group_dead) {
flush_sigqueue(&sig->shared_pending);
- taskstats_tgid_free(sig);
- /*
- * Make sure ->signal can't go away under rq->lock,
- * see account_group_exec_runtime().
- */
- task_rq_unlock_wait(tsk);
- __cleanup_signal(sig);
+ tty_kref_put(tty);
}
}
@@ -857,12 +849,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
- /* mt-exec, de_thread() is waiting for us */
- if (thread_group_leader(tsk) &&
- tsk->signal->group_exit_task &&
- tsk->signal->notify_count < 0)
+ /* mt-exec, de_thread() is waiting for group leader */
+ if (unlikely(tsk->signal->notify_count < 0))
wake_up_process(tsk->signal->group_exit_task);
-
write_unlock_irq(&tasklist_lock);
tracehook_report_death(tsk, signal, cookie, group_dead);
@@ -1003,8 +992,10 @@ NORET_TYPE void do_exit(long code)
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
+ task_lock(tsk);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
+ task_unlock(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c14942a0ee..bf9fef6d1bf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -165,6 +165,18 @@ void free_task(struct task_struct *tsk)
}
EXPORT_SYMBOL(free_task);
+static inline void free_signal_struct(struct signal_struct *sig)
+{
+ taskstats_tgid_free(sig);
+ kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void put_signal_struct(struct signal_struct *sig)
+{
+ if (atomic_dec_and_test(&sig->sigcnt))
+ free_signal_struct(sig);
+}
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -173,6 +185,7 @@ void __put_task_struct(struct task_struct *tsk)
exit_creds(tsk);
delayacct_tsk_free(tsk);
+ put_signal_struct(tsk->signal);
if (!profile_handoff_task(tsk))
free_task(tsk);
@@ -864,8 +877,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
if (!sig)
return -ENOMEM;
- atomic_set(&sig->count, 1);
+ sig->nr_threads = 1;
atomic_set(&sig->live, 1);
+ atomic_set(&sig->sigcnt, 1);
init_waitqueue_head(&sig->wait_chldexit);
if (clone_flags & CLONE_NEWPID)
sig->flags |= SIGNAL_UNKILLABLE;
@@ -889,13 +903,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
return 0;
}
-void __cleanup_signal(struct signal_struct *sig)
-{
- thread_group_cputime_free(sig);
- tty_kref_put(sig->tty);
- kmem_cache_free(signal_cachep, sig);
-}
-
static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
@@ -1079,6 +1086,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
mpol_fix_fork_child_flag(p);
#endif
+#ifdef CONFIG_CPUSETS
+ p->cpuset_mem_spread_rotor = node_random(p->mems_allowed);
+ p->cpuset_slab_spread_rotor = node_random(p->mems_allowed);
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
@@ -1112,8 +1123,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->memcg_batch.memcg = NULL;
#endif
- p->bts = NULL;
-
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
@@ -1247,8 +1256,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (clone_flags & CLONE_THREAD) {
- atomic_inc(&current->signal->count);
+ current->signal->nr_threads++;
atomic_inc(&current->signal->live);
+ atomic_inc(&current->signal->sigcnt);
p->group_leader = current->group_leader;
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
}
@@ -1261,7 +1271,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->nsproxy->pid_ns->child_reaper = p;
p->signal->leader_pid = pid;
- tty_kref_put(p->signal->tty);
p->signal->tty = tty_kref_get(current->signal->tty);
attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
attach_pid(p, PIDTYPE_SID, task_session(current));
@@ -1294,7 +1303,7 @@ bad_fork_cleanup_mm:
mmput(p->mm);
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
- __cleanup_signal(p->signal);
+ free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
@@ -1329,6 +1338,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_re
return regs;
}
+static inline void init_idle_pids(struct pid_link *links)
+{
+ enum pid_type type;
+
+ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+ INIT_HLIST_NODE(&links[type].node); /* not really needed */
+ links[type].pid = &init_struct_pid;
+ }
+}
+
struct task_struct * __cpuinit fork_idle(int cpu)
{
struct task_struct *task;
@@ -1336,8 +1355,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
&init_struct_pid, 0);
- if (!IS_ERR(task))
+ if (!IS_ERR(task)) {
+ init_idle_pids(task->pids);
init_idle(task, cpu);
+ }
return task;
}
@@ -1509,14 +1530,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
*flags_ptr |= CLONE_SIGHAND;
/*
- * If unsharing signal handlers and the task was created
- * using CLONE_THREAD, then must unshare the thread
- */
- if ((*flags_ptr & CLONE_SIGHAND) &&
- (atomic_read(&current->signal->count) > 1))
- *flags_ptr |= CLONE_THREAD;
-
- /*
* If unsharing namespace, must also unshare filesystem information.
*/
if (*flags_ptr & CLONE_NEWNS)
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2ee396..53b1916c949 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp)
*/
int set_groups(struct cred *new, struct group_info *group_info)
{
- int retval;
-
- retval = security_task_setgroups(group_info);
- if (retval)
- return retval;
-
put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e9..5c69e996bd0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -89,7 +89,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
do {
seq = read_seqbegin(&xtime_lock);
- xts = current_kernel_time();
+ xts = __current_kernel_time();
tom = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
}
/**
- * schedule_hrtimeout_range - sleep until timeout
+ * schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
*/
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
- const enum hrtimer_mode mode)
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+ const enum hrtimer_mode mode, int clock)
{
struct hrtimer_sleeper t;
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return -EINTR;
}
- hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init_on_stack(&t.timer, clock, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return !t.task ? 0 : -EINTR;
}
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @delta: slack in expires timeout (ktime_t)
+ * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel give the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+ const enum hrtimer_mode mode)
+{
+ return schedule_hrtimeout_range_clock(expires, delta, mode,
+ CLOCK_MONOTONIC);
+}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
/**
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03808ed342a..7a56b22e060 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/hw_breakpoint.h>
+
/*
* Constraints data
*/
/* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
/* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
/* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
/* Gather the number of total pinned and un-pinned bp in a cpuset */
struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+ return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+ if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+ return TYPE_DATA;
+
+ return TYPE_INST;
+}
+
/*
* Report the maximum number of pinned breakpoints a task
* have in this cpu
*/
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
int i;
- unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+ unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
- for (i = HBP_NUM -1; i >= 0; i--) {
+ for (i = nr_slots[type] - 1; i >= 0; i--) {
if (tsk_pinned[i] > 0)
return i + 1;
}
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
return 0;
}
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
{
struct perf_event_context *ctx = tsk->perf_event_ctxp;
struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
*/
list_for_each_entry(bp, list, event_entry) {
if (bp->attr.type == PERF_TYPE_BREAKPOINT)
- count++;
+ if (find_slot_idx(bp) == type)
+ count += hw_breakpoint_weight(bp);
}
raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
* a given cpu (cpu > -1) or in all of them (cpu = -1).
*/
static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+ enum bp_type_idx type)
{
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
if (cpu >= 0) {
- slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+ slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
if (!tsk)
- slots->pinned += max_task_bp_pinned(cpu);
+ slots->pinned += max_task_bp_pinned(cpu, type);
else
- slots->pinned += task_bp_pinned(tsk);
- slots->flexible = per_cpu(nr_bp_flexible, cpu);
+ slots->pinned += task_bp_pinned(tsk, type);
+ slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
return;
}
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
for_each_online_cpu(cpu) {
unsigned int nr;
- nr = per_cpu(nr_cpu_bp_pinned, cpu);
+ nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
if (!tsk)
- nr += max_task_bp_pinned(cpu);
+ nr += max_task_bp_pinned(cpu, type);
else
- nr += task_bp_pinned(tsk);
+ nr += task_bp_pinned(tsk, type);
if (nr > slots->pinned)
slots->pinned = nr;
- nr = per_cpu(nr_bp_flexible, cpu);
+ nr = per_cpu(nr_bp_flexible[type], cpu);
if (nr > slots->flexible)
slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
}
/*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * in a same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+ slots->pinned += weight;
+}
+
+/*
* Add a pinned breakpoint for the given task in our constraint table
*/
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+ enum bp_type_idx type, int weight)
{
unsigned int *tsk_pinned;
- int count = 0;
+ int old_count = 0;
+ int old_idx = 0;
+ int idx = 0;
- count = task_bp_pinned(tsk);
+ old_count = task_bp_pinned(tsk, type);
+ old_idx = old_count - 1;
+ idx = old_idx + weight;
- tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+ tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
if (enable) {
- tsk_pinned[count]++;
- if (count > 0)
- tsk_pinned[count-1]--;
+ tsk_pinned[idx]++;
+ if (old_count > 0)
+ tsk_pinned[old_idx]--;
} else {
- tsk_pinned[count]--;
- if (count > 0)
- tsk_pinned[count-1]++;
+ tsk_pinned[idx]--;
+ if (old_count > 0)
+ tsk_pinned[old_idx]++;
}
}
/*
* Add/remove the given breakpoint in our constraint table
*/
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+ int weight)
{
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
/* Pinned counter task profiling */
if (tsk) {
if (cpu >= 0) {
- toggle_bp_task_slot(tsk, cpu, enable);
+ toggle_bp_task_slot(tsk, cpu, enable, type, weight);
return;
}
for_each_online_cpu(cpu)
- toggle_bp_task_slot(tsk, cpu, enable);
+ toggle_bp_task_slot(tsk, cpu, enable, type, weight);
return;
}
/* Pinned counter cpu profiling */
if (enable)
- per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
else
- per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
}
/*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
static int __reserve_bp_slot(struct perf_event *bp)
{
struct bp_busy_slots slots = {0};
+ enum bp_type_idx type;
+ int weight;
- fetch_bp_busy_slots(&slots, bp);
+ /* We couldn't initialize breakpoint constraints on boot */
+ if (!constraints_initialized)
+ return -ENOMEM;
+
+ /* Basic checks */
+ if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+ bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+ return -EINVAL;
+
+ type = find_slot_idx(bp);
+ weight = hw_breakpoint_weight(bp);
+
+ fetch_bp_busy_slots(&slots, bp, type);
+ fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */
- if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+ if (slots.pinned + (!!slots.flexible) > nr_slots[type])
return -ENOSPC;
- toggle_bp_slot(bp, true);
+ toggle_bp_slot(bp, true, type, weight);
return 0;
}
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
static void __release_bp_slot(struct perf_event *bp)
{
- toggle_bp_slot(bp, false);
+ enum bp_type_idx type;
+ int weight;
+
+ type = find_slot_idx(bp);
+ weight = hw_breakpoint_weight(bp);
+ toggle_bp_slot(bp, false, type, weight);
}
void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
return 0;
}
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+ int ret;
+
+ ret = arch_validate_hwbkpt_settings(bp);
+ if (ret)
+ return ret;
+
+ if (arch_check_bp_in_kernelspace(bp)) {
+ if (bp->attr.exclude_kernel)
+ return -EINVAL;
+ /*
+ * Don't let unprivileged users set a breakpoint in the trap
+ * path to avoid trap recursion attacks.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+
+ return 0;
+}
+
int register_perf_hw_breakpoint(struct perf_event *bp)
{
int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
if (ret)
return ret;
- /*
- * Ptrace breakpoints can be temporary perf events only
- * meant to reserve a slot. In this case, it is created disabled and
- * we don't want to check the params right now (as we put a null addr)
- * But perf tools create events as disabled and we want to check
- * the params for them.
- * This is a quick hack that will be removed soon, once we remove
- * the tmp breakpoints from ptrace
- */
- if (!bp->attr.disabled || !bp->overflow_handler)
- ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+ ret = validate_hw_breakpoint(bp);
/* if arch_validate_hwbkpt_settings() fails then release bp slot */
if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
if (attr->disabled)
goto end;
- err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+ err = validate_hw_breakpoint(bp);
if (!err)
perf_event_enable(bp);
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
static int __init init_hw_breakpoint(void)
{
+ unsigned int **task_bp_pinned;
+ int cpu, err_cpu;
+ int i;
+
+ for (i = 0; i < TYPE_MAX; i++)
+ nr_slots[i] = hw_breakpoint_slots(i);
+
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < TYPE_MAX; i++) {
+ task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+ *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+ GFP_KERNEL);
+ if (!*task_bp_pinned)
+ goto err_alloc;
+ }
+ }
+
+ constraints_initialized = 1;
+
return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+ for_each_possible_cpu(err_cpu) {
+ if (err_cpu == cpu)
+ break;
+ for (i = 0; i < TYPE_MAX; i++)
+ kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+ }
+
+ return -ENOMEM;
}
core_initcall(init_hw_breakpoint);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a671bfe..27e5c691122 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
- if (!(action->flags & IRQF_DISABLED))
- local_irq_enable_in_hardirq();
-
do {
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488730a..3164ba7ce15 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
return 0;
}
+int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
+
+ if (!desc)
+ return -EINVAL;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ desc->affinity_hint = m;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
#ifndef CONFIG_AUTO_IRQ_AFFINITY
/*
* Generic version of the affinity autoselector.
@@ -757,16 +773,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (new->flags & IRQF_ONESHOT)
desc->status |= IRQ_ONESHOT;
- /*
- * Force MSI interrupts to run with interrupts
- * disabled. The multi vector cards can cause stack
- * overflows due to nested interrupts when enough of
- * them are directed to a core and fire at the same
- * time.
- */
- if (desc->msi_desc)
- new->flags |= IRQF_DISABLED;
-
if (!(desc->status & IRQ_NOAUTOEN)) {
desc->depth = 0;
desc->status &= ~IRQ_DISABLED;
@@ -916,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
+#ifdef CONFIG_SMP
+ /* make sure affinity_hint is cleaned up */
+ if (WARN_ON_ONCE(desc->affinity_hint))
+ desc->affinity_hint = NULL;
+#endif
+
raw_spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -1027,7 +1039,6 @@ EXPORT_SYMBOL(free_irq);
* Flags:
*
* IRQF_SHARED Interrupt is shared
- * IRQF_DISABLED Disable local interrupts while processing
* IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
* IRQF_TRIGGER_* Specify active edge(s) or level
*
@@ -1041,25 +1052,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
int retval;
/*
- * handle_IRQ_event() always ignores IRQF_DISABLED except for
- * the _first_ irqaction (sigh). That can cause oopsing, but
- * the behavior is classified as "will not fix" so we need to
- * start nudging drivers away from using that idiom.
- */
- if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
- (IRQF_SHARED|IRQF_DISABLED)) {
- pr_warning(
- "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
- irq, devname);
- }
-
-#ifdef CONFIG_LOCKDEP
- /*
- * Lockdep wants atomic interrupt handlers:
- */
- irqflags |= IRQF_DISABLED;
-#endif
- /*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
@@ -1120,3 +1112,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
EXPORT_SYMBOL(request_threaded_irq);
+
+/**
+ * request_any_context_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Threaded handler for threaded interrupts.
+ * @flags: Interrupt type flags
+ * @name: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. It selects either a
+ * hardirq or threaded handling method depending on the
+ * context.
+ *
+ * On failure, it returns a negative value. On success,
+ * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
+ */
+int request_any_context_irq(unsigned int irq, irq_handler_t handler,
+ unsigned long flags, const char *name, void *dev_id)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ int ret;
+
+ if (!desc)
+ return -EINVAL;
+
+ if (desc->status & IRQ_NESTED_THREAD) {
+ ret = request_threaded_irq(irq, NULL, handler,
+ flags, name, dev_id);
+ return !ret ? IRQC_IS_NESTED : ret;
+ }
+
+ ret = request_irq(irq, handler, flags, name, dev_id);
+ return !ret ? IRQC_IS_HARDIRQ : ret;
+}
+EXPORT_SYMBOL_GPL(request_any_context_irq);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04ef6b..09a2ee540bd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,27 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
return 0;
}
+static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long)m->private);
+ unsigned long flags;
+ cpumask_var_t mask;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ if (desc->affinity_hint)
+ cpumask_copy(mask, desc->affinity_hint);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ seq_cpumask(m, mask);
+ seq_putc(m, '\n');
+ free_cpumask_var(mask);
+
+ return 0;
+}
+
#ifndef is_affinity_mask_valid
#define is_affinity_mask_valid(val) 1
#endif
@@ -84,6 +105,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}
+static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+}
+
static const struct file_operations irq_affinity_proc_fops = {
.open = irq_affinity_proc_open,
.read = seq_read,
@@ -92,6 +118,13 @@ static const struct file_operations irq_affinity_proc_fops = {
.write = irq_affinity_proc_write,
};
+static const struct file_operations irq_affinity_hint_proc_fops = {
+ .open = irq_affinity_hint_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, irq_default_affinity);
@@ -147,6 +180,26 @@ static const struct file_operations default_affinity_proc_fops = {
.release = single_release,
.write = default_affinity_write,
};
+
+static int irq_node_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long) m->private);
+
+ seq_printf(m, "%d\n", desc->node);
+ return 0;
+}
+
+static int irq_node_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_node_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations irq_node_proc_fops = {
+ .open = irq_node_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif
static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -231,6 +284,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
/* create /proc/irq/<irq>/smp_affinity */
proc_create_data("smp_affinity", 0600, desc->dir,
&irq_affinity_proc_fops, (void *)(long)irq);
+
+ /* create /proc/irq/<irq>/affinity_hint */
+ proc_create_data("affinity_hint", 0400, desc->dir,
+ &irq_affinity_hint_proc_fops, (void *)(long)irq);
+
+ proc_create_data("node", 0444, desc->dir,
+ &irq_node_proc_fops, (void *)(long)irq);
#endif
proc_create_data("spurious", 0444, desc->dir,
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 13aff293f4d..6f6d091b575 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
+#include <linux/kdb.h>
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/sched.h> /* for cond_resched */
@@ -516,6 +517,26 @@ static int kallsyms_open(struct inode *inode, struct file *file)
return ret;
}
+#ifdef CONFIG_KGDB_KDB
+const char *kdb_walk_kallsyms(loff_t *pos)
+{
+ static struct kallsym_iter kdb_walk_kallsyms_iter;
+ if (*pos == 0) {
+ memset(&kdb_walk_kallsyms_iter, 0,
+ sizeof(kdb_walk_kallsyms_iter));
+ reset_iter(&kdb_walk_kallsyms_iter, 0);
+ }
+ while (1) {
+ if (!update_iter(&kdb_walk_kallsyms_iter, *pos))
+ return NULL;
+ ++*pos;
+ /* Some debugging symbols have no name. Ignore them. */
+ if (kdb_walk_kallsyms_iter.name[0])
+ return kdb_walk_kallsyms_iter.name;
+ }
+}
+#endif /* CONFIG_KGDB_KDB */
+
static const struct file_operations kallsyms_operations = {
.open = kallsyms_open,
.read = seq_read,
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
deleted file mode 100644
index 11f3515ca83..00000000000
--- a/kernel/kgdb.c
+++ /dev/null
@@ -1,1764 +0,0 @@
-/*
- * KGDB stub.
- *
- * Maintainer: Jason Wessel <jason.wessel@windriver.com>
- *
- * Copyright (C) 2000-2001 VERITAS Software Corporation.
- * Copyright (C) 2002-2004 Timesys Corporation
- * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
- * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
- * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
- * Copyright (C) 2005-2008 Wind River Systems, Inc.
- * Copyright (C) 2007 MontaVista Software, Inc.
- * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *
- * Contributors at various stages not listed above:
- * Jason Wessel ( jason.wessel@windriver.com )
- * George Anzinger <george@mvista.com>
- * Anurekh Saxena (anurekh.saxena@timesys.com)
- * Lake Stevens Instrument Division (Glenn Engel)
- * Jim Kingdon, Cygnus Support.
- *
- * Original KGDB stub: David Grothe <dave@gcom.com>,
- * Tigran Aivazian <tigran@sco.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-#include <linux/pid_namespace.h>
-#include <linux/clocksource.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/console.h>
-#include <linux/threads.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ptrace.h>
-#include <linux/reboot.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/sysrq.h>
-#include <linux/init.h>
-#include <linux/kgdb.h>
-#include <linux/pid.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm/cacheflush.h>
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/unaligned.h>
-
-static int kgdb_break_asap;
-
-#define KGDB_MAX_THREAD_QUERY 17
-struct kgdb_state {
- int ex_vector;
- int signo;
- int err_code;
- int cpu;
- int pass_exception;
- unsigned long thr_query;
- unsigned long threadid;
- long kgdb_usethreadid;
- struct pt_regs *linux_regs;
-};
-
-/* Exception state values */
-#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
-#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
-#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
-#define DCPU_SSTEP 0x8 /* CPU is single stepping */
-
-static struct debuggerinfo_struct {
- void *debuggerinfo;
- struct task_struct *task;
- int exception_state;
-} kgdb_info[NR_CPUS];
-
-/**
- * kgdb_connected - Is a host GDB connected to us?
- */
-int kgdb_connected;
-EXPORT_SYMBOL_GPL(kgdb_connected);
-
-/* All the KGDB handlers are installed */
-static int kgdb_io_module_registered;
-
-/* Guard for recursive entry */
-static int exception_level;
-
-static struct kgdb_io *kgdb_io_ops;
-static DEFINE_SPINLOCK(kgdb_registration_lock);
-
-/* kgdb console driver is loaded */
-static int kgdb_con_registered;
-/* determine if kgdb console output should be used */
-static int kgdb_use_con;
-
-static int __init opt_kgdb_con(char *str)
-{
- kgdb_use_con = 1;
- return 0;
-}
-
-early_param("kgdbcon", opt_kgdb_con);
-
-module_param(kgdb_use_con, int, 0644);
-
-/*
- * Holds information about breakpoints in a kernel. These breakpoints are
- * added and removed by gdb.
- */
-static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = {
- [0 ... KGDB_MAX_BREAKPOINTS-1] = { .state = BP_UNDEFINED }
-};
-
-/*
- * The CPU# of the active CPU, or -1 if none:
- */
-atomic_t kgdb_active = ATOMIC_INIT(-1);
-
-/*
- * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
- * bootup code (which might not have percpu set up yet):
- */
-static atomic_t passive_cpu_wait[NR_CPUS];
-static atomic_t cpu_in_kgdb[NR_CPUS];
-atomic_t kgdb_setting_breakpoint;
-
-struct task_struct *kgdb_usethread;
-struct task_struct *kgdb_contthread;
-
-int kgdb_single_step;
-pid_t kgdb_sstep_pid;
-
-/* Our I/O buffers. */
-static char remcom_in_buffer[BUFMAX];
-static char remcom_out_buffer[BUFMAX];
-
-/* Storage for the registers, in GDB format. */
-static unsigned long gdb_regs[(NUMREGBYTES +
- sizeof(unsigned long) - 1) /
- sizeof(unsigned long)];
-
-/* to keep track of the CPU which is doing the single stepping*/
-atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
-
-/*
- * If you are debugging a problem where roundup (the collection of
- * all other CPUs) is a problem [this should be extremely rare],
- * then use the nokgdbroundup option to avoid roundup. In that case
- * the other CPUs might interfere with your debugging context, so
- * use this with care:
- */
-static int kgdb_do_roundup = 1;
-
-static int __init opt_nokgdbroundup(char *str)
-{
- kgdb_do_roundup = 0;
-
- return 0;
-}
-
-early_param("nokgdbroundup", opt_nokgdbroundup);
-
-/*
- * Finally, some KGDB code :-)
- */
-
-/*
- * Weak aliases for breakpoint management,
- * can be overriden by architectures when needed:
- */
-int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
-{
- int err;
-
- err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
- if (err)
- return err;
-
- return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
- BREAK_INSTR_SIZE);
-}
-
-int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
-{
- return probe_kernel_write((char *)addr,
- (char *)bundle, BREAK_INSTR_SIZE);
-}
-
-int __weak kgdb_validate_break_address(unsigned long addr)
-{
- char tmp_variable[BREAK_INSTR_SIZE];
- int err;
- /* Validate setting the breakpoint and then removing it. In the
- * remove fails, the kernel needs to emit a bad message because we
- * are deep trouble not being able to put things back the way we
- * found them.
- */
- err = kgdb_arch_set_breakpoint(addr, tmp_variable);
- if (err)
- return err;
- err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
- if (err)
- printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
- "memory destroyed at: %lx", addr);
- return err;
-}
-
-unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
-{
- return instruction_pointer(regs);
-}
-
-int __weak kgdb_arch_init(void)
-{
- return 0;
-}
-
-int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
-{
- return 0;
-}
-
-void __weak
-kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
- return;
-}
-
-/**
- * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
- * @regs: Current &struct pt_regs.
- *
- * This function will be called if the particular architecture must
- * disable hardware debugging while it is processing gdb packets or
- * handling exception.
- */
-void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
-{
-}
-
-/*
- * GDB remote protocol parser:
- */
-
-static int hex(char ch)
-{
- if ((ch >= 'a') && (ch <= 'f'))
- return ch - 'a' + 10;
- if ((ch >= '0') && (ch <= '9'))
- return ch - '0';
- if ((ch >= 'A') && (ch <= 'F'))
- return ch - 'A' + 10;
- return -1;
-}
-
-/* scan for the sequence $<data>#<checksum> */
-static void get_packet(char *buffer)
-{
- unsigned char checksum;
- unsigned char xmitcsum;
- int count;
- char ch;
-
- do {
- /*
- * Spin and wait around for the start character, ignore all
- * other characters:
- */
- while ((ch = (kgdb_io_ops->read_char())) != '$')
- /* nothing */;
-
- kgdb_connected = 1;
- checksum = 0;
- xmitcsum = -1;
-
- count = 0;
-
- /*
- * now, read until a # or end of buffer is found:
- */
- while (count < (BUFMAX - 1)) {
- ch = kgdb_io_ops->read_char();
- if (ch == '#')
- break;
- checksum = checksum + ch;
- buffer[count] = ch;
- count = count + 1;
- }
- buffer[count] = 0;
-
- if (ch == '#') {
- xmitcsum = hex(kgdb_io_ops->read_char()) << 4;
- xmitcsum += hex(kgdb_io_ops->read_char());
-
- if (checksum != xmitcsum)
- /* failed checksum */
- kgdb_io_ops->write_char('-');
- else
- /* successful transfer */
- kgdb_io_ops->write_char('+');
- if (kgdb_io_ops->flush)
- kgdb_io_ops->flush();
- }
- } while (checksum != xmitcsum);
-}
-
-/*
- * Send the packet in buffer.
- * Check for gdb connection if asked for.
- */
-static void put_packet(char *buffer)
-{
- unsigned char checksum;
- int count;
- char ch;
-
- /*
- * $<packet info>#<checksum>.
- */
- while (1) {
- kgdb_io_ops->write_char('$');
- checksum = 0;
- count = 0;
-
- while ((ch = buffer[count])) {
- kgdb_io_ops->write_char(ch);
- checksum += ch;
- count++;
- }
-
- kgdb_io_ops->write_char('#');
- kgdb_io_ops->write_char(hex_asc_hi(checksum));
- kgdb_io_ops->write_char(hex_asc_lo(checksum));
- if (kgdb_io_ops->flush)
- kgdb_io_ops->flush();
-
- /* Now see what we get in reply. */
- ch = kgdb_io_ops->read_char();
-
- if (ch == 3)
- ch = kgdb_io_ops->read_char();
-
- /* If we get an ACK, we are done. */
- if (ch == '+')
- return;
-
- /*
- * If we get the start of another packet, this means
- * that GDB is attempting to reconnect. We will NAK
- * the packet being sent, and stop trying to send this
- * packet.
- */
- if (ch == '$') {
- kgdb_io_ops->write_char('-');
- if (kgdb_io_ops->flush)
- kgdb_io_ops->flush();
- return;
- }
- }
-}
-
-/*
- * Convert the memory pointed to by mem into hex, placing result in buf.
- * Return a pointer to the last char put in buf (null). May return an error.
- */
-int kgdb_mem2hex(char *mem, char *buf, int count)
-{
- char *tmp;
- int err;
-
- /*
- * We use the upper half of buf as an intermediate buffer for the
- * raw memory copy. Hex conversion will work against this one.
- */
- tmp = buf + count;
-
- err = probe_kernel_read(tmp, mem, count);
- if (!err) {
- while (count > 0) {
- buf = pack_hex_byte(buf, *tmp);
- tmp++;
- count--;
- }
-
- *buf = 0;
- }
-
- return err;
-}
-
-/*
- * Copy the binary array pointed to by buf into mem. Fix $, #, and
- * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
- * The input buf is overwitten with the result to write to mem.
- */
-static int kgdb_ebin2mem(char *buf, char *mem, int count)
-{
- int size = 0;
- char *c = buf;
-
- while (count-- > 0) {
- c[size] = *buf++;
- if (c[size] == 0x7d)
- c[size] = *buf++ ^ 0x20;
- size++;
- }
-
- return probe_kernel_write(mem, c, size);
-}
-
-/*
- * Convert the hex array pointed to by buf into binary to be placed in mem.
- * Return a pointer to the character AFTER the last byte written.
- * May return an error.
- */
-int kgdb_hex2mem(char *buf, char *mem, int count)
-{
- char *tmp_raw;
- char *tmp_hex;
-
- /*
- * We use the upper half of buf as an intermediate buffer for the
- * raw memory that is converted from hex.
- */
- tmp_raw = buf + count * 2;
-
- tmp_hex = tmp_raw - 1;
- while (tmp_hex >= buf) {
- tmp_raw--;
- *tmp_raw = hex(*tmp_hex--);
- *tmp_raw |= hex(*tmp_hex--) << 4;
- }
-
- return probe_kernel_write(mem, tmp_raw, count);
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int kgdb_hex2long(char **ptr, unsigned long *long_val)
-{
- int hex_val;
- int num = 0;
- int negate = 0;
-
- *long_val = 0;
-
- if (**ptr == '-') {
- negate = 1;
- (*ptr)++;
- }
- while (**ptr) {
- hex_val = hex(**ptr);
- if (hex_val < 0)
- break;
-
- *long_val = (*long_val << 4) | hex_val;
- num++;
- (*ptr)++;
- }
-
- if (negate)
- *long_val = -*long_val;
-
- return num;
-}
-
-/* Write memory due to an 'M' or 'X' packet. */
-static int write_mem_msg(int binary)
-{
- char *ptr = &remcom_in_buffer[1];
- unsigned long addr;
- unsigned long length;
- int err;
-
- if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
- kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
- if (binary)
- err = kgdb_ebin2mem(ptr, (char *)addr, length);
- else
- err = kgdb_hex2mem(ptr, (char *)addr, length);
- if (err)
- return err;
- if (CACHE_FLUSH_IS_SAFE)
- flush_icache_range(addr, addr + length);
- return 0;
- }
-
- return -EINVAL;
-}
-
-static void error_packet(char *pkt, int error)
-{
- error = -error;
- pkt[0] = 'E';
- pkt[1] = hex_asc[(error / 10)];
- pkt[2] = hex_asc[(error % 10)];
- pkt[3] = '\0';
-}
-
-/*
- * Thread ID accessors. We represent a flat TID space to GDB, where
- * the per CPU idle threads (which under Linux all have PID 0) are
- * remapped to negative TIDs.
- */
-
-#define BUF_THREAD_ID_SIZE 16
-
-static char *pack_threadid(char *pkt, unsigned char *id)
-{
- char *limit;
-
- limit = pkt + BUF_THREAD_ID_SIZE;
- while (pkt < limit)
- pkt = pack_hex_byte(pkt, *id++);
-
- return pkt;
-}
-
-static void int_to_threadref(unsigned char *id, int value)
-{
- unsigned char *scan;
- int i = 4;
-
- scan = (unsigned char *)id;
- while (i--)
- *scan++ = 0;
- put_unaligned_be32(value, scan);
-}
-
-static struct task_struct *getthread(struct pt_regs *regs, int tid)
-{
- /*
- * Non-positive TIDs are remapped to the cpu shadow information
- */
- if (tid == 0 || tid == -1)
- tid = -atomic_read(&kgdb_active) - 2;
- if (tid < -1 && tid > -NR_CPUS - 2) {
- if (kgdb_info[-tid - 2].task)
- return kgdb_info[-tid - 2].task;
- else
- return idle_task(-tid - 2);
- }
- if (tid <= 0) {
- printk(KERN_ERR "KGDB: Internal thread select error\n");
- dump_stack();
- return NULL;
- }
-
- /*
- * find_task_by_pid_ns() does not take the tasklist lock anymore
- * but is nicely RCU locked - hence is a pretty resilient
- * thing to use:
- */
- return find_task_by_pid_ns(tid, &init_pid_ns);
-}
-
-/*
- * Some architectures need cache flushes when we set/clear a
- * breakpoint:
- */
-static void kgdb_flush_swbreak_addr(unsigned long addr)
-{
- if (!CACHE_FLUSH_IS_SAFE)
- return;
-
- if (current->mm && current->mm->mmap_cache) {
- flush_cache_range(current->mm->mmap_cache,
- addr, addr + BREAK_INSTR_SIZE);
- }
- /* Force flush instruction cache if it was outside the mm */
- flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
-}
-
-/*
- * SW breakpoint management:
- */
-static int kgdb_activate_sw_breakpoints(void)
-{
- unsigned long addr;
- int error;
- int ret = 0;
- int i;
-
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if (kgdb_break[i].state != BP_SET)
- continue;
-
- addr = kgdb_break[i].bpt_addr;
- error = kgdb_arch_set_breakpoint(addr,
- kgdb_break[i].saved_instr);
- if (error) {
- ret = error;
- printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
- continue;
- }
-
- kgdb_flush_swbreak_addr(addr);
- kgdb_break[i].state = BP_ACTIVE;
- }
- return ret;
-}
-
-static int kgdb_set_sw_break(unsigned long addr)
-{
- int err = kgdb_validate_break_address(addr);
- int breakno = -1;
- int i;
-
- if (err)
- return err;
-
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if ((kgdb_break[i].state == BP_SET) &&
- (kgdb_break[i].bpt_addr == addr))
- return -EEXIST;
- }
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if (kgdb_break[i].state == BP_REMOVED &&
- kgdb_break[i].bpt_addr == addr) {
- breakno = i;
- break;
- }
- }
-
- if (breakno == -1) {
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if (kgdb_break[i].state == BP_UNDEFINED) {
- breakno = i;
- break;
- }
- }
- }
-
- if (breakno == -1)
- return -E2BIG;
-
- kgdb_break[breakno].state = BP_SET;
- kgdb_break[breakno].type = BP_BREAKPOINT;
- kgdb_break[breakno].bpt_addr = addr;
-
- return 0;
-}
-
-static int kgdb_deactivate_sw_breakpoints(void)
-{
- unsigned long addr;
- int error;
- int ret = 0;
- int i;
-
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if (kgdb_break[i].state != BP_ACTIVE)
- continue;
- addr = kgdb_break[i].bpt_addr;
- error = kgdb_arch_remove_breakpoint(addr,
- kgdb_break[i].saved_instr);
- if (error) {
- printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
- ret = error;
- }
-
- kgdb_flush_swbreak_addr(addr);
- kgdb_break[i].state = BP_SET;
- }
- return ret;
-}
-
-static int kgdb_remove_sw_break(unsigned long addr)
-{
- int i;
-
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if ((kgdb_break[i].state == BP_SET) &&
- (kgdb_break[i].bpt_addr == addr)) {
- kgdb_break[i].state = BP_REMOVED;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-int kgdb_isremovedbreak(unsigned long addr)
-{
- int i;
-
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if ((kgdb_break[i].state == BP_REMOVED) &&
- (kgdb_break[i].bpt_addr == addr))
- return 1;
- }
- return 0;
-}
-
-static int remove_all_break(void)
-{
- unsigned long addr;
- int error;
- int i;
-
- /* Clear memory breakpoints. */
- for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
- if (kgdb_break[i].state != BP_ACTIVE)
- goto setundefined;
- addr = kgdb_break[i].bpt_addr;
- error = kgdb_arch_remove_breakpoint(addr,
- kgdb_break[i].saved_instr);
- if (error)
- printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
- addr);
-setundefined:
- kgdb_break[i].state = BP_UNDEFINED;
- }
-
- /* Clear hardware breakpoints. */
- if (arch_kgdb_ops.remove_all_hw_break)
- arch_kgdb_ops.remove_all_hw_break();
-
- return 0;
-}
-
-/*
- * Remap normal tasks to their real PID,
- * CPU shadow threads are mapped to -CPU - 2
- */
-static inline int shadow_pid(int realpid)
-{
- if (realpid)
- return realpid;
-
- return -raw_smp_processor_id() - 2;
-}
-
-static char gdbmsgbuf[BUFMAX + 1];
-
-static void kgdb_msg_write(const char *s, int len)
-{
- char *bufptr;
- int wcount;
- int i;
-
- /* 'O'utput */
- gdbmsgbuf[0] = 'O';
-
- /* Fill and send buffers... */
- while (len > 0) {
- bufptr = gdbmsgbuf + 1;
-
- /* Calculate how many this time */
- if ((len << 1) > (BUFMAX - 2))
- wcount = (BUFMAX - 2) >> 1;
- else
- wcount = len;
-
- /* Pack in hex chars */
- for (i = 0; i < wcount; i++)
- bufptr = pack_hex_byte(bufptr, s[i]);
- *bufptr = '\0';
-
- /* Move up */
- s += wcount;
- len -= wcount;
-
- /* Write packet */
- put_packet(gdbmsgbuf);
- }
-}
-
-/*
- * Return true if there is a valid kgdb I/O module. Also if no
- * debugger is attached a message can be printed to the console about
- * waiting for the debugger to attach.
- *
- * The print_wait argument is only to be true when called from inside
- * the core kgdb_handle_exception, because it will wait for the
- * debugger to attach.
- */
-static int kgdb_io_ready(int print_wait)
-{
- if (!kgdb_io_ops)
- return 0;
- if (kgdb_connected)
- return 1;
- if (atomic_read(&kgdb_setting_breakpoint))
- return 1;
- if (print_wait)
- printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
- return 1;
-}
-
-/*
- * All the functions that start with gdb_cmd are the various
- * operations to implement the handlers for the gdbserial protocol
- * where KGDB is communicating with an external debugger
- */
-
-/* Handle the '?' status packets */
-static void gdb_cmd_status(struct kgdb_state *ks)
-{
- /*
- * We know that this packet is only sent
- * during initial connect. So to be safe,
- * we clear out our breakpoints now in case
- * GDB is reconnecting.
- */
- remove_all_break();
-
- remcom_out_buffer[0] = 'S';
- pack_hex_byte(&remcom_out_buffer[1], ks->signo);
-}
-
-/* Handle the 'g' get registers request */
-static void gdb_cmd_getregs(struct kgdb_state *ks)
-{
- struct task_struct *thread;
- void *local_debuggerinfo;
- int i;
-
- thread = kgdb_usethread;
- if (!thread) {
- thread = kgdb_info[ks->cpu].task;
- local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
- } else {
- local_debuggerinfo = NULL;
- for_each_online_cpu(i) {
- /*
- * Try to find the task on some other
- * or possibly this node if we do not
- * find the matching task then we try
- * to approximate the results.
- */
- if (thread == kgdb_info[i].task)
- local_debuggerinfo = kgdb_info[i].debuggerinfo;
- }
- }
-
- /*
- * All threads that don't have debuggerinfo should be
- * in schedule() sleeping, since all other CPUs
- * are in kgdb_wait, and thus have debuggerinfo.
- */
- if (local_debuggerinfo) {
- pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
- } else {
- /*
- * Pull stuff saved during switch_to; nothing
- * else is accessible (or even particularly
- * relevant).
- *
- * This should be enough for a stack trace.
- */
- sleeping_thread_to_gdb_regs(gdb_regs, thread);
- }
- kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
-}
-
-/* Handle the 'G' set registers request */
-static void gdb_cmd_setregs(struct kgdb_state *ks)
-{
- kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
-
- if (kgdb_usethread && kgdb_usethread != current) {
- error_packet(remcom_out_buffer, -EINVAL);
- } else {
- gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
- strcpy(remcom_out_buffer, "OK");
- }
-}
-
-/* Handle the 'm' memory read bytes */
-static void gdb_cmd_memread(struct kgdb_state *ks)
-{
- char *ptr = &remcom_in_buffer[1];
- unsigned long length;
- unsigned long addr;
- int err;
-
- if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
- kgdb_hex2long(&ptr, &length) > 0) {
- err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
- if (err)
- error_packet(remcom_out_buffer, err);
- } else {
- error_packet(remcom_out_buffer, -EINVAL);
- }
-}
-
-/* Handle the 'M' memory write bytes */
-static void gdb_cmd_memwrite(struct kgdb_state *ks)
-{
- int err = write_mem_msg(0);
-
- if (err)
- error_packet(remcom_out_buffer, err);
- else
- strcpy(remcom_out_buffer, "OK");
-}
-
-/* Handle the 'X' memory binary write bytes */
-static void gdb_cmd_binwrite(struct kgdb_state *ks)
-{
- int err = write_mem_msg(1);
-
- if (err)
- error_packet(remcom_out_buffer, err);
- else
- strcpy(remcom_out_buffer, "OK");
-}
-
-/* Handle the 'D' or 'k', detach or kill packets */
-static void gdb_cmd_detachkill(struct kgdb_state *ks)
-{
- int error;
-
- /* The detach case */
- if (remcom_in_buffer[0] == 'D') {
- error = remove_all_break();
- if (error < 0) {
- error_packet(remcom_out_buffer, error);
- } else {
- strcpy(remcom_out_buffer, "OK");
- kgdb_connected = 0;
- }
- put_packet(remcom_out_buffer);
- } else {
- /*
- * Assume the kill case, with no exit code checking,
- * trying to force detach the debugger:
- */
- remove_all_break();
- kgdb_connected = 0;
- }
-}
-
-/* Handle the 'R' reboot packets */
-static int gdb_cmd_reboot(struct kgdb_state *ks)
-{
- /* For now, only honor R0 */
- if (strcmp(remcom_in_buffer, "R0") == 0) {
- printk(KERN_CRIT "Executing emergency reboot\n");
- strcpy(remcom_out_buffer, "OK");
- put_packet(remcom_out_buffer);
-
- /*
- * Execution should not return from
- * machine_emergency_restart()
- */
- machine_emergency_restart();
- kgdb_connected = 0;
-
- return 1;
- }
- return 0;
-}
-
-/* Handle the 'q' query packets */
-static void gdb_cmd_query(struct kgdb_state *ks)
-{
- struct task_struct *g;
- struct task_struct *p;
- unsigned char thref[8];
- char *ptr;
- int i;
- int cpu;
- int finished = 0;
-
- switch (remcom_in_buffer[1]) {
- case 's':
- case 'f':
- if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
-
- i = 0;
- remcom_out_buffer[0] = 'm';
- ptr = remcom_out_buffer + 1;
- if (remcom_in_buffer[1] == 'f') {
- /* Each cpu is a shadow thread */
- for_each_online_cpu(cpu) {
- ks->thr_query = 0;
- int_to_threadref(thref, -cpu - 2);
- pack_threadid(ptr, thref);
- ptr += BUF_THREAD_ID_SIZE;
- *(ptr++) = ',';
- i++;
- }
- }
-
- do_each_thread(g, p) {
- if (i >= ks->thr_query && !finished) {
- int_to_threadref(thref, p->pid);
- pack_threadid(ptr, thref);
- ptr += BUF_THREAD_ID_SIZE;
- *(ptr++) = ',';
- ks->thr_query++;
- if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
- finished = 1;
- }
- i++;
- } while_each_thread(g, p);
-
- *(--ptr) = '\0';
- break;
-
- case 'C':
- /* Current thread id */
- strcpy(remcom_out_buffer, "QC");
- ks->threadid = shadow_pid(current->pid);
- int_to_threadref(thref, ks->threadid);
- pack_threadid(remcom_out_buffer + 2, thref);
- break;
- case 'T':
- if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
- ks->threadid = 0;
- ptr = remcom_in_buffer + 17;
- kgdb_hex2long(&ptr, &ks->threadid);
- if (!getthread(ks->linux_regs, ks->threadid)) {
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
- if ((int)ks->threadid > 0) {
- kgdb_mem2hex(getthread(ks->linux_regs,
- ks->threadid)->comm,
- remcom_out_buffer, 16);
- } else {
- static char tmpstr[23 + BUF_THREAD_ID_SIZE];
-
- sprintf(tmpstr, "shadowCPU%d",
- (int)(-ks->threadid - 2));
- kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
- }
- break;
- }
-}
-
-/* Handle the 'H' task query packets */
-static void gdb_cmd_task(struct kgdb_state *ks)
-{
- struct task_struct *thread;
- char *ptr;
-
- switch (remcom_in_buffer[1]) {
- case 'g':
- ptr = &remcom_in_buffer[2];
- kgdb_hex2long(&ptr, &ks->threadid);
- thread = getthread(ks->linux_regs, ks->threadid);
- if (!thread && ks->threadid > 0) {
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
- kgdb_usethread = thread;
- ks->kgdb_usethreadid = ks->threadid;
- strcpy(remcom_out_buffer, "OK");
- break;
- case 'c':
- ptr = &remcom_in_buffer[2];
- kgdb_hex2long(&ptr, &ks->threadid);
- if (!ks->threadid) {
- kgdb_contthread = NULL;
- } else {
- thread = getthread(ks->linux_regs, ks->threadid);
- if (!thread && ks->threadid > 0) {
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
- kgdb_contthread = thread;
- }
- strcpy(remcom_out_buffer, "OK");
- break;
- }
-}
-
-/* Handle the 'T' thread query packets */
-static void gdb_cmd_thread(struct kgdb_state *ks)
-{
- char *ptr = &remcom_in_buffer[1];
- struct task_struct *thread;
-
- kgdb_hex2long(&ptr, &ks->threadid);
- thread = getthread(ks->linux_regs, ks->threadid);
- if (thread)
- strcpy(remcom_out_buffer, "OK");
- else
- error_packet(remcom_out_buffer, -EINVAL);
-}
-
-/* Handle the 'z' or 'Z' breakpoint remove or set packets */
-static void gdb_cmd_break(struct kgdb_state *ks)
-{
- /*
- * Since GDB-5.3, it's been drafted that '0' is a software
- * breakpoint, '1' is a hardware breakpoint, so let's do that.
- */
- char *bpt_type = &remcom_in_buffer[1];
- char *ptr = &remcom_in_buffer[2];
- unsigned long addr;
- unsigned long length;
- int error = 0;
-
- if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
- /* Unsupported */
- if (*bpt_type > '4')
- return;
- } else {
- if (*bpt_type != '0' && *bpt_type != '1')
- /* Unsupported. */
- return;
- }
-
- /*
- * Test if this is a hardware breakpoint, and
- * if we support it:
- */
- if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
- /* Unsupported. */
- return;
-
- if (*(ptr++) != ',') {
- error_packet(remcom_out_buffer, -EINVAL);
- return;
- }
- if (!kgdb_hex2long(&ptr, &addr)) {
- error_packet(remcom_out_buffer, -EINVAL);
- return;
- }
- if (*(ptr++) != ',' ||
- !kgdb_hex2long(&ptr, &length)) {
- error_packet(remcom_out_buffer, -EINVAL);
- return;
- }
-
- if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
- error = kgdb_set_sw_break(addr);
- else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
- error = kgdb_remove_sw_break(addr);
- else if (remcom_in_buffer[0] == 'Z')
- error = arch_kgdb_ops.set_hw_breakpoint(addr,
- (int)length, *bpt_type - '0');
- else if (remcom_in_buffer[0] == 'z')
- error = arch_kgdb_ops.remove_hw_breakpoint(addr,
- (int) length, *bpt_type - '0');
-
- if (error == 0)
- strcpy(remcom_out_buffer, "OK");
- else
- error_packet(remcom_out_buffer, error);
-}
-
-/* Handle the 'C' signal / exception passing packets */
-static int gdb_cmd_exception_pass(struct kgdb_state *ks)
-{
- /* C09 == pass exception
- * C15 == detach kgdb, pass exception
- */
- if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
-
- ks->pass_exception = 1;
- remcom_in_buffer[0] = 'c';
-
- } else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
-
- ks->pass_exception = 1;
- remcom_in_buffer[0] = 'D';
- remove_all_break();
- kgdb_connected = 0;
- return 1;
-
- } else {
- kgdb_msg_write("KGDB only knows signal 9 (pass)"
- " and 15 (pass and disconnect)\n"
- "Executing a continue without signal passing\n", 0);
- remcom_in_buffer[0] = 'c';
- }
-
- /* Indicate fall through */
- return -1;
-}
-
-/*
- * This function performs all gdbserial command procesing
- */
-static int gdb_serial_stub(struct kgdb_state *ks)
-{
- int error = 0;
- int tmp;
-
- /* Clear the out buffer. */
- memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
-
- if (kgdb_connected) {
- unsigned char thref[8];
- char *ptr;
-
- /* Reply to host that an exception has occurred */
- ptr = remcom_out_buffer;
- *ptr++ = 'T';
- ptr = pack_hex_byte(ptr, ks->signo);
- ptr += strlen(strcpy(ptr, "thread:"));
- int_to_threadref(thref, shadow_pid(current->pid));
- ptr = pack_threadid(ptr, thref);
- *ptr++ = ';';
- put_packet(remcom_out_buffer);
- }
-
- kgdb_usethread = kgdb_info[ks->cpu].task;
- ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
- ks->pass_exception = 0;
-
- while (1) {
- error = 0;
-
- /* Clear the out buffer. */
- memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
-
- get_packet(remcom_in_buffer);
-
- switch (remcom_in_buffer[0]) {
- case '?': /* gdbserial status */
- gdb_cmd_status(ks);
- break;
- case 'g': /* return the value of the CPU registers */
- gdb_cmd_getregs(ks);
- break;
- case 'G': /* set the value of the CPU registers - return OK */
- gdb_cmd_setregs(ks);
- break;
- case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
- gdb_cmd_memread(ks);
- break;
- case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
- gdb_cmd_memwrite(ks);
- break;
- case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
- gdb_cmd_binwrite(ks);
- break;
- /* kill or detach. KGDB should treat this like a
- * continue.
- */
- case 'D': /* Debugger detach */
- case 'k': /* Debugger detach via kill */
- gdb_cmd_detachkill(ks);
- goto default_handle;
- case 'R': /* Reboot */
- if (gdb_cmd_reboot(ks))
- goto default_handle;
- break;
- case 'q': /* query command */
- gdb_cmd_query(ks);
- break;
- case 'H': /* task related */
- gdb_cmd_task(ks);
- break;
- case 'T': /* Query thread status */
- gdb_cmd_thread(ks);
- break;
- case 'z': /* Break point remove */
- case 'Z': /* Break point set */
- gdb_cmd_break(ks);
- break;
- case 'C': /* Exception passing */
- tmp = gdb_cmd_exception_pass(ks);
- if (tmp > 0)
- goto default_handle;
- if (tmp == 0)
- break;
- /* Fall through on tmp < 0 */
- case 'c': /* Continue packet */
- case 's': /* Single step packet */
- if (kgdb_contthread && kgdb_contthread != current) {
- /* Can't switch threads in kgdb */
- error_packet(remcom_out_buffer, -EINVAL);
- break;
- }
- kgdb_activate_sw_breakpoints();
- /* Fall through to default processing */
- default:
-default_handle:
- error = kgdb_arch_handle_exception(ks->ex_vector,
- ks->signo,
- ks->err_code,
- remcom_in_buffer,
- remcom_out_buffer,
- ks->linux_regs);
- /*
- * Leave cmd processing on error, detach,
- * kill, continue, or single step.
- */
- if (error >= 0 || remcom_in_buffer[0] == 'D' ||
- remcom_in_buffer[0] == 'k') {
- error = 0;
- goto kgdb_exit;
- }
-
- }
-
- /* reply to the request */
- put_packet(remcom_out_buffer);
- }
-
-kgdb_exit:
- if (ks->pass_exception)
- error = 1;
- return error;
-}
-
-static int kgdb_reenter_check(struct kgdb_state *ks)
-{
- unsigned long addr;
-
- if (atomic_read(&kgdb_active) != raw_smp_processor_id())
- return 0;
-
- /* Panic on recursive debugger calls: */
- exception_level++;
- addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
- kgdb_deactivate_sw_breakpoints();
-
- /*
- * If the break point removed ok at the place exception
- * occurred, try to recover and print a warning to the end
- * user because the user planted a breakpoint in a place that
- * KGDB needs in order to function.
- */
- if (kgdb_remove_sw_break(addr) == 0) {
- exception_level = 0;
- kgdb_skipexception(ks->ex_vector, ks->linux_regs);
- kgdb_activate_sw_breakpoints();
- printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
- addr);
- WARN_ON_ONCE(1);
-
- return 1;
- }
- remove_all_break();
- kgdb_skipexception(ks->ex_vector, ks->linux_regs);
-
- if (exception_level > 1) {
- dump_stack();
- panic("Recursive entry to debugger");
- }
-
- printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n");
- dump_stack();
- panic("Recursive entry to debugger");
-
- return 1;
-}
-
-static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
-{
- unsigned long flags;
- int sstep_tries = 100;
- int error = 0;
- int i, cpu;
- int trace_on = 0;
-acquirelock:
- /*
- * Interrupts will be restored by the 'trap return' code, except when
- * single stepping.
- */
- local_irq_save(flags);
-
- cpu = ks->cpu;
- kgdb_info[cpu].debuggerinfo = regs;
- kgdb_info[cpu].task = current;
- /*
- * Make sure the above info reaches the primary CPU before
- * our cpu_in_kgdb[] flag setting does:
- */
- atomic_inc(&cpu_in_kgdb[cpu]);
-
- /*
- * CPU will loop if it is a slave or request to become a kgdb
- * master cpu and acquire the kgdb_active lock:
- */
- while (1) {
- if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
- if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
- break;
- } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
- if (!atomic_read(&passive_cpu_wait[cpu]))
- goto return_normal;
- } else {
-return_normal:
- /* Return to normal operation by executing any
- * hw breakpoint fixup.
- */
- if (arch_kgdb_ops.correct_hw_break)
- arch_kgdb_ops.correct_hw_break();
- if (trace_on)
- tracing_on();
- atomic_dec(&cpu_in_kgdb[cpu]);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
- local_irq_restore(flags);
- return 0;
- }
- cpu_relax();
- }
-
- /*
- * For single stepping, try to only enter on the processor
- * that was single stepping. To gaurd against a deadlock, the
- * kernel will only try for the value of sstep_tries before
- * giving up and continuing on.
- */
- if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
- (kgdb_info[cpu].task &&
- kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
- atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
- local_irq_restore(flags);
-
- goto acquirelock;
- }
-
- if (!kgdb_io_ready(1)) {
- error = 1;
- goto kgdb_restore; /* No I/O connection, so resume the system */
- }
-
- /*
- * Don't enter if we have hit a removed breakpoint.
- */
- if (kgdb_skipexception(ks->ex_vector, ks->linux_regs))
- goto kgdb_restore;
-
- /* Call the I/O driver's pre_exception routine */
- if (kgdb_io_ops->pre_exception)
- kgdb_io_ops->pre_exception();
-
- kgdb_disable_hw_debug(ks->linux_regs);
-
- /*
- * Get the passive CPU lock which will hold all the non-primary
- * CPU in a spin state while the debugger is active
- */
- if (!kgdb_single_step) {
- for (i = 0; i < NR_CPUS; i++)
- atomic_inc(&passive_cpu_wait[i]);
- }
-
-#ifdef CONFIG_SMP
- /* Signal the other CPUs to enter kgdb_wait() */
- if ((!kgdb_single_step) && kgdb_do_roundup)
- kgdb_roundup_cpus(flags);
-#endif
-
- /*
- * Wait for the other CPUs to be notified and be waiting for us:
- */
- for_each_online_cpu(i) {
- while (!atomic_read(&cpu_in_kgdb[i]))
- cpu_relax();
- }
-
- /*
- * At this point the primary processor is completely
- * in the debugger and all secondary CPUs are quiescent
- */
- kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
- kgdb_deactivate_sw_breakpoints();
- kgdb_single_step = 0;
- kgdb_contthread = current;
- exception_level = 0;
- trace_on = tracing_is_on();
- if (trace_on)
- tracing_off();
-
- /* Talk to debugger with gdbserial protocol */
- error = gdb_serial_stub(ks);
-
- /* Call the I/O driver's post_exception routine */
- if (kgdb_io_ops->post_exception)
- kgdb_io_ops->post_exception();
-
- atomic_dec(&cpu_in_kgdb[ks->cpu]);
-
- if (!kgdb_single_step) {
- for (i = NR_CPUS-1; i >= 0; i--)
- atomic_dec(&passive_cpu_wait[i]);
- /*
- * Wait till all the CPUs have quit
- * from the debugger.
- */
- for_each_online_cpu(i) {
- while (atomic_read(&cpu_in_kgdb[i]))
- cpu_relax();
- }
- }
-
-kgdb_restore:
- if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
- int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
- if (kgdb_info[sstep_cpu].task)
- kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
- else
- kgdb_sstep_pid = 0;
- }
- if (trace_on)
- tracing_on();
- /* Free kgdb_active */
- atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
- local_irq_restore(flags);
-
- return error;
-}
-
-/*
- * kgdb_handle_exception() - main entry point from a kernel exception
- *
- * Locking hierarchy:
- * interface locks, if any (begin_session)
- * kgdb lock (kgdb_active)
- */
-int
-kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
-{
- struct kgdb_state kgdb_var;
- struct kgdb_state *ks = &kgdb_var;
- int ret;
-
- ks->cpu = raw_smp_processor_id();
- ks->ex_vector = evector;
- ks->signo = signo;
- ks->ex_vector = evector;
- ks->err_code = ecode;
- ks->kgdb_usethreadid = 0;
- ks->linux_regs = regs;
-
- if (kgdb_reenter_check(ks))
- return 0; /* Ouch, double exception ! */
- kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
- ret = kgdb_cpu_enter(ks, regs);
- kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
- return ret;
-}
-
-int kgdb_nmicallback(int cpu, void *regs)
-{
-#ifdef CONFIG_SMP
- struct kgdb_state kgdb_var;
- struct kgdb_state *ks = &kgdb_var;
-
- memset(ks, 0, sizeof(struct kgdb_state));
- ks->cpu = cpu;
- ks->linux_regs = regs;
-
- if (!atomic_read(&cpu_in_kgdb[cpu]) &&
- atomic_read(&kgdb_active) != -1 &&
- atomic_read(&kgdb_active) != cpu) {
- kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
- kgdb_cpu_enter(ks, regs);
- kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
- return 0;
- }
-#endif
- return 1;
-}
-
-static void kgdb_console_write(struct console *co, const char *s,
- unsigned count)
-{
- unsigned long flags;
-
- /* If we're debugging, or KGDB has not connected, don't try
- * and print. */
- if (!kgdb_connected || atomic_read(&kgdb_active) != -1)
- return;
-
- local_irq_save(flags);
- kgdb_msg_write(s, count);
- local_irq_restore(flags);
-}
-
-static struct console kgdbcons = {
- .name = "kgdb",
- .write = kgdb_console_write,
- .flags = CON_PRINTBUFFER | CON_ENABLED,
- .index = -1,
-};
-
-#ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_gdb(int key, struct tty_struct *tty)
-{
- if (!kgdb_io_ops) {
- printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
- return;
- }
- if (!kgdb_connected)
- printk(KERN_CRIT "Entering KGDB\n");
-
- kgdb_breakpoint();
-}
-
-static struct sysrq_key_op sysrq_gdb_op = {
- .handler = sysrq_handle_gdb,
- .help_msg = "debug(G)",
- .action_msg = "DEBUG",
-};
-#endif
-
-static void kgdb_register_callbacks(void)
-{
- if (!kgdb_io_module_registered) {
- kgdb_io_module_registered = 1;
- kgdb_arch_init();
-#ifdef CONFIG_MAGIC_SYSRQ
- register_sysrq_key('g', &sysrq_gdb_op);
-#endif
- if (kgdb_use_con && !kgdb_con_registered) {
- register_console(&kgdbcons);
- kgdb_con_registered = 1;
- }
- }
-}
-
-static void kgdb_unregister_callbacks(void)
-{
- /*
- * When this routine is called KGDB should unregister from the
- * panic handler and clean up, making sure it is not handling any
- * break exceptions at the time.
- */
- if (kgdb_io_module_registered) {
- kgdb_io_module_registered = 0;
- kgdb_arch_exit();
-#ifdef CONFIG_MAGIC_SYSRQ
- unregister_sysrq_key('g', &sysrq_gdb_op);
-#endif
- if (kgdb_con_registered) {
- unregister_console(&kgdbcons);
- kgdb_con_registered = 0;
- }
- }
-}
-
-static void kgdb_initial_breakpoint(void)
-{
- kgdb_break_asap = 0;
-
- printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n");
- kgdb_breakpoint();
-}
-
-/**
- * kgdb_register_io_module - register KGDB IO module
- * @new_kgdb_io_ops: the io ops vector
- *
- * Register it with the KGDB core.
- */
-int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
-{
- int err;
-
- spin_lock(&kgdb_registration_lock);
-
- if (kgdb_io_ops) {
- spin_unlock(&kgdb_registration_lock);
-
- printk(KERN_ERR "kgdb: Another I/O driver is already "
- "registered with KGDB.\n");
- return -EBUSY;
- }
-
- if (new_kgdb_io_ops->init) {
- err = new_kgdb_io_ops->init();
- if (err) {
- spin_unlock(&kgdb_registration_lock);
- return err;
- }
- }
-
- kgdb_io_ops = new_kgdb_io_ops;
-
- spin_unlock(&kgdb_registration_lock);
-
- printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
- new_kgdb_io_ops->name);
-
- /* Arm KGDB now. */
- kgdb_register_callbacks();
-
- if (kgdb_break_asap)
- kgdb_initial_breakpoint();
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(kgdb_register_io_module);
-
-/**
- * kkgdb_unregister_io_module - unregister KGDB IO module
- * @old_kgdb_io_ops: the io ops vector
- *
- * Unregister it with the KGDB core.
- */
-void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops)
-{
- BUG_ON(kgdb_connected);
-
- /*
- * KGDB is no longer able to communicate out, so
- * unregister our callbacks and reset state.
- */
- kgdb_unregister_callbacks();
-
- spin_lock(&kgdb_registration_lock);
-
- WARN_ON_ONCE(kgdb_io_ops != old_kgdb_io_ops);
- kgdb_io_ops = NULL;
-
- spin_unlock(&kgdb_registration_lock);
-
- printk(KERN_INFO
- "kgdb: Unregistered I/O driver %s, debugger disabled.\n",
- old_kgdb_io_ops->name);
-}
-EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
-
-/**
- * kgdb_breakpoint - generate breakpoint exception
- *
- * This function will generate a breakpoint exception. It is used at the
- * beginning of a program to sync up with a debugger and can be used
- * otherwise as a quick means to stop program execution and "break" into
- * the debugger.
- */
-void kgdb_breakpoint(void)
-{
- atomic_inc(&kgdb_setting_breakpoint);
- wmb(); /* Sync point before breakpoint */
- arch_kgdb_breakpoint();
- wmb(); /* Sync point after breakpoint */
- atomic_dec(&kgdb_setting_breakpoint);
-}
-EXPORT_SYMBOL_GPL(kgdb_breakpoint);
-
-static int __init opt_kgdb_wait(char *str)
-{
- kgdb_break_asap = 1;
-
- if (kgdb_io_module_registered)
- kgdb_initial_breakpoint();
-
- return 0;
-}
-
-early_param("kgdbwait", opt_kgdb_wait);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bf0e231d970..6e9b19667a8 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -116,27 +116,16 @@ int __request_module(bool wait, const char *fmt, ...)
trace_module_request(module_name, wait, _RET_IP_);
- ret = call_usermodehelper(modprobe_path, argv, envp,
- wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
+ ret = call_usermodehelper_fns(modprobe_path, argv, envp,
+ wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC,
+ NULL, NULL, NULL);
+
atomic_dec(&kmod_concurrent);
return ret;
}
EXPORT_SYMBOL(__request_module);
#endif /* CONFIG_MODULES */
-struct subprocess_info {
- struct work_struct work;
- struct completion *complete;
- struct cred *cred;
- char *path;
- char **argv;
- char **envp;
- enum umh_wait wait;
- int retval;
- struct file *stdin;
- void (*cleanup)(char **argv, char **envp);
-};
-
/*
* This is the task which runs the usermode application
*/
@@ -145,36 +134,10 @@ static int ____call_usermodehelper(void *data)
struct subprocess_info *sub_info = data;
int retval;
- BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
-
- /* Unblock all signals */
spin_lock_irq(&current->sighand->siglock);
flush_signal_handlers(current, 1);
- sigemptyset(&current->blocked);
- recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
- /* Install the credentials */
- commit_creds(sub_info->cred);
- sub_info->cred = NULL;
-
- /* Install input pipe when needed */
- if (sub_info->stdin) {
- struct files_struct *f = current->files;
- struct fdtable *fdt;
- /* no races because files should be private here */
- sys_close(0);
- fd_install(0, sub_info->stdin);
- spin_lock(&f->file_lock);
- fdt = files_fdtable(f);
- FD_SET(0, fdt->open_fds);
- FD_CLR(0, fdt->close_on_exec);
- spin_unlock(&f->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
- }
-
/* We can run anywhere, unlike our parent keventd(). */
set_cpus_allowed_ptr(current, cpu_all_mask);
@@ -184,9 +147,16 @@ static int ____call_usermodehelper(void *data)
*/
set_user_nice(current, 0);
+ if (sub_info->init) {
+ retval = sub_info->init(sub_info);
+ if (retval)
+ goto fail;
+ }
+
retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
/* Exec failed? */
+fail:
sub_info->retval = retval;
do_exit(0);
}
@@ -194,9 +164,7 @@ static int ____call_usermodehelper(void *data)
void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
- (*info->cleanup)(info->argv, info->envp);
- if (info->cred)
- put_cred(info->cred);
+ (*info->cleanup)(info);
kfree(info);
}
EXPORT_SYMBOL(call_usermodehelper_freeinfo);
@@ -207,16 +175,16 @@ static int wait_for_helper(void *data)
struct subprocess_info *sub_info = data;
pid_t pid;
- /* Install a handler: if SIGCLD isn't handled sys_wait4 won't
- * populate the status, but will return -ECHILD. */
- allow_signal(SIGCHLD);
+ /* If SIGCLD is ignored sys_wait4 won't populate the status. */
+ spin_lock_irq(&current->sighand->siglock);
+ current->sighand->action[SIGCHLD-1].sa.sa_handler = SIG_DFL;
+ spin_unlock_irq(&current->sighand->siglock);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
} else {
- int ret;
-
+ int ret = -ECHILD;
/*
* Normally it is bogus to call wait4() from in-kernel because
* wait4() wants to write the exit code to a userspace address.
@@ -237,10 +205,7 @@ static int wait_for_helper(void *data)
sub_info->retval = ret;
}
- if (sub_info->wait == UMH_NO_WAIT)
- call_usermodehelper_freeinfo(sub_info);
- else
- complete(sub_info->complete);
+ complete(sub_info->complete);
return 0;
}
@@ -249,15 +214,13 @@ static void __call_usermodehelper(struct work_struct *work)
{
struct subprocess_info *sub_info =
container_of(work, struct subprocess_info, work);
- pid_t pid;
enum umh_wait wait = sub_info->wait;
-
- BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+ pid_t pid;
/* CLONE_VFORK: wait until the usermode helper has execve'd
* successfully We need the data structures to stay around
* until that is done. */
- if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT)
+ if (wait == UMH_WAIT_PROC)
pid = kernel_thread(wait_for_helper, sub_info,
CLONE_FS | CLONE_FILES | SIGCHLD);
else
@@ -266,15 +229,16 @@ static void __call_usermodehelper(struct work_struct *work)
switch (wait) {
case UMH_NO_WAIT:
+ call_usermodehelper_freeinfo(sub_info);
break;
case UMH_WAIT_PROC:
if (pid > 0)
break;
- sub_info->retval = pid;
/* FALLTHROUGH */
-
case UMH_WAIT_EXEC:
+ if (pid < 0)
+ sub_info->retval = pid;
complete(sub_info->complete);
}
}
@@ -376,80 +340,37 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
sub_info->path = path;
sub_info->argv = argv;
sub_info->envp = envp;
- sub_info->cred = prepare_usermodehelper_creds();
- if (!sub_info->cred) {
- kfree(sub_info);
- return NULL;
- }
-
out:
return sub_info;
}
EXPORT_SYMBOL(call_usermodehelper_setup);
/**
- * call_usermodehelper_setkeys - set the session keys for usermode helper
- * @info: a subprocess_info returned by call_usermodehelper_setup
- * @session_keyring: the session keyring for the process
- */
-void call_usermodehelper_setkeys(struct subprocess_info *info,
- struct key *session_keyring)
-{
-#ifdef CONFIG_KEYS
- struct thread_group_cred *tgcred = info->cred->tgcred;
- key_put(tgcred->session_keyring);
- tgcred->session_keyring = key_get(session_keyring);
-#else
- BUG();
-#endif
-}
-EXPORT_SYMBOL(call_usermodehelper_setkeys);
-
-/**
- * call_usermodehelper_setcleanup - set a cleanup function
+ * call_usermodehelper_setfns - set a cleanup/init function
* @info: a subprocess_info returned by call_usermodehelper_setup
* @cleanup: a cleanup function
+ * @init: an init function
+ * @data: arbitrary context sensitive data
*
- * The cleanup function is just befor ethe subprocess_info is about to
+ * The init function is used to customize the helper process prior to
+ * exec. A non-zero return code causes the process to error out, exit,
+ * and return the failure to the calling process
+ *
+ * The cleanup function is just before ethe subprocess_info is about to
* be freed. This can be used for freeing the argv and envp. The
* Function must be runnable in either a process context or the
* context in which call_usermodehelper_exec is called.
*/
-void call_usermodehelper_setcleanup(struct subprocess_info *info,
- void (*cleanup)(char **argv, char **envp))
+void call_usermodehelper_setfns(struct subprocess_info *info,
+ int (*init)(struct subprocess_info *info),
+ void (*cleanup)(struct subprocess_info *info),
+ void *data)
{
info->cleanup = cleanup;
+ info->init = init;
+ info->data = data;
}
-EXPORT_SYMBOL(call_usermodehelper_setcleanup);
-
-/**
- * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
- * @sub_info: a subprocess_info returned by call_usermodehelper_setup
- * @filp: set to the write-end of a pipe
- *
- * This constructs a pipe, and sets the read end to be the stdin of the
- * subprocess, and returns the write-end in *@filp.
- */
-int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
- struct file **filp)
-{
- struct file *f;
-
- f = create_write_pipe(0);
- if (IS_ERR(f))
- return PTR_ERR(f);
- *filp = f;
-
- f = create_read_pipe(f, 0);
- if (IS_ERR(f)) {
- free_write_pipe(*filp);
- return PTR_ERR(f);
- }
- sub_info->stdin = f;
-
- return 0;
-}
-EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
+EXPORT_SYMBOL(call_usermodehelper_setfns);
/**
* call_usermodehelper_exec - start a usermode application
@@ -469,9 +390,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
- BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
- validate_creds(sub_info->cred);
-
helper_lock();
if (sub_info->path[0] == '\0')
goto out;
@@ -498,41 +416,6 @@ unlock:
}
EXPORT_SYMBOL(call_usermodehelper_exec);
-/**
- * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
- * @path: path to usermode executable
- * @argv: arg vector for process
- * @envp: environment for process
- * @filp: set to the write-end of a pipe
- *
- * This is a simple wrapper which executes a usermode-helper function
- * with a pipe as stdin. It is implemented entirely in terms of
- * lower-level call_usermodehelper_* functions.
- */
-int call_usermodehelper_pipe(char *path, char **argv, char **envp,
- struct file **filp)
-{
- struct subprocess_info *sub_info;
- int ret;
-
- sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
- if (sub_info == NULL)
- return -ENOMEM;
-
- ret = call_usermodehelper_stdinpipe(sub_info, filp);
- if (ret < 0) {
- call_usermodehelper_freeinfo(sub_info);
- return ret;
- }
-
- ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
- if (ret < 0) /* Failed to execute helper, close pipe */
- filp_close(*filp, NULL);
-
- return ret;
-}
-EXPORT_SYMBOL(call_usermodehelper_pipe);
-
void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 0ed46f3e51e..282035f3ae9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,6 +1588,72 @@ static void __kprobes kill_kprobe(struct kprobe *p)
arch_remove_kprobe(p);
}
+/* Disable one kprobe */
+int __kprobes disable_kprobe(struct kprobe *kp)
+{
+ int ret = 0;
+ struct kprobe *p;
+
+ mutex_lock(&kprobe_mutex);
+
+ /* Check whether specified probe is valid. */
+ p = __get_valid_kprobe(kp);
+ if (unlikely(p == NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* If the probe is already disabled (or gone), just return */
+ if (kprobe_disabled(kp))
+ goto out;
+
+ kp->flags |= KPROBE_FLAG_DISABLED;
+ if (p != kp)
+ /* When kp != p, p is always enabled. */
+ try_to_disable_aggr_kprobe(p);
+
+ if (!kprobes_all_disarmed && kprobe_disabled(p))
+ disarm_kprobe(p);
+out:
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(disable_kprobe);
+
+/* Enable one kprobe */
+int __kprobes enable_kprobe(struct kprobe *kp)
+{
+ int ret = 0;
+ struct kprobe *p;
+
+ mutex_lock(&kprobe_mutex);
+
+ /* Check whether specified probe is valid. */
+ p = __get_valid_kprobe(kp);
+ if (unlikely(p == NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (kprobe_gone(kp)) {
+ /* This kprobe has gone, we couldn't enable it. */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (p != kp)
+ kp->flags &= ~KPROBE_FLAG_DISABLED;
+
+ if (!kprobes_all_disarmed && kprobe_disabled(p)) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ arm_kprobe(p);
+ }
+out:
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(enable_kprobe);
+
void __kprobes dump_kprobe(struct kprobe *kp)
{
printk(KERN_WARNING "Dumping kprobe:\n");
@@ -1805,72 +1871,6 @@ static const struct file_operations debugfs_kprobes_operations = {
.release = seq_release,
};
-/* Disable one kprobe */
-int __kprobes disable_kprobe(struct kprobe *kp)
-{
- int ret = 0;
- struct kprobe *p;
-
- mutex_lock(&kprobe_mutex);
-
- /* Check whether specified probe is valid. */
- p = __get_valid_kprobe(kp);
- if (unlikely(p == NULL)) {
- ret = -EINVAL;
- goto out;
- }
-
- /* If the probe is already disabled (or gone), just return */
- if (kprobe_disabled(kp))
- goto out;
-
- kp->flags |= KPROBE_FLAG_DISABLED;
- if (p != kp)
- /* When kp != p, p is always enabled. */
- try_to_disable_aggr_kprobe(p);
-
- if (!kprobes_all_disarmed && kprobe_disabled(p))
- disarm_kprobe(p);
-out:
- mutex_unlock(&kprobe_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(disable_kprobe);
-
-/* Enable one kprobe */
-int __kprobes enable_kprobe(struct kprobe *kp)
-{
- int ret = 0;
- struct kprobe *p;
-
- mutex_lock(&kprobe_mutex);
-
- /* Check whether specified probe is valid. */
- p = __get_valid_kprobe(kp);
- if (unlikely(p == NULL)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (kprobe_gone(kp)) {
- /* This kprobe has gone, we couldn't enable it. */
- ret = -EINVAL;
- goto out;
- }
-
- if (p != kp)
- kp->flags &= ~KPROBE_FLAG_DISABLED;
-
- if (!kprobes_all_disarmed && kprobe_disabled(p)) {
- p->flags &= ~KPROBE_FLAG_DISABLED;
- arm_kprobe(p);
- }
-out:
- mutex_unlock(&kprobe_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(enable_kprobe);
-
static void __kprobes arm_all_kprobes(void)
{
struct hlist_head *head;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 21fe3c42694..0b624e79180 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak));
extern const void __stop_notes __attribute__((weak));
#define notes_size (&__stop_notes - &__start_notes)
-static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+static ssize_t notes_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
memcpy(buf, &__start_notes + off, count);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41c..54286798c37 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -431,20 +431,7 @@ static struct stack_trace lockdep_init_trace = {
/*
* Various lockdep statistics:
*/
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
#endif
/*
@@ -748,7 +735,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
return NULL;
}
class = lock_classes + nr_lock_classes++;
- debug_atomic_inc(&nr_unused_locks);
+ debug_atomic_inc(nr_unused_locks);
class->key = key;
class->name = lock->name;
class->subclass = subclass;
@@ -818,7 +805,8 @@ static struct lock_list *alloc_list_entry(void)
* Add a new dependency to the head of the list:
*/
static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
- struct list_head *head, unsigned long ip, int distance)
+ struct list_head *head, unsigned long ip,
+ int distance, struct stack_trace *trace)
{
struct lock_list *entry;
/*
@@ -829,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
if (!entry)
return 0;
- if (!save_trace(&entry->trace))
- return 0;
-
entry->class = this;
entry->distance = distance;
+ entry->trace = *trace;
/*
* Since we never remove from the dependency list, the list can
* be walked lockless by other CPUs, it's only allocation
@@ -1205,7 +1191,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
{
int result;
- debug_atomic_inc(&nr_cyclic_checks);
+ debug_atomic_inc(nr_cyclic_checks);
result = __bfs_forwards(root, target, class_equal, target_entry);
@@ -1242,7 +1228,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
{
int result;
- debug_atomic_inc(&nr_find_usage_forwards_checks);
+ debug_atomic_inc(nr_find_usage_forwards_checks);
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
@@ -1265,7 +1251,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
{
int result;
- debug_atomic_inc(&nr_find_usage_backwards_checks);
+ debug_atomic_inc(nr_find_usage_backwards_checks);
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
@@ -1635,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
*/
static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, int distance)
+ struct held_lock *next, int distance, int trylock_loop)
{
struct lock_list *entry;
int ret;
struct lock_list this;
struct lock_list *uninitialized_var(target_entry);
+ /*
+ * Static variable, serialized by the graph_lock().
+ *
+ * We use this static variable to save the stack trace in case
+ * we call into this function multiple times due to encountering
+ * trylocks in the held lock stack.
+ */
+ static struct stack_trace trace;
/*
* Prove that the new <prev> -> <next> dependency would not
@@ -1688,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
}
}
+ if (!trylock_loop && !save_trace(&trace))
+ return 0;
+
/*
* Ok, all validations passed, add the new lock
* to the previous lock's dependency list:
*/
ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
&hlock_class(prev)->locks_after,
- next->acquire_ip, distance);
+ next->acquire_ip, distance, &trace);
if (!ret)
return 0;
ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
&hlock_class(next)->locks_before,
- next->acquire_ip, distance);
+ next->acquire_ip, distance, &trace);
if (!ret)
return 0;
@@ -1731,6 +1728,7 @@ static int
check_prevs_add(struct task_struct *curr, struct held_lock *next)
{
int depth = curr->lockdep_depth;
+ int trylock_loop = 0;
struct held_lock *hlock;
/*
@@ -1756,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
* added:
*/
if (hlock->read != 2) {
- if (!check_prev_add(curr, hlock, next, distance))
+ if (!check_prev_add(curr, hlock, next,
+ distance, trylock_loop))
return 0;
/*
* Stop after the first non-trylock entry,
@@ -1779,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
if (curr->held_locks[depth].irq_context !=
curr->held_locks[depth-1].irq_context)
break;
+ trylock_loop = 1;
}
return 1;
out_bug:
@@ -1825,7 +1825,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
list_for_each_entry(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
cache_hit:
- debug_atomic_inc(&chain_lookup_hits);
+ debug_atomic_inc(chain_lookup_hits);
if (very_verbose(class))
printk("\nhash chain already cached, key: "
"%016Lx tail class: [%p] %s\n",
@@ -1890,7 +1890,7 @@ cache_hit:
chain_hlocks[chain->base + j] = class - lock_classes;
}
list_add_tail_rcu(&chain->entry, hash_head);
- debug_atomic_inc(&chain_lookup_misses);
+ debug_atomic_inc(chain_lookup_misses);
inc_chains();
return 1;
@@ -2311,7 +2311,12 @@ void trace_hardirqs_on_caller(unsigned long ip)
return;
if (unlikely(curr->hardirqs_enabled)) {
- debug_atomic_inc(&redundant_hardirqs_on);
+ /*
+ * Neither irq nor preemption are disabled here
+ * so this is racy by nature but loosing one hit
+ * in a stat is not a big deal.
+ */
+ __debug_atomic_inc(redundant_hardirqs_on);
return;
}
/* we'll do an OFF -> ON transition: */
@@ -2338,7 +2343,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(&hardirqs_on_events);
+ debug_atomic_inc(hardirqs_on_events);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
@@ -2370,9 +2375,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
curr->hardirqs_enabled = 0;
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(&hardirqs_off_events);
+ debug_atomic_inc(hardirqs_off_events);
} else
- debug_atomic_inc(&redundant_hardirqs_off);
+ debug_atomic_inc(redundant_hardirqs_off);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
@@ -2396,7 +2401,7 @@ void trace_softirqs_on(unsigned long ip)
return;
if (curr->softirqs_enabled) {
- debug_atomic_inc(&redundant_softirqs_on);
+ debug_atomic_inc(redundant_softirqs_on);
return;
}
@@ -2406,7 +2411,7 @@ void trace_softirqs_on(unsigned long ip)
curr->softirqs_enabled = 1;
curr->softirq_enable_ip = ip;
curr->softirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(&softirqs_on_events);
+ debug_atomic_inc(softirqs_on_events);
/*
* We are going to turn softirqs on, so set the
* usage bit for all held locks, if hardirqs are
@@ -2436,10 +2441,10 @@ void trace_softirqs_off(unsigned long ip)
curr->softirqs_enabled = 0;
curr->softirq_disable_ip = ip;
curr->softirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(&softirqs_off_events);
+ debug_atomic_inc(softirqs_off_events);
DEBUG_LOCKS_WARN_ON(!softirq_count());
} else
- debug_atomic_inc(&redundant_softirqs_off);
+ debug_atomic_inc(redundant_softirqs_off);
}
static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2644,7 +2649,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
break;
case LOCK_USED:
- debug_atomic_dec(&nr_unused_locks);
+ debug_atomic_dec(nr_unused_locks);
break;
default:
if (!debug_locks_off_graph_unlock())
@@ -2706,6 +2711,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
}
EXPORT_SYMBOL_GPL(lockdep_init_map);
+struct lock_class_key __lockdep_no_validate__;
+
/*
* This gets called for every mutex_lock*()/spin_lock*() operation.
* We maintain the dependency maps and validate the locking attempt:
@@ -2740,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
return 0;
}
+ if (lock->key == &__lockdep_no_validate__)
+ check = 1;
+
if (!subclass)
class = lock->class_cache;
/*
@@ -2750,7 +2760,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!class)
return 0;
}
- debug_atomic_inc((atomic_t *)&class->ops);
+ atomic_inc((atomic_t *)&class->ops);
if (very_verbose(class)) {
printk("\nacquire class [%p] %s", class->key, class->name);
if (class->name_version > 1)
@@ -3227,7 +3237,7 @@ void lock_release(struct lockdep_map *lock, int nested,
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
- trace_lock_release(lock, nested, ip);
+ trace_lock_release(lock, ip);
__lock_release(lock, nested, ip);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
@@ -3380,7 +3390,7 @@ found_it:
hlock->holdtime_stamp = now;
}
- trace_lock_acquired(lock, ip, waittime);
+ trace_lock_acquired(lock, ip);
stats = get_lock_stats(hlock_class(hlock));
if (waittime) {
@@ -3801,8 +3811,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
{
struct task_struct *curr = current;
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
if (!debug_locks_off())
return;
+#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
+ /* Note: the following can be executed concurrently, so be careful. */
printk("\n===================================================\n");
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
printk( "---------------------------------------------------\n");
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2ee95ad131..4f560cfedc8 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -110,30 +110,60 @@ lockdep_count_backward_deps(struct lock_class *class)
#endif
#ifdef CONFIG_DEBUG_LOCKDEP
+
+#include <asm/local.h>
/*
- * Various lockdep statistics:
+ * Various lockdep statistics.
+ * We want them per cpu as they are often accessed in fast path
+ * and we want to avoid too much cache bouncing.
*/
-extern atomic_t chain_lookup_hits;
-extern atomic_t chain_lookup_misses;
-extern atomic_t hardirqs_on_events;
-extern atomic_t hardirqs_off_events;
-extern atomic_t redundant_hardirqs_on;
-extern atomic_t redundant_hardirqs_off;
-extern atomic_t softirqs_on_events;
-extern atomic_t softirqs_off_events;
-extern atomic_t redundant_softirqs_on;
-extern atomic_t redundant_softirqs_off;
-extern atomic_t nr_unused_locks;
-extern atomic_t nr_cyclic_checks;
-extern atomic_t nr_cyclic_check_recursions;
-extern atomic_t nr_find_usage_forwards_checks;
-extern atomic_t nr_find_usage_forwards_recursions;
-extern atomic_t nr_find_usage_backwards_checks;
-extern atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr) atomic_inc(ptr)
-# define debug_atomic_dec(ptr) atomic_dec(ptr)
-# define debug_atomic_read(ptr) atomic_read(ptr)
+struct lockdep_stats {
+ int chain_lookup_hits;
+ int chain_lookup_misses;
+ int hardirqs_on_events;
+ int hardirqs_off_events;
+ int redundant_hardirqs_on;
+ int redundant_hardirqs_off;
+ int softirqs_on_events;
+ int softirqs_off_events;
+ int redundant_softirqs_on;
+ int redundant_softirqs_off;
+ int nr_unused_locks;
+ int nr_cyclic_checks;
+ int nr_cyclic_check_recursions;
+ int nr_find_usage_forwards_checks;
+ int nr_find_usage_forwards_recursions;
+ int nr_find_usage_backwards_checks;
+ int nr_find_usage_backwards_recursions;
+};
+
+DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+
+#define __debug_atomic_inc(ptr) \
+ this_cpu_inc(lockdep_stats.ptr);
+
+#define debug_atomic_inc(ptr) { \
+ WARN_ON_ONCE(!irqs_disabled()); \
+ __this_cpu_inc(lockdep_stats.ptr); \
+}
+
+#define debug_atomic_dec(ptr) { \
+ WARN_ON_ONCE(!irqs_disabled()); \
+ __this_cpu_dec(lockdep_stats.ptr); \
+}
+
+#define debug_atomic_read(ptr) ({ \
+ struct lockdep_stats *__cpu_lockdep_stats; \
+ unsigned long long __total = 0; \
+ int __cpu; \
+ for_each_possible_cpu(__cpu) { \
+ __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
+ __total += __cpu_lockdep_stats->ptr; \
+ } \
+ __total; \
+})
#else
+# define __debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_dec(ptr) do { } while (0)
# define debug_atomic_read(ptr) 0
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4aba4f3584..59b76c8ce9d 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
- unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
- hi2 = debug_atomic_read(&hardirqs_off_events),
- hr1 = debug_atomic_read(&redundant_hardirqs_on),
- hr2 = debug_atomic_read(&redundant_hardirqs_off),
- si1 = debug_atomic_read(&softirqs_on_events),
- si2 = debug_atomic_read(&softirqs_off_events),
- sr1 = debug_atomic_read(&redundant_softirqs_on),
- sr2 = debug_atomic_read(&redundant_softirqs_off);
-
- seq_printf(m, " chain lookup misses: %11u\n",
- debug_atomic_read(&chain_lookup_misses));
- seq_printf(m, " chain lookup hits: %11u\n",
- debug_atomic_read(&chain_lookup_hits));
- seq_printf(m, " cyclic checks: %11u\n",
- debug_atomic_read(&nr_cyclic_checks));
- seq_printf(m, " find-mask forwards checks: %11u\n",
- debug_atomic_read(&nr_find_usage_forwards_checks));
- seq_printf(m, " find-mask backwards checks: %11u\n",
- debug_atomic_read(&nr_find_usage_backwards_checks));
-
- seq_printf(m, " hardirq on events: %11u\n", hi1);
- seq_printf(m, " hardirq off events: %11u\n", hi2);
- seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
- seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
- seq_printf(m, " softirq on events: %11u\n", si1);
- seq_printf(m, " softirq off events: %11u\n", si2);
- seq_printf(m, " redundant softirq ons: %11u\n", sr1);
- seq_printf(m, " redundant softirq offs: %11u\n", sr2);
+ unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
+ hi2 = debug_atomic_read(hardirqs_off_events),
+ hr1 = debug_atomic_read(redundant_hardirqs_on),
+ hr2 = debug_atomic_read(redundant_hardirqs_off),
+ si1 = debug_atomic_read(softirqs_on_events),
+ si2 = debug_atomic_read(softirqs_off_events),
+ sr1 = debug_atomic_read(redundant_softirqs_on),
+ sr2 = debug_atomic_read(redundant_softirqs_off);
+
+ seq_printf(m, " chain lookup misses: %11llu\n",
+ debug_atomic_read(chain_lookup_misses));
+ seq_printf(m, " chain lookup hits: %11llu\n",
+ debug_atomic_read(chain_lookup_hits));
+ seq_printf(m, " cyclic checks: %11llu\n",
+ debug_atomic_read(nr_cyclic_checks));
+ seq_printf(m, " find-mask forwards checks: %11llu\n",
+ debug_atomic_read(nr_find_usage_forwards_checks));
+ seq_printf(m, " find-mask backwards checks: %11llu\n",
+ debug_atomic_read(nr_find_usage_backwards_checks));
+
+ seq_printf(m, " hardirq on events: %11llu\n", hi1);
+ seq_printf(m, " hardirq off events: %11llu\n", hi2);
+ seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
+ seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
+ seq_printf(m, " softirq on events: %11llu\n", si1);
+ seq_printf(m, " softirq off events: %11llu\n", si2);
+ seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
+ seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
#endif
}
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
- DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+ DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026..333fbcc9697 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -59,8 +59,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>
-EXPORT_TRACEPOINT_SYMBOL(module_get);
-
#if 0
#define DEBUGP printk
#else
@@ -79,6 +77,10 @@ EXPORT_TRACEPOINT_SYMBOL(module_get);
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -178,8 +180,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
extern const struct kernel_symbol __stop___ksymtab_gpl[];
extern const struct kernel_symbol __start___ksymtab_gpl_future[];
extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const struct kernel_symbol __start___ksymtab_gpl_future[];
-extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
extern const unsigned long __start___kcrctab[];
extern const unsigned long __start___kcrctab_gpl[];
extern const unsigned long __start___kcrctab_gpl_future[];
@@ -515,6 +515,9 @@ MODINFO_ATTR(srcversion);
static char last_unloaded_module[MODULE_NAME_LEN+1];
#ifdef CONFIG_MODULE_UNLOAD
+
+EXPORT_TRACEPOINT_SYMBOL(module_get);
+
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
@@ -723,16 +726,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
- /* Create stop_machine threads since free_module relies on
- * a non-failing stop_machine call. */
- ret = stop_machine_create();
- if (ret)
- return ret;
-
- if (mutex_lock_interruptible(&module_mutex) != 0) {
- ret = -EINTR;
- goto out_stop;
- }
+ if (mutex_lock_interruptible(&module_mutex) != 0)
+ return -EINTR;
mod = find_module(name);
if (!mod) {
@@ -792,8 +787,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
out:
mutex_unlock(&module_mutex);
-out_stop:
- stop_machine_destroy();
return ret;
}
@@ -867,8 +860,7 @@ void module_put(struct module *module)
smp_wmb(); /* see comment in module_refcount */
__this_cpu_inc(module->refptr->decs);
- trace_module_put(module, _RET_IP_,
- __this_cpu_read(module->refptr->decs));
+ trace_module_put(module, _RET_IP_);
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
@@ -1192,7 +1184,7 @@ struct module_notes_attrs {
struct bin_attribute attrs[0];
};
-static ssize_t module_notes_read(struct kobject *kobj,
+static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
diff --git a/kernel/padata.c b/kernel/padata.c
index fd03513c732..fdd8ae609ce 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -29,7 +29,7 @@
#include <linux/rcupdate.h>
#define MAX_SEQ_NR INT_MAX - NR_CPUS
-#define MAX_OBJ_NUM 10000 * NR_CPUS
+#define MAX_OBJ_NUM 1000
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
@@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work)
local_bh_enable();
}
-/*
+/**
* padata_do_parallel - padata parallelization function
*
* @pinst: padata instance
@@ -152,6 +152,23 @@ out:
}
EXPORT_SYMBOL(padata_do_parallel);
+/*
+ * padata_get_next - Get the next object that needs serialization.
+ *
+ * Return values are:
+ *
+ * A pointer to the control struct of the next object that needs
+ * serialization, if present in one of the percpu reorder queues.
+ *
+ * NULL, if all percpu reorder queues are empty.
+ *
+ * -EINPROGRESS, if the next object that needs serialization will
+ * be parallel processed by another cpu and is not yet present in
+ * the cpu's reorder queue.
+ *
+ * -ENODATA, if this cpu has to do the parallel processing for
+ * the next object.
+ */
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
/*
* Calculate the seq_nr of the object that should be
- * next in this queue.
+ * next in this reorder queue.
*/
overrun = 0;
calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
goto out;
}
- if (next_nr % num_cpus == next_queue->cpu_index) {
+ queue = per_cpu_ptr(pd->queue, smp_processor_id());
+ if (queue->cpu_index == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
}
@@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd)
struct padata_queue *queue;
struct padata_instance *pinst = pd->pinst;
-try_again:
+ /*
+ * We need to ensure that only one cpu can work on dequeueing of
+ * the reorder queue the time. Calculating in which percpu reorder
+ * queue the next object will arrive takes some time. A spinlock
+ * would be highly contended. Also it is not clear in which order
+ * the objects arrive to the reorder queues. So a cpu could wait to
+ * get the lock just to notice that there is nothing to do at the
+ * moment. Therefore we use a trylock and let the holder of the lock
+ * care for all the objects enqueued during the holdtime of the lock.
+ */
if (!spin_trylock_bh(&pd->lock))
- goto out;
+ return;
while (1) {
padata = padata_get_next(pd);
+ /*
+ * All reorder queues are empty, or the next object that needs
+ * serialization is parallel processed by another cpu and is
+ * still on it's way to the cpu's reorder queue, nothing to
+ * do for now.
+ */
if (!padata || PTR_ERR(padata) == -EINPROGRESS)
break;
+ /*
+ * This cpu has to do the parallel processing of the next
+ * object. It's waiting in the cpu's parallelization queue,
+ * so exit imediately.
+ */
if (PTR_ERR(padata) == -ENODATA) {
+ del_timer(&pd->timer);
spin_unlock_bh(&pd->lock);
- goto out;
+ return;
}
queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
@@ -273,13 +312,27 @@ try_again:
spin_unlock_bh(&pd->lock);
- if (atomic_read(&pd->reorder_objects))
- goto try_again;
+ /*
+ * The next object that needs serialization might have arrived to
+ * the reorder queues in the meantime, we will be called again
+ * from the timer function if noone else cares for it.
+ */
+ if (atomic_read(&pd->reorder_objects)
+ && !(pinst->flags & PADATA_RESET))
+ mod_timer(&pd->timer, jiffies + HZ);
+ else
+ del_timer(&pd->timer);
-out:
return;
}
+static void padata_reorder_timer(unsigned long arg)
+{
+ struct parallel_data *pd = (struct parallel_data *)arg;
+
+ padata_reorder(pd);
+}
+
static void padata_serial_worker(struct work_struct *work)
{
struct padata_queue *queue;
@@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work)
local_bh_enable();
}
-/*
+/**
* padata_do_serial - padata serialization function
*
* @padata: object to be serialized.
@@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
}
EXPORT_SYMBOL(padata_do_serial);
+/* Allocate and initialize the internal cpumask dependend resources. */
static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
const struct cpumask *cpumask)
{
@@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
goto err_free_queue;
- for_each_possible_cpu(cpu) {
+ cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+
+ for_each_cpu(cpu, pd->cpumask) {
queue = per_cpu_ptr(pd->queue, cpu);
queue->pd = pd;
- if (cpumask_test_cpu(cpu, cpumask)
- && cpumask_test_cpu(cpu, cpu_active_mask)) {
- queue->cpu_index = cpu_index;
- cpu_index++;
- } else
- queue->cpu_index = -1;
+ queue->cpu_index = cpu_index;
+ cpu_index++;
INIT_LIST_HEAD(&queue->reorder.list);
INIT_LIST_HEAD(&queue->parallel.list);
@@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
atomic_set(&queue->num_obj, 0);
}
- cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
-
num_cpus = cpumask_weight(pd->cpumask);
pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+ setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 0);
@@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd)
kfree(pd);
}
+/* Flush all objects out of the padata queues. */
+static void padata_flush_queues(struct parallel_data *pd)
+{
+ int cpu;
+ struct padata_queue *queue;
+
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ flush_work(&queue->pwork);
+ }
+
+ del_timer_sync(&pd->timer);
+
+ if (atomic_read(&pd->reorder_objects))
+ padata_reorder(pd);
+
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ flush_work(&queue->swork);
+ }
+
+ BUG_ON(atomic_read(&pd->refcnt) != 0);
+}
+
+/* Replace the internal control stucture with a new one. */
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
@@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst,
synchronize_rcu();
- while (atomic_read(&pd_old->refcnt) != 0)
- yield();
-
- flush_workqueue(pinst->wq);
-
+ padata_flush_queues(pd_old);
padata_free_pd(pd_old);
pinst->flags &= ~PADATA_RESET;
}
-/*
+/**
* padata_set_cpumask - set the cpumask that padata should use
*
* @pinst: padata instance
@@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst,
struct parallel_data *pd;
int err = 0;
- might_sleep();
-
mutex_lock(&pinst->lock);
+ get_online_cpus();
+
pd = padata_alloc_pd(pinst, cpumask);
if (!pd) {
err = -ENOMEM;
@@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst,
padata_replace(pinst, pd);
out:
+ put_online_cpus();
+
mutex_unlock(&pinst->lock);
return err;
@@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
return 0;
}
-/*
+/**
* padata_add_cpu - add a cpu to the padata cpumask
*
* @pinst: padata instance
@@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu)
{
int err;
- might_sleep();
-
mutex_lock(&pinst->lock);
+ get_online_cpus();
cpumask_set_cpu(cpu, pinst->cpumask);
err = __padata_add_cpu(pinst, cpu);
+ put_online_cpus();
mutex_unlock(&pinst->lock);
@@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
return 0;
}
-/*
+/**
* padata_remove_cpu - remove a cpu from the padata cpumask
*
* @pinst: padata instance
@@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
int err;
- might_sleep();
-
mutex_lock(&pinst->lock);
+ get_online_cpus();
cpumask_clear_cpu(cpu, pinst->cpumask);
err = __padata_remove_cpu(pinst, cpu);
+ put_online_cpus();
mutex_unlock(&pinst->lock);
@@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
}
EXPORT_SYMBOL(padata_remove_cpu);
-/*
+/**
* padata_start - start the parallel processing
*
* @pinst: padata instance to start
*/
void padata_start(struct padata_instance *pinst)
{
- might_sleep();
-
mutex_lock(&pinst->lock);
pinst->flags |= PADATA_INIT;
mutex_unlock(&pinst->lock);
}
EXPORT_SYMBOL(padata_start);
-/*
+/**
* padata_stop - stop the parallel processing
*
* @pinst: padata instance to stop
*/
void padata_stop(struct padata_instance *pinst)
{
- might_sleep();
-
mutex_lock(&pinst->lock);
pinst->flags &= ~PADATA_INIT;
mutex_unlock(&pinst->lock);
}
EXPORT_SYMBOL(padata_stop);
-static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static int padata_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
int err;
struct padata_instance *pinst;
@@ -588,7 +659,7 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
err = __padata_add_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
if (err)
- return NOTIFY_BAD;
+ return notifier_from_errno(err);
break;
case CPU_DOWN_PREPARE:
@@ -599,7 +670,7 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
err = __padata_remove_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
if (err)
- return NOTIFY_BAD;
+ return notifier_from_errno(err);
break;
case CPU_UP_CANCELED:
@@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
+#endif
-/*
+/**
* padata_alloc - allocate and initialize a padata instance
*
* @cpumask: cpumask that padata uses for parallelization
@@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
struct padata_instance *padata_alloc(const struct cpumask *cpumask,
struct workqueue_struct *wq)
{
- int err;
struct padata_instance *pinst;
struct parallel_data *pd;
@@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
if (!pinst)
goto err;
+ get_online_cpus();
+
pd = padata_alloc_pd(pinst, cpumask);
if (!pd)
goto err_free_inst;
@@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
pinst->flags = 0;
+#ifdef CONFIG_HOTPLUG_CPU
pinst->cpu_notifier.notifier_call = padata_cpu_callback;
pinst->cpu_notifier.priority = 0;
- err = register_hotcpu_notifier(&pinst->cpu_notifier);
- if (err)
- goto err_free_cpumask;
+ register_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
+ put_online_cpus();
mutex_init(&pinst->lock);
return pinst;
-err_free_cpumask:
- free_cpumask_var(pinst->cpumask);
err_free_pd:
padata_free_pd(pd);
err_free_inst:
kfree(pinst);
+ put_online_cpus();
err:
return NULL;
}
EXPORT_SYMBOL(padata_alloc);
-/*
+/**
* padata_free - free a padata instance
*
- * @ padata_inst: padata instance to free
+ * @padata_inst: padata instance to free
*/
void padata_free(struct padata_instance *pinst)
{
@@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst)
synchronize_rcu();
- while (atomic_read(&pinst->pd->refcnt) != 0)
- yield();
-
+#ifdef CONFIG_HOTPLUG_CPU
unregister_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+ get_online_cpus();
+ padata_flush_queues(pinst->pd);
+ put_online_cpus();
+
padata_free_pd(pinst->pd);
free_cpumask_var(pinst->cpumask);
kfree(pinst);
diff --git a/kernel/panic.c b/kernel/panic.c
index 13d966b4c14..3b16cd93fa7 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -87,6 +87,7 @@ NORET_TYPE void panic(const char * fmt, ...)
*/
preempt_disable();
+ console_verbose();
bust_spinlocks(1);
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
@@ -178,6 +179,7 @@ static const struct tnt tnts[] = {
{ TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
{ TAINT_WARN, 'W', ' ' },
{ TAINT_CRAP, 'C', ' ' },
+ { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
};
/**
@@ -194,6 +196,7 @@ static const struct tnt tnts[] = {
* 'A' - ACPI table overridden.
* 'W' - Taint on warning.
* 'C' - modules from drivers/staging are loaded.
+ * 'I' - Working around severe firmware bug.
*
* The string is overwritten by the next call to print_tainted().
*/
@@ -365,7 +368,8 @@ struct slowpath_args {
va_list args;
};
-static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args)
+static void warn_slowpath_common(const char *file, int line, void *caller,
+ unsigned taint, struct slowpath_args *args)
{
const char *board;
@@ -381,7 +385,7 @@ static void warn_slowpath_common(const char *file, int line, void *caller, struc
print_modules();
dump_stack();
print_oops_end_marker();
- add_taint(TAINT_WARN);
+ add_taint(taint);
}
void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
@@ -390,14 +394,29 @@ void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
args.fmt = fmt;
va_start(args.args, fmt);
- warn_slowpath_common(file, line, __builtin_return_address(0), &args);
+ warn_slowpath_common(file, line, __builtin_return_address(0),
+ TAINT_WARN, &args);
va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt);
+void warn_slowpath_fmt_taint(const char *file, int line,
+ unsigned taint, const char *fmt, ...)
+{
+ struct slowpath_args args;
+
+ args.fmt = fmt;
+ va_start(args.args, fmt);
+ warn_slowpath_common(file, line, __builtin_return_address(0),
+ taint, &args);
+ va_end(args.args);
+}
+EXPORT_SYMBOL(warn_slowpath_fmt_taint);
+
void warn_slowpath_null(const char *file, int line)
{
- warn_slowpath_common(file, line, __builtin_return_address(0), NULL);
+ warn_slowpath_common(file, line, __builtin_return_address(0),
+ TAINT_WARN, NULL);
}
EXPORT_SYMBOL(warn_slowpath_null);
#endif
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3d1552d3c12..bd7ce8ca5bb 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/hash.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
@@ -82,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
-int __weak
-hw_perf_group_sched_in(struct perf_event *group_leader,
- struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- return 0;
-}
-
void __weak perf_event_print_debug(void) { }
static DEFINE_PER_CPU(int, perf_disable_count);
@@ -262,6 +255,18 @@ static void update_event_times(struct perf_event *event)
event->total_time_running = run_end - event->tstamp_running;
}
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+ struct perf_event *event;
+
+ update_event_times(leader);
+ list_for_each_entry(event, &leader->sibling_list, group_entry)
+ update_event_times(event);
+}
+
static struct list_head *
ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
{
@@ -315,8 +320,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_event *sibling, *tmp;
-
if (list_empty(&event->group_entry))
return;
ctx->nr_events--;
@@ -329,7 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
if (event->group_leader != event)
event->group_leader->nr_siblings--;
- update_event_times(event);
+ update_group_times(event);
/*
* If event was in error state, then keep it
@@ -340,6 +343,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
*/
if (event->state > PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_OFF;
+}
+
+static void
+perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+{
+ struct perf_event *sibling, *tmp;
/*
* If this was a group event with sibling events then
@@ -505,18 +514,6 @@ retry:
}
/*
- * Update total_time_enabled and total_time_running for all events in a group.
- */
-static void update_group_times(struct perf_event *leader)
-{
- struct perf_event *event;
-
- update_event_times(leader);
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- update_event_times(event);
-}
-
-/*
* Cross CPU call to disable a performance event
*/
static void __perf_event_disable(void *info)
@@ -640,15 +637,20 @@ group_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
- struct perf_event *event, *partial_group;
+ struct perf_event *event, *partial_group = NULL;
+ const struct pmu *pmu = group_event->pmu;
+ bool txn = false;
int ret;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
- ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
- if (ret)
- return ret < 0 ? ret : 0;
+ /* Check if group transaction availabe */
+ if (pmu->start_txn)
+ txn = true;
+
+ if (txn)
+ pmu->start_txn(pmu);
if (event_sched_in(group_event, cpuctx, ctx))
return -EAGAIN;
@@ -663,9 +665,19 @@ group_sched_in(struct perf_event *group_event,
}
}
- return 0;
+ if (!txn)
+ return 0;
+
+ ret = pmu->commit_txn(pmu);
+ if (!ret) {
+ pmu->cancel_txn(pmu);
+ return 0;
+ }
group_error:
+ if (txn)
+ pmu->cancel_txn(pmu);
+
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
@@ -1367,6 +1379,8 @@ void perf_event_task_sched_in(struct task_struct *task)
if (cpuctx->task_ctx == ctx)
return;
+ perf_disable();
+
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1393,8 @@ void perf_event_task_sched_in(struct task_struct *task)
ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
cpuctx->task_ctx = ctx;
+
+ perf_enable();
}
#define MAX_INTERRUPTS (~0ULL)
@@ -1856,9 +1872,30 @@ int perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
+ /*
+ * Remove from the PMU, can't get re-enabled since we got
+ * here because the last ref went.
+ */
+ perf_event_disable(event);
+
WARN_ON_ONCE(ctx->parent_ctx);
- mutex_lock(&ctx->mutex);
- perf_event_remove_from_context(event);
+ /*
+ * There are two ways this annotation is useful:
+ *
+ * 1) there is a lock recursion from perf_event_exit_task
+ * see the comment there.
+ *
+ * 2) there is a lock-inversion with mmap_sem through
+ * perf_event_read_group(), which takes faults while
+ * holding ctx->mutex, however this is called after
+ * the last filedesc died, so there is no possibility
+ * to trigger the AB-BA case.
+ */
+ mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_irq(&ctx->lock);
+ list_del_event(event, ctx);
+ perf_destroy_group(event, ctx);
+ raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
mutex_lock(&event->owner->perf_event_mutex);
@@ -2260,11 +2297,6 @@ unlock:
rcu_read_unlock();
}
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
- return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2283,6 +2315,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
return virt_to_page(data->data_pages[pgoff - 1]);
}
+static void *perf_mmap_alloc_page(int cpu)
+{
+ struct page *page;
+ int node;
+
+ node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
static struct perf_mmap_data *
perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
{
@@ -2299,17 +2344,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
if (!data)
goto fail;
- data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+ data->user_page = perf_mmap_alloc_page(event->cpu);
if (!data->user_page)
goto fail_user_page;
for (i = 0; i < nr_pages; i++) {
- data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+ data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
if (!data->data_pages[i])
goto fail_data_pages;
}
- data->data_order = 0;
data->nr_pages = nr_pages;
return data;
@@ -2345,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
kfree(data);
}
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return 0;
+}
+
#else
/*
@@ -2353,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
* Required for architectures that have d-cache aliasing issues.
*/
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return data->page_order;
+}
+
static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
- if (pgoff > (1UL << data->data_order))
+ if (pgoff > (1UL << page_order(data)))
return NULL;
return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2376,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
int i, nr;
data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << data->data_order;
+ nr = 1 << page_order(data);
base = data->user_page;
for (i = 0; i < nr + 1; i++)
@@ -2415,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
data->user_page = all_buf;
data->data_pages[0] = all_buf + PAGE_SIZE;
- data->data_order = ilog2(nr_pages);
+ data->page_order = ilog2(nr_pages);
data->nr_pages = 1;
return data;
@@ -2429,6 +2483,11 @@ fail:
#endif
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+ return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
@@ -2469,8 +2528,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
{
long max_size = perf_data_size(data);
- atomic_set(&data->lock, -1);
-
if (event->attr.watermark) {
data->watermark = min_t(long, max_size,
event->attr.wakeup_watermark);
@@ -2543,6 +2600,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
long user_extra, extra;
int ret = 0;
+ /*
+ * Don't allow mmap() of inherited per-task counters. This would
+ * create a performance issue due to all children writing to the
+ * same buffer.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
@@ -2642,6 +2707,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
}
static const struct file_operations perf_fops = {
+ .llseek = no_llseek,
.release = perf_release,
.read = perf_read,
.poll = perf_poll,
@@ -2792,6 +2858,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
/*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+ perf_guest_cbs = cbs;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+ perf_guest_cbs = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
+/*
* Output
*/
static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
@@ -2826,120 +2913,80 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
}
/*
- * Curious locking construct.
- *
* We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
* cannot fully serialize things.
*
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
* We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
*/
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
{
struct perf_mmap_data *data = handle->data;
- int cur, cpu = get_cpu();
- handle->locked = 0;
-
- for (;;) {
- cur = atomic_cmpxchg(&data->lock, -1, cpu);
- if (cur == -1) {
- handle->locked = 1;
- break;
- }
- if (cur == cpu)
- break;
-
- cpu_relax();
- }
+ preempt_disable();
+ local_inc(&data->nest);
+ handle->wakeup = local_read(&data->wakeup);
}
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
{
struct perf_mmap_data *data = handle->data;
unsigned long head;
- int cpu;
-
- data->done_head = data->head;
-
- if (!handle->locked)
- goto out;
again:
- /*
- * The xchg implies a full barrier that ensures all writes are done
- * before we publish the new head, matched by a rmb() in userspace when
- * reading this position.
- */
- while ((head = atomic_long_xchg(&data->done_head, 0)))
- data->user_page->data_head = head;
+ head = local_read(&data->head);
/*
- * NMI can happen here, which means we can miss a done_head update.
+ * IRQ/NMI can happen here, which means we can miss a head update.
*/
- cpu = atomic_xchg(&data->lock, -1);
- WARN_ON_ONCE(cpu != smp_processor_id());
+ if (!local_dec_and_test(&data->nest))
+ goto out;
/*
- * Therefore we have to validate we did not indeed do so.
+ * Publish the known good head. Rely on the full barrier implied
+ * by atomic_dec_and_test() order the data->head read and this
+ * write.
*/
- if (unlikely(atomic_long_read(&data->done_head))) {
- /*
- * Since we had it locked, we can lock it again.
- */
- while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
- cpu_relax();
+ data->user_page->data_head = head;
+ /*
+ * Now check if we missed an update, rely on the (compiler)
+ * barrier in atomic_dec_and_test() to re-read data->head.
+ */
+ if (unlikely(head != local_read(&data->head))) {
+ local_inc(&data->nest);
goto again;
}
- if (atomic_xchg(&data->wakeup, 0))
+ if (handle->wakeup != local_read(&data->wakeup))
perf_output_wakeup(handle);
-out:
- put_cpu();
+
+ out:
+ preempt_enable();
}
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- unsigned int pages_mask;
- unsigned long offset;
- unsigned int size;
- void **pages;
-
- offset = handle->offset;
- pages_mask = handle->data->nr_pages - 1;
- pages = handle->data->data_pages;
-
do {
- unsigned long page_offset;
- unsigned long page_size;
- int nr;
+ unsigned long size = min_t(unsigned long, handle->size, len);
- nr = (offset >> PAGE_SHIFT) & pages_mask;
- page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
- page_offset = offset & (page_size - 1);
- size = min_t(unsigned int, page_size - page_offset, len);
+ memcpy(handle->addr, buf, size);
- memcpy(pages[nr] + page_offset, buf, size);
+ len -= size;
+ handle->addr += size;
+ handle->size -= size;
+ if (!handle->size) {
+ struct perf_mmap_data *data = handle->data;
- len -= size;
- buf += size;
- offset += size;
+ handle->page++;
+ handle->page &= data->nr_pages - 1;
+ handle->addr = data->data_pages[handle->page];
+ handle->size = PAGE_SIZE << page_order(data);
+ }
} while (len);
-
- handle->offset = offset;
-
- /*
- * Check we didn't copy past our reservation window, taking the
- * possible unsigned int wrap into account.
- */
- WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
}
int perf_output_begin(struct perf_output_handle *handle,
@@ -2977,13 +3024,13 @@ int perf_output_begin(struct perf_output_handle *handle,
handle->sample = sample;
if (!data->nr_pages)
- goto fail;
+ goto out;
- have_lost = atomic_read(&data->lost);
+ have_lost = local_read(&data->lost);
if (have_lost)
size += sizeof(lost_event);
- perf_output_lock(handle);
+ perf_output_get_handle(handle);
do {
/*
@@ -2993,24 +3040,28 @@ int perf_output_begin(struct perf_output_handle *handle,
*/
tail = ACCESS_ONCE(data->user_page->data_tail);
smp_rmb();
- offset = head = atomic_long_read(&data->head);
+ offset = head = local_read(&data->head);
head += size;
if (unlikely(!perf_output_space(data, tail, offset, head)))
goto fail;
- } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
+ } while (local_cmpxchg(&data->head, offset, head) != offset);
- handle->offset = offset;
- handle->head = head;
+ if (head - local_read(&data->wakeup) > data->watermark)
+ local_add(data->watermark, &data->wakeup);
- if (head - tail > data->watermark)
- atomic_set(&data->wakeup, 1);
+ handle->page = offset >> (PAGE_SHIFT + page_order(data));
+ handle->page &= data->nr_pages - 1;
+ handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
+ handle->addr = data->data_pages[handle->page];
+ handle->addr += handle->size;
+ handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id;
- lost_event.lost = atomic_xchg(&data->lost, 0);
+ lost_event.lost = local_xchg(&data->lost, 0);
perf_output_put(handle, lost_event);
}
@@ -3018,8 +3069,8 @@ int perf_output_begin(struct perf_output_handle *handle,
return 0;
fail:
- atomic_inc(&data->lost);
- perf_output_unlock(handle);
+ local_inc(&data->lost);
+ perf_output_put_handle(handle);
out:
rcu_read_unlock();
@@ -3034,14 +3085,14 @@ void perf_output_end(struct perf_output_handle *handle)
int wakeup_events = event->attr.wakeup_events;
if (handle->sample && wakeup_events) {
- int events = atomic_inc_return(&data->events);
+ int events = local_inc_return(&data->events);
if (events >= wakeup_events) {
- atomic_sub(wakeup_events, &data->events);
- atomic_set(&data->wakeup, 1);
+ local_sub(wakeup_events, &data->events);
+ local_inc(&data->wakeup);
}
}
- perf_output_unlock(handle);
+ perf_output_put_handle(handle);
rcu_read_unlock();
}
@@ -3377,22 +3428,13 @@ static void perf_event_task_output(struct perf_event *event,
{
struct perf_output_handle handle;
struct task_struct *task = task_event->task;
- unsigned long flags;
int size, ret;
- /*
- * If this CPU attempts to acquire an rq lock held by a CPU spinning
- * in perf_output_lock() from interrupt context, it's game over.
- */
- local_irq_save(flags);
-
size = task_event->event_id.header.size;
ret = perf_output_begin(&handle, event, size, 0, 0);
- if (ret) {
- local_irq_restore(flags);
+ if (ret)
return;
- }
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3403,7 +3445,6 @@ static void perf_event_task_output(struct perf_event *event,
perf_output_put(&handle, task_event->event_id);
perf_output_end(&handle);
- local_irq_restore(flags);
}
static int perf_event_task_match(struct perf_event *event)
@@ -3743,7 +3784,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
.event_id = {
.header = {
.type = PERF_RECORD_MMAP,
- .misc = 0,
+ .misc = PERF_RECORD_MISC_USER,
/* .size */
},
/* .pid */
@@ -3961,39 +4002,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
perf_swevent_overflow(event, 0, nmi, data, regs);
}
-static int perf_swevent_is_counting(struct perf_event *event)
-{
- /*
- * The event is active, we're good!
- */
- if (event->state == PERF_EVENT_STATE_ACTIVE)
- return 1;
-
- /*
- * The event is off/error, not counting.
- */
- if (event->state != PERF_EVENT_STATE_INACTIVE)
- return 0;
-
- /*
- * The event is inactive, if the context is active
- * we're part of a group that didn't make it on the 'pmu',
- * not counting.
- */
- if (event->ctx->is_active)
- return 0;
-
- /*
- * We're inactive and the context is too, this means the
- * task is scheduled out, we're counting events that happen
- * to us, like migration events.
- */
- return 1;
-}
-
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data);
-
static int perf_exclude_event(struct perf_event *event,
struct pt_regs *regs)
{
@@ -4014,12 +4022,6 @@ static int perf_swevent_match(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- if (event->cpu != -1 && event->cpu != smp_processor_id())
- return 0;
-
- if (!perf_swevent_is_counting(event))
- return 0;
-
if (event->attr.type != type)
return 0;
@@ -4029,30 +4031,88 @@ static int perf_swevent_match(struct perf_event *event,
if (perf_exclude_event(event, regs))
return 0;
- if (event->attr.type == PERF_TYPE_TRACEPOINT &&
- !perf_tp_event_match(event, data))
- return 0;
-
return 1;
}
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
- enum perf_type_id type,
- u32 event_id, u64 nr, int nmi,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
{
+ u64 val = event_id | (type << 32);
+
+ return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static inline struct hlist_head *
+__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
+{
+ u64 hash = swevent_hash(type, event_id);
+
+ return &hlist->heads[hash];
+}
+
+/* For the read side: events when they trigger */
+static inline struct hlist_head *
+find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+ struct swevent_hlist *hlist;
+
+ hlist = rcu_dereference(ctx->swevent_hlist);
+ if (!hlist)
+ return NULL;
+
+ return __find_swevent_head(hlist, type, event_id);
+}
+
+/* For the event head insertion and removal in the hlist */
+static inline struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
+{
+ struct swevent_hlist *hlist;
+ u32 event_id = event->attr.config;
+ u64 type = event->attr.type;
+
+ /*
+ * Event scheduling is always serialized against hlist allocation
+ * and release. Which makes the protected version suitable here.
+ * The context lock guarantees that.
+ */
+ hlist = rcu_dereference_protected(ctx->swevent_hlist,
+ lockdep_is_held(&event->ctx->lock));
+ if (!hlist)
+ return NULL;
+
+ return __find_swevent_head(hlist, type, event_id);
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+ u64 nr, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct perf_cpu_context *cpuctx;
struct perf_event *event;
+ struct hlist_node *node;
+ struct hlist_head *head;
- list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ cpuctx = &__get_cpu_var(perf_cpu_context);
+
+ rcu_read_lock();
+
+ head = find_swevent_head_rcu(cpuctx, type, event_id);
+
+ if (!head)
+ goto end;
+
+ hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
if (perf_swevent_match(event, type, event_id, data, regs))
perf_swevent_add(event, nr, nmi, data, regs);
}
+end:
+ rcu_read_unlock();
}
int perf_swevent_get_recursion_context(void)
{
- struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
int rctx;
if (in_nmi())
@@ -4064,10 +4124,8 @@ int perf_swevent_get_recursion_context(void)
else
rctx = 0;
- if (cpuctx->recursion[rctx]) {
- put_cpu_var(perf_cpu_context);
+ if (cpuctx->recursion[rctx])
return -1;
- }
cpuctx->recursion[rctx]++;
barrier();
@@ -4081,31 +4139,9 @@ void perf_swevent_put_recursion_context(int rctx)
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
cpuctx->recursion[rctx]--;
- put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
- u64 nr, int nmi,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- struct perf_cpu_context *cpuctx;
- struct perf_event_context *ctx;
-
- cpuctx = &__get_cpu_var(perf_cpu_context);
- rcu_read_lock();
- perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
- nr, nmi, data, regs);
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
- ctx = rcu_dereference(current->perf_event_ctxp);
- if (ctx)
- perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
- rcu_read_unlock();
-}
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr)
@@ -4113,6 +4149,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct perf_sample_data data;
int rctx;
+ preempt_disable_notrace();
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
return;
@@ -4122,6 +4159,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
perf_swevent_put_recursion_context(rctx);
+ preempt_enable_notrace();
}
static void perf_swevent_read(struct perf_event *event)
@@ -4131,16 +4169,28 @@ static void perf_swevent_read(struct perf_event *event)
static int perf_swevent_enable(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct perf_cpu_context *cpuctx;
+ struct hlist_head *head;
+
+ cpuctx = &__get_cpu_var(perf_cpu_context);
if (hwc->sample_period) {
hwc->last_period = hwc->sample_period;
perf_swevent_set_period(event);
}
+
+ head = find_swevent_head(cpuctx, event);
+ if (WARN_ON_ONCE(!head))
+ return -EINVAL;
+
+ hlist_add_head_rcu(&event->hlist_entry, head);
+
return 0;
}
static void perf_swevent_disable(struct perf_event *event)
{
+ hlist_del_rcu(&event->hlist_entry);
}
static const struct pmu perf_ops_generic = {
@@ -4168,15 +4218,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
regs = get_irq_regs();
- /*
- * In case we exclude kernel IPs or are somehow not in interrupt
- * context, provide the next best thing, the user IP.
- */
- if ((event->attr.exclude_kernel || !regs) &&
- !event->attr.exclude_user)
- regs = task_pt_regs(current);
- if (regs) {
+ if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && current->pid == 0))
if (perf_event_overflow(event, 0, &data, regs))
ret = HRTIMER_NORESTART;
@@ -4324,27 +4367,122 @@ static const struct pmu perf_ops_task_clock = {
.read = task_clock_perf_event_read,
};
-#ifdef CONFIG_EVENT_TRACING
+/* Deref the hlist from the update side */
+static inline struct swevent_hlist *
+swevent_hlist_deref(struct perf_cpu_context *cpuctx)
+{
+ return rcu_dereference_protected(cpuctx->swevent_hlist,
+ lockdep_is_held(&cpuctx->hlist_mutex));
+}
-void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
- int entry_size, struct pt_regs *regs)
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
{
- struct perf_sample_data data;
- struct perf_raw_record raw = {
- .size = entry_size,
- .data = record,
- };
+ struct swevent_hlist *hlist;
- perf_sample_data_init(&data, addr);
- data.raw = &raw;
+ hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+ kfree(hlist);
+}
- /* Trace events already protected against recursion */
- do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
- &data, regs);
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+ struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx);
+
+ if (!hlist)
+ return;
+
+ rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+ call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
}
-EXPORT_SYMBOL_GPL(perf_tp_event);
-static int perf_tp_event_match(struct perf_event *event,
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+ mutex_lock(&cpuctx->hlist_mutex);
+
+ if (!--cpuctx->hlist_refcount)
+ swevent_hlist_release(cpuctx);
+
+ mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+ int cpu;
+
+ if (event->cpu != -1) {
+ swevent_hlist_put_cpu(event, event->cpu);
+ return;
+ }
+
+ for_each_possible_cpu(cpu)
+ swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ int err = 0;
+
+ mutex_lock(&cpuctx->hlist_mutex);
+
+ if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) {
+ struct swevent_hlist *hlist;
+
+ hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+ if (!hlist) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+ }
+ cpuctx->hlist_refcount++;
+ exit:
+ mutex_unlock(&cpuctx->hlist_mutex);
+
+ return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+ int err;
+ int cpu, failed_cpu;
+
+ if (event->cpu != -1)
+ return swevent_hlist_get_cpu(event, event->cpu);
+
+ get_online_cpus();
+ for_each_possible_cpu(cpu) {
+ err = swevent_hlist_get_cpu(event, cpu);
+ if (err) {
+ failed_cpu = cpu;
+ goto fail;
+ }
+ }
+ put_online_cpus();
+
+ return 0;
+ fail:
+ for_each_possible_cpu(cpu) {
+ if (cpu == failed_cpu)
+ break;
+ swevent_hlist_put_cpu(event, cpu);
+ }
+
+ put_online_cpus();
+ return err;
+}
+
+#ifdef CONFIG_EVENT_TRACING
+
+static const struct pmu perf_ops_tracepoint = {
+ .enable = perf_trace_enable,
+ .disable = perf_trace_disable,
+ .read = perf_swevent_read,
+ .unthrottle = perf_swevent_unthrottle,
+};
+
+static int perf_tp_filter_match(struct perf_event *event,
struct perf_sample_data *data)
{
void *record = data->raw->data;
@@ -4354,13 +4492,55 @@ static int perf_tp_event_match(struct perf_event *event,
return 0;
}
+static int perf_tp_event_match(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ /*
+ * All tracepoints are from kernel-space.
+ */
+ if (event->attr.exclude_kernel)
+ return 0;
+
+ if (!perf_tp_filter_match(event, data))
+ return 0;
+
+ return 1;
+}
+
+void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
+ struct pt_regs *regs, struct hlist_head *head)
+{
+ struct perf_sample_data data;
+ struct perf_event *event;
+ struct hlist_node *node;
+
+ struct perf_raw_record raw = {
+ .size = entry_size,
+ .data = record,
+ };
+
+ perf_sample_data_init(&data, addr);
+ data.raw = &raw;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_add(event, count, 1, &data, regs);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(perf_tp_event);
+
static void tp_perf_event_destroy(struct perf_event *event)
{
- perf_trace_disable(event->attr.config);
+ perf_trace_destroy(event);
}
static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
+ int err;
+
/*
* Raw tracepoint data is a severe data leak, only allow root to
* have these.
@@ -4370,12 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (perf_trace_enable(event->attr.config))
+ err = perf_trace_init(event);
+ if (err)
return NULL;
event->destroy = tp_perf_event_destroy;
- return &perf_ops_generic;
+ return &perf_ops_tracepoint;
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4403,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event)
#else
-static int perf_tp_event_match(struct perf_event *event,
- struct perf_sample_data *data)
-{
- return 1;
-}
-
static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
return NULL;
@@ -4474,6 +4649,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
WARN_ON(event->parent);
atomic_dec(&perf_swevent_enabled[event_id]);
+ swevent_hlist_put(event);
}
static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4688,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
+ int err;
+
+ err = swevent_hlist_get(event);
+ if (err)
+ return ERR_PTR(err);
+
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
}
@@ -4738,6 +4920,13 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
int fput_needed = 0;
int ret = -EINVAL;
+ /*
+ * Don't allow output of inherited per-task events. This would
+ * create performance issues due to cross cpu access.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+
if (!output_fd)
goto set;
@@ -4758,6 +4947,18 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
if (event->data)
goto out;
+ /*
+ * Don't allow cross-cpu buffers
+ */
+ if (output_event->cpu != event->cpu)
+ goto out;
+
+ /*
+ * If its not a per-cpu buffer, it must be the same task.
+ */
+ if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ goto out;
+
atomic_long_inc(&output_file->f_count);
set:
@@ -4798,8 +4999,8 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_context *ctx;
struct file *event_file = NULL;
struct file *group_file = NULL;
+ int event_fd;
int fput_needed = 0;
- int fput_needed2 = 0;
int err;
/* for future expandability... */
@@ -4820,12 +5021,18 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}
+ event_fd = get_unused_fd_flags(O_RDWR);
+ if (event_fd < 0)
+ return event_fd;
+
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(pid, cpu);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto err_fd;
+ }
/*
* Look up the group leader (we will attach this event to it):
@@ -4865,13 +5072,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (IS_ERR(event))
goto err_put_context;
- err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
- if (err < 0)
- goto err_free_put_context;
-
- event_file = fget_light(err, &fput_needed2);
- if (!event_file)
+ event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
+ if (IS_ERR(event_file)) {
+ err = PTR_ERR(event_file);
goto err_free_put_context;
+ }
if (flags & PERF_FLAG_FD_OUTPUT) {
err = perf_event_set_output(event, group_fd);
@@ -4892,19 +5097,19 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
-err_fput_free_put_context:
- fput_light(event_file, fput_needed2);
+ fput_light(group_file, fput_needed);
+ fd_install(event_fd, event_file);
+ return event_fd;
+err_fput_free_put_context:
+ fput(event_file);
err_free_put_context:
- if (err < 0)
- free_event(event);
-
+ free_event(event);
err_put_context:
- if (err < 0)
- put_ctx(ctx);
-
fput_light(group_file, fput_needed);
-
+ put_ctx(ctx);
+err_fd:
+ put_unused_fd(event_fd);
return err;
}
@@ -5176,7 +5381,7 @@ void perf_event_exit_task(struct task_struct *child)
*
* But since its the parent context it won't be the same instance.
*/
- mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&child_ctx->mutex);
again:
list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
@@ -5384,6 +5589,7 @@ static void __init perf_event_init_all_cpus(void)
for_each_possible_cpu(cpu) {
cpuctx = &per_cpu(perf_cpu_context, cpu);
+ mutex_init(&cpuctx->hlist_mutex);
__perf_event_init_context(&cpuctx->ctx, NULL);
}
}
@@ -5397,6 +5603,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
spin_lock(&perf_resource_lock);
cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
spin_unlock(&perf_resource_lock);
+
+ mutex_lock(&cpuctx->hlist_mutex);
+ if (cpuctx->hlist_refcount > 0) {
+ struct swevent_hlist *hlist;
+
+ hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+ WARN_ON_ONCE(!hlist);
+ rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+ }
+ mutex_unlock(&cpuctx->hlist_mutex);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5632,10 @@ static void perf_event_exit_cpu(int cpu)
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_event_context *ctx = &cpuctx->ctx;
+ mutex_lock(&cpuctx->hlist_mutex);
+ swevent_hlist_release(cpuctx);
+ mutex_unlock(&cpuctx->hlist_mutex);
+
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
mutex_unlock(&ctx->mutex);
diff --git a/kernel/pid.c b/kernel/pid.c
index aebb30d9c23..e9fd8c132d2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -513,6 +513,13 @@ void __init pidhash_init(void)
void __init pidmap_init(void)
{
+ /* bump default and minimum pid_max based on number of cpus */
+ pid_max = min(pid_max_max, max_t(int, pid_max,
+ PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
+ pid_max_min = max_t(int, pid_max_min,
+ PIDS_PER_CPU_MIN * num_possible_cpus());
+ pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 3db49b9ca37..f42d3f737a3 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -2,7 +2,7 @@
* This module exposes the interface to kernel space for specifying
* QoS dependencies. It provides infrastructure for registration of:
*
- * Dependents on a QoS value : register requirements
+ * Dependents on a QoS value : register requests
* Watchers of QoS value : get notified when target QoS value changes
*
* This QoS design is best effort based. Dependents register their QoS needs.
@@ -14,19 +14,21 @@
* timeout: usec <-- currently not used.
* throughput: kbs (kilo byte / sec)
*
- * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ * There are lists of pm_qos_objects each one wrapping requests, notifiers
*
- * User mode requirements on a QOS parameter register themselves to the
+ * User mode requests on a QOS parameter register themselves to the
* subsystem by opening the device node /dev/... and writing there request to
* the node. As long as the process holds a file handle open to the node the
* client continues to be accounted for. Upon file release the usermode
- * requirement is removed and a new qos target is computed. This way when the
- * requirement that the application has is cleaned up when closes the file
+ * request is removed and a new qos target is computed. This way when the
+ * request that the application has is cleaned up when closes the file
* pointer or exits the pm_qos_object will get an opportunity to clean up.
*
* Mark Gross <mgross@linux.intel.com>
*/
+/*#define DEBUG*/
+
#include <linux/pm_qos_params.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -42,25 +44,25 @@
#include <linux/uaccess.h>
/*
- * locking rule: all changes to requirements or notifiers lists
+ * locking rule: all changes to requests or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
-struct requirement_list {
+struct pm_qos_request_list {
struct list_head list;
union {
s32 value;
s32 usec;
s32 kbps;
};
- char *name;
+ int pm_qos_class;
};
static s32 max_compare(s32 v1, s32 v2);
static s32 min_compare(s32 v1, s32 v2);
struct pm_qos_object {
- struct requirement_list requirements;
+ struct pm_qos_request_list requests;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
char *name;
@@ -72,7 +74,7 @@ struct pm_qos_object {
static struct pm_qos_object null_pm_qos;
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
static struct pm_qos_object cpu_dma_pm_qos = {
- .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
static struct pm_qos_object network_lat_pm_qos = {
- .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
.notifiers = &network_lat_notifier,
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = {
static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
static struct pm_qos_object network_throughput_pm_qos = {
- .requirements =
- {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
.default_value = 0,
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2)
}
-static void update_target(int target)
+static void update_target(int pm_qos_class)
{
s32 extreme_value;
- struct requirement_list *node;
+ struct pm_qos_request_list *node;
unsigned long flags;
int call_notifier = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
- extreme_value = pm_qos_array[target]->default_value;
+ extreme_value = pm_qos_array[pm_qos_class]->default_value;
list_for_each_entry(node,
- &pm_qos_array[target]->requirements.list, list) {
- extreme_value = pm_qos_array[target]->comparitor(
+ &pm_qos_array[pm_qos_class]->requests.list, list) {
+ extreme_value = pm_qos_array[pm_qos_class]->comparitor(
extreme_value, node->value);
}
- if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
+ if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
+ extreme_value) {
call_notifier = 1;
- atomic_set(&pm_qos_array[target]->target_value, extreme_value);
- pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
- atomic_read(&pm_qos_array[target]->target_value));
+ atomic_set(&pm_qos_array[pm_qos_class]->target_value,
+ extreme_value);
+ pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
+ atomic_read(&pm_qos_array[pm_qos_class]->target_value));
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (call_notifier)
- blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
- (unsigned long) extreme_value, NULL);
+ blocking_notifier_call_chain(
+ pm_qos_array[pm_qos_class]->notifiers,
+ (unsigned long) extreme_value, NULL);
}
static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -185,125 +189,112 @@ static int find_pm_qos_object_by_minor(int minor)
}
/**
- * pm_qos_requirement - returns current system wide qos expectation
+ * pm_qos_request - returns current system wide qos expectation
* @pm_qos_class: identification of which qos value is requested
*
* This function returns the current target value in an atomic manner.
*/
-int pm_qos_requirement(int pm_qos_class)
+int pm_qos_request(int pm_qos_class)
{
return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
}
-EXPORT_SYMBOL_GPL(pm_qos_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_request);
/**
- * pm_qos_add_requirement - inserts new qos request into the list
+ * pm_qos_add_request - inserts new qos request into the list
* @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
* performance characteristics. It recomputes the aggregate QoS expectations
- * for the pm_qos_class of parameters.
+ * for the pm_qos_class of parameters, and returns the pm_qos_request list
+ * element as a handle for use in updating and removal. Call needs to save
+ * this handle for later use.
*/
-int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
+struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
{
- struct requirement_list *dep;
+ struct pm_qos_request_list *dep;
unsigned long flags;
- dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+ dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
if (dep) {
if (value == PM_QOS_DEFAULT_VALUE)
dep->value = pm_qos_array[pm_qos_class]->default_value;
else
dep->value = value;
- dep->name = kstrdup(name, GFP_KERNEL);
- if (!dep->name)
- goto cleanup;
+ dep->pm_qos_class = pm_qos_class;
spin_lock_irqsave(&pm_qos_lock, flags);
list_add(&dep->list,
- &pm_qos_array[pm_qos_class]->requirements.list);
+ &pm_qos_array[pm_qos_class]->requests.list);
spin_unlock_irqrestore(&pm_qos_lock, flags);
update_target(pm_qos_class);
-
- return 0;
}
-cleanup:
- kfree(dep);
- return -ENOMEM;
+ return dep;
}
-EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_add_request);
/**
- * pm_qos_update_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_update_request - modifies an existing qos request
+ * @pm_qos_req : handle to list element holding a pm_qos request to use
* @value: defines the qos request
*
- * Updates an existing qos requirement for the pm_qos_class of parameters along
+ * Updates an existing qos request for the pm_qos_class of parameters along
* with updating the target pm_qos_class value.
*
- * If the named request isn't in the list then no change is made.
+ * Attempts are made to make this code callable on hot code paths.
*/
-int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
+void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
+ s32 new_value)
{
unsigned long flags;
- struct requirement_list *node;
int pending_update = 0;
+ s32 temp;
- spin_lock_irqsave(&pm_qos_lock, flags);
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requirements.list, list) {
- if (strcmp(node->name, name) == 0) {
- if (new_value == PM_QOS_DEFAULT_VALUE)
- node->value =
- pm_qos_array[pm_qos_class]->default_value;
- else
- node->value = new_value;
+ if (pm_qos_req) { /*guard against callers passing in null */
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ if (new_value == PM_QOS_DEFAULT_VALUE)
+ temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
+ else
+ temp = new_value;
+
+ if (temp != pm_qos_req->value) {
pending_update = 1;
- break;
+ pm_qos_req->value = temp;
}
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+ if (pending_update)
+ update_target(pm_qos_req->pm_qos_class);
}
- spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (pending_update)
- update_target(pm_qos_class);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_update_request);
/**
- * pm_qos_remove_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_remove_request - modifies an existing qos request
+ * @pm_qos_req: handle to request list element
*
- * Will remove named qos request from pm_qos_class list of parameters and
- * recompute the current target value for the pm_qos_class.
+ * Will remove pm qos request from the list of requests and
+ * recompute the current target value for the pm_qos_class. Call this
+ * on slow code paths.
*/
-void pm_qos_remove_requirement(int pm_qos_class, char *name)
+void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
{
unsigned long flags;
- struct requirement_list *node;
- int pending_update = 0;
+ int qos_class;
+ if (pm_qos_req == NULL)
+ return;
+ /* silent return to keep pcm code cleaner */
+
+ qos_class = pm_qos_req->pm_qos_class;
spin_lock_irqsave(&pm_qos_lock, flags);
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requirements.list, list) {
- if (strcmp(node->name, name) == 0) {
- kfree(node->name);
- list_del(&node->list);
- kfree(node);
- pending_update = 1;
- break;
- }
- }
+ list_del(&pm_qos_req->list);
+ kfree(pm_qos_req);
spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (pending_update)
- update_target(pm_qos_class);
+ update_target(qos_class);
}
-EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_remove_request);
/**
* pm_qos_add_notifier - sets notification entry for changes to target value
@@ -313,7 +304,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
* will register the notifier into a notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
- int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
{
int retval;
@@ -343,21 +334,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
}
EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
-#define PID_NAME_LEN 32
-
static int pm_qos_power_open(struct inode *inode, struct file *filp)
{
- int ret;
long pm_qos_class;
- char name[PID_NAME_LEN];
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
- filp->private_data = (void *)pm_qos_class;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- ret = pm_qos_add_requirement(pm_qos_class, name,
- PM_QOS_DEFAULT_VALUE);
- if (ret >= 0)
+ filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
+ PM_QOS_DEFAULT_VALUE);
+
+ if (filp->private_data)
return 0;
}
return -EPERM;
@@ -365,32 +351,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
- int pm_qos_class;
- char name[PID_NAME_LEN];
+ struct pm_qos_request_list *req;
- pm_qos_class = (long)filp->private_data;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- pm_qos_remove_requirement(pm_qos_class, name);
+ req = (struct pm_qos_request_list *)filp->private_data;
+ pm_qos_remove_request(req);
return 0;
}
+
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
s32 value;
- int pm_qos_class;
- char name[PID_NAME_LEN];
-
- pm_qos_class = (long)filp->private_data;
- if (count != sizeof(s32))
+ int x;
+ char ascii_value[11];
+ struct pm_qos_request_list *pm_qos_req;
+
+ if (count == sizeof(s32)) {
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ } else if (count == 11) { /* len('0x12345678/0') */
+ if (copy_from_user(ascii_value, buf, 11))
+ return -EFAULT;
+ x = sscanf(ascii_value, "%x", &value);
+ if (x != 1)
+ return -EINVAL;
+ pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value);
+ } else
return -EINVAL;
- if (copy_from_user(&value, buf, sizeof(s32)))
- return -EFAULT;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- pm_qos_update_requirement(pm_qos_class, name, value);
- return sizeof(s32);
+ pm_qos_req = (struct pm_qos_request_list *)filp->private_data;
+ pm_qos_update_request(pm_qos_req, value);
+
+ return count;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a44..9829646d399 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
#include <trace/events/timer.h>
/*
- * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
*/
void update_rlimit_cpu(unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
- struct signal_struct *const sig = current->signal;
- if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
- cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
- spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
- spin_unlock_irq(&current->sighand->siglock);
- }
+ spin_lock_irq(&current->sighand->siglock);
+ set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+ spin_unlock_irq(&current->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)
@@ -364,7 +363,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
}
} else {
read_lock(&tasklist_lock);
- if (thread_group_leader(p) && p->signal) {
+ if (thread_group_leader(p) && p->sighand) {
error =
cpu_clock_sample_group(which_clock,
p, &rtn);
@@ -440,7 +439,7 @@ int posix_cpu_timer_del(struct k_itimer *timer)
if (likely(p != NULL)) {
read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
+ if (unlikely(p->sighand == NULL)) {
/*
* We raced with the reaping of the task.
* The deletion should have cleared us off the list.
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
cputime_gt(expires, new_exp);
}
-static inline int expires_le(cputime_t expires, cputime_t new_exp)
-{
- return !cputime_eq(expires, cputime_zero) &&
- cputime_le(expires, new_exp);
-}
/*
* Insert the timer on the appropriate list before any timers that
* expire later. This must be called with the tasklist_lock held
- * for reading, and interrupts disabled.
+ * for reading, interrupts disabled and p->sighand->siglock taken.
*/
-static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
+static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
+ struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
- unsigned long i;
- head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
- p->cpu_timers : p->signal->cpu_timers);
+ if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+ head = p->cpu_timers;
+ cputime_expires = &p->cputime_expires;
+ } else {
+ head = p->signal->cpu_timers;
+ cputime_expires = &p->signal->cputime_expires;
+ }
head += CPUCLOCK_WHICH(timer->it_clock);
- BUG_ON(!irqs_disabled());
- spin_lock(&p->sighand->siglock);
-
listpos = head;
- if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
- list_for_each_entry(next, head, entry) {
- if (next->expires.sched > nt->expires.sched)
- break;
- listpos = &next->entry;
- }
- } else {
- list_for_each_entry(next, head, entry) {
- if (cputime_gt(next->expires.cpu, nt->expires.cpu))
- break;
- listpos = &next->entry;
- }
+ list_for_each_entry(next, head, entry) {
+ if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+ break;
+ listpos = &next->entry;
}
list_add(&nt->entry, listpos);
if (listpos == head) {
+ union cpu_time_count *exp = &nt->expires;
+
/*
- * We are the new earliest-expiring timer.
- * If we are a thread timer, there can always
- * be a process timer telling us to stop earlier.
+ * We are the new earliest-expiring POSIX 1.b timer, hence
+ * need to update expiration cache. Take into account that
+ * for process timers we share expiration cache with itimers
+ * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
*/
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- union cpu_time_count *exp = &nt->expires;
-
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_PROF:
- if (expires_gt(p->cputime_expires.prof_exp,
- exp->cpu))
- p->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(p->cputime_expires.virt_exp,
- exp->cpu))
- p->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- if (p->cputime_expires.sched_exp == 0 ||
- p->cputime_expires.sched_exp > exp->sched)
- p->cputime_expires.sched_exp =
- exp->sched;
- break;
- }
- } else {
- struct signal_struct *const sig = p->signal;
- union cpu_time_count *exp = &timer->it.cpu.expires;
-
- /*
- * For a process timer, set the cached expiration time.
- */
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_VIRT:
- if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
- exp->cpu))
- break;
- sig->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_PROF:
- if (expires_le(sig->it[CPUCLOCK_PROF].expires,
- exp->cpu))
- break;
- i = sig->rlim[RLIMIT_CPU].rlim_cur;
- if (i != RLIM_INFINITY &&
- i <= cputime_to_secs(exp->cpu))
- break;
- sig->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- sig->cputime_expires.sched_exp = exp->sched;
- break;
- }
+ switch (CPUCLOCK_WHICH(timer->it_clock)) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(cputime_expires->prof_exp, exp->cpu))
+ cputime_expires->prof_exp = exp->cpu;
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(cputime_expires->virt_exp, exp->cpu))
+ cputime_expires->virt_exp = exp->cpu;
+ break;
+ case CPUCLOCK_SCHED:
+ if (cputime_expires->sched_exp == 0 ||
+ cputime_expires->sched_exp > exp->sched)
+ cputime_expires->sched_exp = exp->sched;
+ break;
}
}
-
- spin_unlock(&p->sighand->siglock);
}
/*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
- if (unlikely(timer->sigq == NULL)) {
+ if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+ /*
+ * User don't want any signal.
+ */
+ timer->it.cpu.expires.sched = 0;
+ } else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count old_expires, new_expires, val;
+ union cpu_time_count old_expires, new_expires, old_incr, val;
int ret;
if (unlikely(p == NULL)) {
@@ -736,10 +691,10 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
read_lock(&tasklist_lock);
/*
* We need the tasklist_lock to protect against reaping that
- * clears p->signal. If p has just been reaped, we can no
+ * clears p->sighand. If p has just been reaped, we can no
* longer get any information about it at all.
*/
- if (unlikely(p->signal == NULL)) {
+ if (unlikely(p->sighand == NULL)) {
read_unlock(&tasklist_lock);
put_task_struct(p);
timer->it.cpu.task = NULL;
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
BUG_ON(!irqs_disabled());
ret = 0;
+ old_incr = timer->it.cpu.incr;
spin_lock(&p->sighand->siglock);
old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
ret = TIMER_RETRY;
} else
list_del_init(&timer->it.cpu.entry);
- spin_unlock(&p->sighand->siglock);
/*
* We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* disable this firing since we are already reporting
* it as an overrun (thanks to bump_cpu_timer above).
*/
+ spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
goto out;
}
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
*/
timer->it.cpu.expires = new_expires;
if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
cpu_time_before(timer->it_clock, val, new_expires)) {
- arm_timer(timer, val);
+ arm_timer(timer);
}
+ spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
/*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun = -1;
if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
!cpu_time_before(timer->it_clock, val, new_expires)) {
/*
* The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
out:
if (old) {
sample_to_timespec(timer->it_clock,
- timer->it.cpu.incr, &old->it_interval);
+ old_incr, &old->it_interval);
}
return ret;
}
@@ -908,7 +863,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
clear_dead = p->exit_state;
} else {
read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
+ if (unlikely(p->sighand == NULL)) {
/*
* The process has been reaped.
* We can't even collect a sample any more.
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock);
}
- if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- if (timer->it.cpu.incr.sched == 0 &&
- cpu_time_before(timer->it_clock,
- timer->it.cpu.expires, now)) {
- /*
- * Do-nothing timer expired and has no reload,
- * so it's as if it was never set.
- */
- timer->it.cpu.expires.sched = 0;
- itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
- return;
- }
- /*
- * Account for any expirations and reloads that should
- * have happened.
- */
- bump_cpu_timer(timer, now);
- }
-
if (unlikely(clear_dead)) {
/*
* We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
- if (!cputimer->running)
- return;
-
spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
-
- sig->cputime_expires.prof_exp = cputime_zero;
- sig->cputime_expires.virt_exp = cputime_zero;
- sig->cputime_expires.sched_exp = 0;
}
static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
}
}
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime: The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero. Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+ if (cputime_eq(cputime->utime, cputime_zero) &&
+ cputime_eq(cputime->stime, cputime_zero) &&
+ cputime->sum_exec_runtime == 0)
+ return 1;
+ return 0;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them
* off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
unsigned long soft;
/*
- * Don't sample the current process CPU clocks if there are no timers.
- */
- if (list_empty(&timers[CPUCLOCK_PROF]) &&
- cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
- sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
- list_empty(&timers[CPUCLOCK_VIRT]) &&
- cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(sig);
- return;
- }
-
- /*
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
}
}
- if (!cputime_eq(prof_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
- sig->cputime_expires.prof_exp = prof_expires;
- if (!cputime_eq(virt_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
- sig->cputime_expires.virt_exp = virt_expires;
- if (sched_expires != 0 &&
- (sig->cputime_expires.sched_exp == 0 ||
- sig->cputime_expires.sched_exp > sched_expires))
- sig->cputime_expires.sched_exp = sched_expires;
+ sig->cputime_expires.prof_exp = prof_expires;
+ sig->cputime_expires.virt_exp = virt_expires;
+ sig->cputime_expires.sched_exp = sched_expires;
+ if (task_cputime_zero(&sig->cputime_expires))
+ stop_process_timers(sig);
}
/*
@@ -1270,9 +1196,10 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
goto out;
}
read_lock(&tasklist_lock); /* arm_timer needs it. */
+ spin_lock(&p->sighand->siglock);
} else {
read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
+ if (unlikely(p->sighand == NULL)) {
/*
* The process has been reaped.
* We can't even collect a sample any more.
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
clear_dead_task(timer, now);
goto out_unlock;
}
+ spin_lock(&p->sighand->siglock);
cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
/* Leave the tasklist_lock locked for the call below. */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
/*
* Now re-arm for the new expiry time.
*/
- arm_timer(timer, now);
+ BUG_ON(!irqs_disabled());
+ arm_timer(timer);
+ spin_unlock(&p->sighand->siglock);
out_unlock:
read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
}
/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero. Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
- if (cputime_eq(cputime->utime, cputime_zero) &&
- cputime_eq(cputime->stime, cputime_zero) &&
- cputime->sum_exec_runtime == 0)
- return 1;
- return 0;
-}
-
-/**
* task_cputime_expired - Compare two task_cputime entities.
*
* @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
}
sig = tsk->signal;
- if (!task_cputime_zero(&sig->cputime_expires)) {
+ if (sig->cputimer.running) {
struct task_cputime group_sample;
thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
return 1;
}
- return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+ return 0;
}
/*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* put them on the firing list.
*/
check_thread_timers(tsk, &firing);
- check_process_timers(tsk, &firing);
+ /*
+ * If there are any active process wide timers (POSIX 1.b, itimers,
+ * RLIMIT_CPU) cputimer must be running.
+ */
+ if (tsk->signal->cputimer.running)
+ check_process_timers(tsk, &firing);
/*
* We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
- * Set one of the process-wide special case CPU timers.
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller.
- * The *newval argument is relative and we update it to be absolute, *oldval
- * is absolute and we update it to be relative.
*/
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
union cpu_time_count now;
- struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
+ /*
+ * We are setting itimer. The *oldval is absolute and we update
+ * it to be relative, *newval argument is relative and we update
+ * it to be absolute.
+ */
if (!cputime_eq(*oldval, cputime_zero)) {
if (cputime_le(*oldval, now.cpu)) {
/* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
if (cputime_eq(*newval, cputime_zero))
return;
*newval = cputime_add(*newval, now.cpu);
-
- /*
- * If the RLIMIT_CPU timer will expire before the
- * ITIMER_PROF timer, we have nothing else to do.
- */
- if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
- < cputime_to_secs(*newval))
- return;
}
/*
- * Check whether there are any process timers already set to fire
- * before this one. If so, we don't have anything more to do.
+ * Update expiration cache if we are the earliest timer, or eventually
+ * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/
- head = &tsk->signal->cpu_timers[clock_idx];
- if (list_empty(head) ||
- cputime_ge(list_first_entry(head,
- struct cpu_timer_list, entry)->expires.cpu,
- *newval)) {
- switch (clock_idx) {
- case CPUCLOCK_PROF:
+ switch (clock_idx) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
tsk->signal->cputime_expires.prof_exp = *newval;
- break;
- case CPUCLOCK_VIRT:
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
tsk->signal->cputime_expires.virt_exp = *newval;
- break;
- }
+ break;
}
}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 00d1fda58ab..ad723420acc 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -559,14 +559,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
new_timer->it_overrun = -1;
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
- if (error)
- goto out;
- /*
- * return the timer_id now. The next step is hard to
- * back out if there is an error.
- */
if (copy_to_user(created_timer_id,
&new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
@@ -597,6 +590,10 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_tid = new_timer->it_id;
new_timer->sigq->info.si_code = SI_TIMER;
+ error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ if (error)
+ goto out;
+
spin_lock_irq(&current->sighand->siglock);
new_timer->it_signal = current->signal;
list_add(&new_timer->list, &current->signal->posix_timers);
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 43191815f87..524e058dcf0 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
+ block_io.o
obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
new file mode 100644
index 00000000000..97024fd40cd
--- /dev/null
+++ b/kernel/power/block_io.c
@@ -0,0 +1,103 @@
+/*
+ * This file provides functions for block I/O operations on swap/file.
+ *
+ * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include "power.h"
+
+/**
+ * submit - submit BIO request.
+ * @rw: READ or WRITE.
+ * @off physical offset of page.
+ * @page: page we're reading or writing.
+ * @bio_chain: list of pending biod (for async reading)
+ *
+ * Straight from the textbook - allocate and initialize the bio.
+ * If we're reading, make sure the page is marked as dirty.
+ * Then submit it and, if @bio_chain == NULL, wait.
+ */
+static int submit(int rw, struct block_device *bdev, sector_t sector,
+ struct page *page, struct bio **bio_chain)
+{
+ const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+ struct bio *bio;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ bio->bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = end_swap_bio_read;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+ (unsigned long long)sector);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ lock_page(page);
+ bio_get(bio);
+
+ if (bio_chain == NULL) {
+ submit_bio(bio_rw, bio);
+ wait_on_page_locked(page);
+ if (rw == READ)
+ bio_set_pages_dirty(bio);
+ bio_put(bio);
+ } else {
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ submit_bio(bio_rw, bio);
+ }
+ return 0;
+}
+
+int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_wait_on_bio_chain(struct bio **bio_chain)
+{
+ struct bio *bio;
+ struct bio *next_bio;
+ int ret = 0;
+
+ if (bio_chain == NULL)
+ return 0;
+
+ bio = *bio_chain;
+ if (bio == NULL)
+ return 0;
+ while (bio) {
+ struct page *page;
+
+ next_bio = bio->bi_private;
+ page = bio->bi_io_vec[0].bv_page;
+ wait_on_page_locked(page);
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ put_page(page);
+ bio_put(bio);
+ bio = next_bio;
+ }
+ *bio_chain = NULL;
+ return ret;
+}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46c5a26630a..006270fe382 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -97,24 +97,12 @@ extern int hibernate_preallocate_memory(void);
*/
struct snapshot_handle {
- loff_t offset; /* number of the last byte ready for reading
- * or writing in the sequence
- */
unsigned int cur; /* number of the block of PAGE_SIZE bytes the
* next operation will refer to (ie. current)
*/
- unsigned int cur_offset; /* offset with respect to the current
- * block (for the next operation)
- */
- unsigned int prev; /* number of the block of PAGE_SIZE bytes that
- * was the current one previously
- */
void *buffer; /* address of the block to read from
* or write to
*/
- unsigned int buf_offset; /* location to read from or write to,
- * given as a displacement from 'buffer'
- */
int sync_read; /* Set to one to notify the caller of
* snapshot_write_next() that it may
* need to call wait_on_bio_chain()
@@ -125,12 +113,12 @@ struct snapshot_handle {
* snapshot_read_next()/snapshot_write_next() is allowed to
* read/write data after the function returns
*/
-#define data_of(handle) ((handle).buffer + (handle).buf_offset)
+#define data_of(handle) ((handle).buffer)
extern unsigned int snapshot_additional_pages(struct zone *zone);
extern unsigned long snapshot_get_image_size(void);
-extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
-extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
+extern int snapshot_read_next(struct snapshot_handle *handle);
+extern int snapshot_write_next(struct snapshot_handle *handle);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
@@ -154,6 +142,15 @@ extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
extern void swsusp_close(fmode_t);
+/* kernel/power/block_io.c */
+extern struct block_device *hib_resume_bdev;
+
+extern int hib_bio_read_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_bio_write_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_wait_on_bio_chain(struct bio **bio_chain);
+
struct timeval;
/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index be861c26dda..25ce010e9f8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1604,14 +1604,9 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
- * The @count parameter should contain the number of bytes the caller
- * wants to read from the snapshot. It must not be zero.
- *
* On success the function returns a positive number. Then, the caller
* is allowed to read up to the returned number of bytes from the memory
- * location computed by the data_of() macro. The number returned
- * may be smaller than @count, but this only happens if the read would
- * cross a page boundary otherwise.
+ * location computed by the data_of() macro.
*
* The function returns 0 to indicate the end of data stream condition,
* and a negative number is returned on error. In such cases the
@@ -1619,7 +1614,7 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
* any more.
*/
-int snapshot_read_next(struct snapshot_handle *handle, size_t count)
+int snapshot_read_next(struct snapshot_handle *handle)
{
if (handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
@@ -1630,7 +1625,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
if (!buffer)
return -ENOMEM;
}
- if (!handle->offset) {
+ if (!handle->cur) {
int error;
error = init_header((struct swsusp_info *)buffer);
@@ -1639,42 +1634,30 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
handle->buffer = buffer;
memory_bm_position_reset(&orig_bm);
memory_bm_position_reset(&copy_bm);
- }
- if (handle->prev < handle->cur) {
- if (handle->cur <= nr_meta_pages) {
- memset(buffer, 0, PAGE_SIZE);
- pack_pfns(buffer, &orig_bm);
- } else {
- struct page *page;
+ } else if (handle->cur <= nr_meta_pages) {
+ memset(buffer, 0, PAGE_SIZE);
+ pack_pfns(buffer, &orig_bm);
+ } else {
+ struct page *page;
- page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
- if (PageHighMem(page)) {
- /* Highmem pages are copied to the buffer,
- * because we can't return with a kmapped
- * highmem page (we may not be called again).
- */
- void *kaddr;
+ page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+ if (PageHighMem(page)) {
+ /* Highmem pages are copied to the buffer,
+ * because we can't return with a kmapped
+ * highmem page (we may not be called again).
+ */
+ void *kaddr;
- kaddr = kmap_atomic(page, KM_USER0);
- memcpy(buffer, kaddr, PAGE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- handle->buffer = buffer;
- } else {
- handle->buffer = page_address(page);
- }
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(buffer, kaddr, PAGE_SIZE);
+ kunmap_atomic(kaddr, KM_USER0);
+ handle->buffer = buffer;
+ } else {
+ handle->buffer = page_address(page);
}
- handle->prev = handle->cur;
- }
- handle->buf_offset = handle->cur_offset;
- if (handle->cur_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->cur_offset;
- handle->cur_offset = 0;
- handle->cur++;
- } else {
- handle->cur_offset += count;
}
- handle->offset += count;
- return count;
+ handle->cur++;
+ return PAGE_SIZE;
}
/**
@@ -2133,14 +2116,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
- * The @count parameter should contain the number of bytes the caller
- * wants to write to the image. It must not be zero.
- *
* On success the function returns a positive number. Then, the caller
* is allowed to write up to the returned number of bytes to the memory
- * location computed by the data_of() macro. The number returned
- * may be smaller than @count, but this only happens if the write would
- * cross a page boundary otherwise.
+ * location computed by the data_of() macro.
*
* The function returns 0 to indicate the "end of file" condition,
* and a negative number is returned on error. In such cases the
@@ -2148,16 +2126,18 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
* any more.
*/
-int snapshot_write_next(struct snapshot_handle *handle, size_t count)
+int snapshot_write_next(struct snapshot_handle *handle)
{
static struct chain_allocator ca;
int error = 0;
/* Check if we have already loaded the entire image */
- if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
- if (handle->offset == 0) {
+ handle->sync_read = 1;
+
+ if (!handle->cur) {
if (!buffer)
/* This makes the buffer be freed by swsusp_free() */
buffer = get_image_page(GFP_ATOMIC, PG_ANY);
@@ -2166,56 +2146,43 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return -ENOMEM;
handle->buffer = buffer;
- }
- handle->sync_read = 1;
- if (handle->prev < handle->cur) {
- if (handle->prev == 0) {
- error = load_header(buffer);
- if (error)
- return error;
+ } else if (handle->cur == 1) {
+ error = load_header(buffer);
+ if (error)
+ return error;
- error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
- if (error)
- return error;
+ error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
+ if (error)
+ return error;
+
+ } else if (handle->cur <= nr_meta_pages + 1) {
+ error = unpack_orig_pfns(buffer, &copy_bm);
+ if (error)
+ return error;
- } else if (handle->prev <= nr_meta_pages) {
- error = unpack_orig_pfns(buffer, &copy_bm);
+ if (handle->cur == nr_meta_pages + 1) {
+ error = prepare_image(&orig_bm, &copy_bm);
if (error)
return error;
- if (handle->prev == nr_meta_pages) {
- error = prepare_image(&orig_bm, &copy_bm);
- if (error)
- return error;
-
- chain_init(&ca, GFP_ATOMIC, PG_SAFE);
- memory_bm_position_reset(&orig_bm);
- restore_pblist = NULL;
- handle->buffer = get_buffer(&orig_bm, &ca);
- handle->sync_read = 0;
- if (IS_ERR(handle->buffer))
- return PTR_ERR(handle->buffer);
- }
- } else {
- copy_last_highmem_page();
+ chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+ memory_bm_position_reset(&orig_bm);
+ restore_pblist = NULL;
handle->buffer = get_buffer(&orig_bm, &ca);
+ handle->sync_read = 0;
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
- if (handle->buffer != buffer)
- handle->sync_read = 0;
}
- handle->prev = handle->cur;
- }
- handle->buf_offset = handle->cur_offset;
- if (handle->cur_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->cur_offset;
- handle->cur_offset = 0;
- handle->cur++;
} else {
- handle->cur_offset += count;
+ copy_last_highmem_page();
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
+ if (handle->buffer != buffer)
+ handle->sync_read = 0;
}
- handle->offset += count;
- return count;
+ handle->cur++;
+ return PAGE_SIZE;
}
/**
@@ -2230,7 +2197,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
{
copy_last_highmem_page();
/* Free only if we have loaded the image entirely */
- if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
free_highmem_data();
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 66824d71983..b0bb2177839 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -29,6 +29,40 @@
#define SWSUSP_SIG "S1SUSPEND"
+/*
+ * The swap map is a data structure used for keeping track of each page
+ * written to a swap partition. It consists of many swap_map_page
+ * structures that contain each an array of MAP_PAGE_SIZE swap entries.
+ * These structures are stored on the swap and linked together with the
+ * help of the .next_swap member.
+ *
+ * The swap map is created during suspend. The swap map pages are
+ * allocated and populated one at a time, so we only need one memory
+ * page to set up the entire structure.
+ *
+ * During resume we also only need to use one swap_map_page structure
+ * at a time.
+ */
+
+#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
+
+struct swap_map_page {
+ sector_t entries[MAP_PAGE_ENTRIES];
+ sector_t next_swap;
+};
+
+/**
+ * The swap_map_handle structure is used for handling swap in
+ * a file-alike way
+ */
+
+struct swap_map_handle {
+ struct swap_map_page *cur;
+ sector_t cur_swap;
+ sector_t first_sector;
+ unsigned int k;
+};
+
struct swsusp_header {
char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
sector_t image;
@@ -145,110 +179,24 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-static struct block_device *resume_bdev;
-
-/**
- * submit - submit BIO request.
- * @rw: READ or WRITE.
- * @off physical offset of page.
- * @page: page we're reading or writing.
- * @bio_chain: list of pending biod (for async reading)
- *
- * Straight from the textbook - allocate and initialize the bio.
- * If we're reading, make sure the page is marked as dirty.
- * Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, pgoff_t page_off, struct page *page,
- struct bio **bio_chain)
-{
- const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
- struct bio *bio;
-
- bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
- bio->bi_sector = page_off * (PAGE_SIZE >> 9);
- bio->bi_bdev = resume_bdev;
- bio->bi_end_io = end_swap_bio_read;
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
- page_off);
- bio_put(bio);
- return -EFAULT;
- }
-
- lock_page(page);
- bio_get(bio);
-
- if (bio_chain == NULL) {
- submit_bio(bio_rw, bio);
- wait_on_page_locked(page);
- if (rw == READ)
- bio_set_pages_dirty(bio);
- bio_put(bio);
- } else {
- if (rw == READ)
- get_page(page); /* These pages are freed later */
- bio->bi_private = *bio_chain;
- *bio_chain = bio;
- submit_bio(bio_rw, bio);
- }
- return 0;
-}
-
-static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(READ, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int wait_on_bio_chain(struct bio **bio_chain)
-{
- struct bio *bio;
- struct bio *next_bio;
- int ret = 0;
-
- if (bio_chain == NULL)
- return 0;
-
- bio = *bio_chain;
- if (bio == NULL)
- return 0;
- while (bio) {
- struct page *page;
-
- next_bio = bio->bi_private;
- page = bio->bi_io_vec[0].bv_page;
- wait_on_page_locked(page);
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
- put_page(page);
- bio_put(bio);
- bio = next_bio;
- }
- *bio_chain = NULL;
- return ret;
-}
+struct block_device *hib_resume_bdev;
/*
* Saving part
*/
-static int mark_swapfiles(sector_t start, unsigned int flags)
+static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
- bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
- swsusp_header->image = start;
+ swsusp_header->image = handle->first_sector;
swsusp_header->flags = flags;
- error = bio_write_page(swsusp_resume_block,
+ error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Swap header not found!\n");
@@ -260,25 +208,26 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
/**
* swsusp_swap_check - check if the resume device is a swap device
* and get its index (if so)
+ *
+ * This is called before saving image
*/
-
-static int swsusp_swap_check(void) /* This is called before saving image */
+static int swsusp_swap_check(void)
{
int res;
res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
- &resume_bdev);
+ &hib_resume_bdev);
if (res < 0)
return res;
root_swap = res;
- res = blkdev_get(resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
if (res)
return res;
- res = set_blocksize(resume_bdev, PAGE_SIZE);
+ res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
if (res < 0)
- blkdev_put(resume_bdev, FMODE_WRITE);
+ blkdev_put(hib_resume_bdev, FMODE_WRITE);
return res;
}
@@ -309,42 +258,9 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
} else {
src = buf;
}
- return bio_write_page(offset, src, bio_chain);
+ return hib_bio_write_page(offset, src, bio_chain);
}
-/*
- * The swap map is a data structure used for keeping track of each page
- * written to a swap partition. It consists of many swap_map_page
- * structures that contain each an array of MAP_PAGE_SIZE swap entries.
- * These structures are stored on the swap and linked together with the
- * help of the .next_swap member.
- *
- * The swap map is created during suspend. The swap map pages are
- * allocated and populated one at a time, so we only need one memory
- * page to set up the entire structure.
- *
- * During resume we also only need to use one swap_map_page structure
- * at a time.
- */
-
-#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
-
-struct swap_map_page {
- sector_t entries[MAP_PAGE_ENTRIES];
- sector_t next_swap;
-};
-
-/**
- * The swap_map_handle structure is used for handling swap in
- * a file-alike way
- */
-
-struct swap_map_handle {
- struct swap_map_page *cur;
- sector_t cur_swap;
- unsigned int k;
-};
-
static void release_swap_writer(struct swap_map_handle *handle)
{
if (handle->cur)
@@ -354,16 +270,33 @@ static void release_swap_writer(struct swap_map_handle *handle)
static int get_swap_writer(struct swap_map_handle *handle)
{
+ int ret;
+
+ ret = swsusp_swap_check();
+ if (ret) {
+ if (ret != -ENOSPC)
+ printk(KERN_ERR "PM: Cannot find swap device, try "
+ "swapon -a.\n");
+ return ret;
+ }
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
- if (!handle->cur)
- return -ENOMEM;
+ if (!handle->cur) {
+ ret = -ENOMEM;
+ goto err_close;
+ }
handle->cur_swap = alloc_swapdev_block(root_swap);
if (!handle->cur_swap) {
- release_swap_writer(handle);
- return -ENOSPC;
+ ret = -ENOSPC;
+ goto err_rel;
}
handle->k = 0;
+ handle->first_sector = handle->cur_swap;
return 0;
+err_rel:
+ release_swap_writer(handle);
+err_close:
+ swsusp_close(FMODE_WRITE);
+ return ret;
}
static int swap_write_page(struct swap_map_handle *handle, void *buf,
@@ -380,7 +313,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = wait_on_bio_chain(bio_chain);
+ error = hib_wait_on_bio_chain(bio_chain);
if (error)
goto out;
offset = alloc_swapdev_block(root_swap);
@@ -406,6 +339,24 @@ static int flush_swap_writer(struct swap_map_handle *handle)
return -EINVAL;
}
+static int swap_writer_finish(struct swap_map_handle *handle,
+ unsigned int flags, int error)
+{
+ if (!error) {
+ flush_swap_writer(handle);
+ printk(KERN_INFO "PM: S");
+ error = mark_swapfiles(handle, flags);
+ printk("|\n");
+ }
+
+ if (error)
+ free_all_swap_pages(root_swap);
+ release_swap_writer(handle);
+ swsusp_close(FMODE_WRITE);
+
+ return error;
+}
+
/**
* save_image - save the suspend image data
*/
@@ -431,7 +382,7 @@ static int save_image(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
while (1) {
- ret = snapshot_read_next(snapshot, PAGE_SIZE);
+ ret = snapshot_read_next(snapshot);
if (ret <= 0)
break;
ret = swap_write_page(handle, data_of(*snapshot), &bio);
@@ -441,7 +392,7 @@ static int save_image(struct swap_map_handle *handle,
printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
- err2 = wait_on_bio_chain(&bio);
+ err2 = hib_wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!ret)
ret = err2;
@@ -483,50 +434,34 @@ int swsusp_write(unsigned int flags)
struct swap_map_handle handle;
struct snapshot_handle snapshot;
struct swsusp_info *header;
+ unsigned long pages;
int error;
- error = swsusp_swap_check();
+ pages = snapshot_get_image_size();
+ error = get_swap_writer(&handle);
if (error) {
- printk(KERN_ERR "PM: Cannot find swap device, try "
- "swapon -a.\n");
+ printk(KERN_ERR "PM: Cannot get swap writer\n");
return error;
}
+ if (!enough_swap(pages)) {
+ printk(KERN_ERR "PM: Not enough free swap\n");
+ error = -ENOSPC;
+ goto out_finish;
+ }
memset(&snapshot, 0, sizeof(struct snapshot_handle));
- error = snapshot_read_next(&snapshot, PAGE_SIZE);
+ error = snapshot_read_next(&snapshot);
if (error < PAGE_SIZE) {
if (error >= 0)
error = -EFAULT;
- goto out;
+ goto out_finish;
}
header = (struct swsusp_info *)data_of(snapshot);
- if (!enough_swap(header->pages)) {
- printk(KERN_ERR "PM: Not enough free swap\n");
- error = -ENOSPC;
- goto out;
- }
- error = get_swap_writer(&handle);
- if (!error) {
- sector_t start = handle.cur_swap;
-
- error = swap_write_page(&handle, header, NULL);
- if (!error)
- error = save_image(&handle, &snapshot,
- header->pages - 1);
-
- if (!error) {
- flush_swap_writer(&handle);
- printk(KERN_INFO "PM: S");
- error = mark_swapfiles(start, flags);
- printk("|\n");
- }
- }
- if (error)
- free_all_swap_pages(root_swap);
-
- release_swap_writer(&handle);
- out:
- swsusp_close(FMODE_WRITE);
+ error = swap_write_page(&handle, header, NULL);
+ if (!error)
+ error = save_image(&handle, &snapshot, pages - 1);
+out_finish:
+ error = swap_writer_finish(&handle, flags, error);
return error;
}
@@ -542,18 +477,21 @@ static void release_swap_reader(struct swap_map_handle *handle)
handle->cur = NULL;
}
-static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
+static int get_swap_reader(struct swap_map_handle *handle,
+ unsigned int *flags_p)
{
int error;
- if (!start)
+ *flags_p = swsusp_header->flags;
+
+ if (!swsusp_header->image) /* how can this happen? */
return -EINVAL;
handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
if (!handle->cur)
return -ENOMEM;
- error = bio_read_page(start, handle->cur, NULL);
+ error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
if (error) {
release_swap_reader(handle);
return error;
@@ -573,21 +511,28 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
- error = bio_read_page(offset, buf, bio_chain);
+ error = hib_bio_read_page(offset, buf, bio_chain);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = wait_on_bio_chain(bio_chain);
+ error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
offset = handle->cur->next_swap;
if (!offset)
release_swap_reader(handle);
else if (!error)
- error = bio_read_page(offset, handle->cur, NULL);
+ error = hib_bio_read_page(offset, handle->cur, NULL);
}
return error;
}
+static int swap_reader_finish(struct swap_map_handle *handle)
+{
+ release_swap_reader(handle);
+
+ return 0;
+}
+
/**
* load_image - load the image using the swap map handle
* @handle and the snapshot handle @snapshot
@@ -615,21 +560,21 @@ static int load_image(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for ( ; ; ) {
- error = snapshot_write_next(snapshot, PAGE_SIZE);
+ error = snapshot_write_next(snapshot);
if (error <= 0)
break;
error = swap_read_page(handle, data_of(*snapshot), &bio);
if (error)
break;
if (snapshot->sync_read)
- error = wait_on_bio_chain(&bio);
+ error = hib_wait_on_bio_chain(&bio);
if (error)
break;
if (!(nr_pages % m))
printk("\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
- err2 = wait_on_bio_chain(&bio);
+ err2 = hib_wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!error)
error = err2;
@@ -657,20 +602,20 @@ int swsusp_read(unsigned int *flags_p)
struct snapshot_handle snapshot;
struct swsusp_info *header;
- *flags_p = swsusp_header->flags;
-
memset(&snapshot, 0, sizeof(struct snapshot_handle));
- error = snapshot_write_next(&snapshot, PAGE_SIZE);
+ error = snapshot_write_next(&snapshot);
if (error < PAGE_SIZE)
return error < 0 ? error : -EFAULT;
header = (struct swsusp_info *)data_of(snapshot);
- error = get_swap_reader(&handle, swsusp_header->image);
+ error = get_swap_reader(&handle, flags_p);
+ if (error)
+ goto end;
if (!error)
error = swap_read_page(&handle, header, NULL);
if (!error)
error = load_image(&handle, &snapshot, header->pages - 1);
- release_swap_reader(&handle);
-
+ swap_reader_finish(&handle);
+end:
if (!error)
pr_debug("PM: Image successfully loaded\n");
else
@@ -686,11 +631,11 @@ int swsusp_check(void)
{
int error;
- resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
- if (!IS_ERR(resume_bdev)) {
- set_blocksize(resume_bdev, PAGE_SIZE);
+ hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ if (!IS_ERR(hib_resume_bdev)) {
+ set_blocksize(hib_resume_bdev, PAGE_SIZE);
memset(swsusp_header, 0, PAGE_SIZE);
- error = bio_read_page(swsusp_resume_block,
+ error = hib_bio_read_page(swsusp_resume_block,
swsusp_header, NULL);
if (error)
goto put;
@@ -698,7 +643,7 @@ int swsusp_check(void)
if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
/* Reset swap signature now */
- error = bio_write_page(swsusp_resume_block,
+ error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
error = -EINVAL;
@@ -706,11 +651,11 @@ int swsusp_check(void)
put:
if (error)
- blkdev_put(resume_bdev, FMODE_READ);
+ blkdev_put(hib_resume_bdev, FMODE_READ);
else
pr_debug("PM: Signature found, resuming\n");
} else {
- error = PTR_ERR(resume_bdev);
+ error = PTR_ERR(hib_resume_bdev);
}
if (error)
@@ -725,12 +670,12 @@ put:
void swsusp_close(fmode_t mode)
{
- if (IS_ERR(resume_bdev)) {
+ if (IS_ERR(hib_resume_bdev)) {
pr_debug("PM: Image device not initialised\n");
return;
}
- blkdev_put(resume_bdev, mode);
+ blkdev_put(hib_resume_bdev, mode);
}
static int swsusp_header_init(void)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a8c96212bc1..e819e17877c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -151,6 +151,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
{
struct snapshot_data *data;
ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
mutex_lock(&pm_mutex);
@@ -159,14 +160,19 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
res = -ENODATA;
goto Unlock;
}
- res = snapshot_read_next(&data->handle, count);
- if (res > 0) {
- if (copy_to_user(buf, data_of(data->handle), res))
- res = -EFAULT;
- else
- *offp = data->handle.offset;
+ if (!pg_offp) { /* on page boundary? */
+ res = snapshot_read_next(&data->handle);
+ if (res <= 0)
+ goto Unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
}
+ res = simple_read_from_buffer(buf, count, &pg_offp,
+ data_of(data->handle), res);
+ if (res > 0)
+ *offp += res;
+
Unlock:
mutex_unlock(&pm_mutex);
@@ -178,18 +184,25 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
{
struct snapshot_data *data;
ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
mutex_lock(&pm_mutex);
data = filp->private_data;
- res = snapshot_write_next(&data->handle, count);
- if (res > 0) {
- if (copy_from_user(data_of(data->handle), buf, res))
- res = -EFAULT;
- else
- *offp = data->handle.offset;
+
+ if (!pg_offp) {
+ res = snapshot_write_next(&data->handle);
+ if (res <= 0)
+ goto unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
}
+ res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
+ buf, count);
+ if (res > 0)
+ *offp += res;
+unlock:
mutex_unlock(&pm_mutex);
return res;
diff --git a/kernel/printk.c b/kernel/printk.c
index 75077ad0b53..444b770c959 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
+#include <linux/kdb.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
@@ -413,6 +414,22 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
}
+#ifdef CONFIG_KGDB_KDB
+/* kdb dmesg command needs access to the syslog buffer. do_syslog()
+ * uses locks so it cannot be used during debugging. Just tell kdb
+ * where the start and end of the physical and logical logs are. This
+ * is equivalent to do_syslog(3).
+ */
+void kdb_syslog_data(char *syslog_data[4])
+{
+ syslog_data[0] = log_buf;
+ syslog_data[1] = log_buf + log_buf_len;
+ syslog_data[2] = log_buf + log_end -
+ (logged_chars < log_buf_len ? logged_chars : log_buf_len);
+ syslog_data[3] = log_buf + log_end;
+}
+#endif /* CONFIG_KGDB_KDB */
+
/*
* Call the console drivers on a range of log_buf
*/
@@ -586,6 +603,14 @@ asmlinkage int printk(const char *fmt, ...)
va_list args;
int r;
+#ifdef CONFIG_KGDB_KDB
+ if (unlikely(kdb_trap_printk)) {
+ va_start(args, fmt);
+ r = vkdb_printf(fmt, args);
+ va_end(args);
+ return r;
+ }
+#endif
va_start(args, fmt);
r = vprintk(fmt, args);
va_end(args);
diff --git a/kernel/profile.c b/kernel/profile.c
index dfadc5b729f..b22a899934c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -365,14 +365,14 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- node = cpu_to_node(cpu);
+ node = cpu_to_mem(cpu);
per_cpu(cpu_profile_flip, cpu) = 0;
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
page = alloc_pages_exact_node(node,
GFP_KERNEL | __GFP_ZERO,
0);
if (!page)
- return NOTIFY_BAD;
+ return notifier_from_errno(-ENOMEM);
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
}
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
@@ -388,7 +388,7 @@ out_free:
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
__free_page(page);
- return NOTIFY_BAD;
+ return notifier_from_errno(-ENOMEM);
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (prof_cpu_mask != NULL)
@@ -567,7 +567,7 @@ static int create_hash_tables(void)
int cpu;
for_each_online_cpu(cpu) {
- int node = cpu_to_node(cpu);
+ int node = cpu_to_mem(cpu);
struct page *page;
page = alloc_pages_exact_node(node,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 42ad8ae729a..74a3d693c19 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -14,7 +14,6 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/signal.h>
@@ -76,7 +75,6 @@ void __ptrace_unlink(struct task_struct *child)
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
- arch_ptrace_untrace(child);
if (task_is_traced(child))
ptrace_untrace(child);
}
@@ -596,6 +594,32 @@ int ptrace_request(struct task_struct *child, long request,
ret = ptrace_detach(child, data);
break;
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+ case PTRACE_GETFDPIC: {
+ struct mm_struct *mm = get_task_mm(child);
+ unsigned long tmp = 0;
+
+ ret = -ESRCH;
+ if (!mm)
+ break;
+
+ switch (addr) {
+ case PTRACE_GETFDPIC_EXEC:
+ tmp = mm->context.exec_fdpic_loadmap;
+ break;
+ case PTRACE_GETFDPIC_INTERP:
+ tmp = mm->context.interp_fdpic_loadmap;
+ break;
+ default:
+ break;
+ }
+ mmput(mm);
+
+ ret = put_user(tmp, (unsigned long __user *) data);
+ break;
+ }
+#endif
+
#ifdef PTRACE_SINGLESTEP
case PTRACE_SINGLESTEP:
#endif
@@ -666,10 +690,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
struct task_struct *child;
long ret;
- /*
- * This lock_kernel fixes a subtle race with suid exec
- */
- lock_kernel();
if (request == PTRACE_TRACEME) {
ret = ptrace_traceme();
if (!ret)
@@ -703,7 +723,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
out_put_task_struct:
put_task_struct(child);
out:
- unlock_kernel();
return ret;
}
@@ -813,10 +832,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
struct task_struct *child;
long ret;
- /*
- * This lock_kernel fixes a subtle race with suid exec
- */
- lock_kernel();
if (request == PTRACE_TRACEME) {
ret = ptrace_traceme();
goto out;
@@ -846,7 +861,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
out_put_task_struct:
put_task_struct(child);
out:
- unlock_kernel();
return ret;
}
#endif /* CONFIG_COMPAT */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 49d808e833b..72a8dc9567f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,7 +44,6 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
-#include <linux/kernel_stat.h>
#include <linux/hardirq.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map =
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
#endif
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int debug_lockdep_rcu_enabled(void)
@@ -97,21 +93,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
- * This function is invoked towards the end of the scheduler's initialization
- * process. Before this is called, the idle task might contain
- * RCU read-side critical sections (during which time, this idle
- * task is booting the system). After this function is called, the
- * idle tasks are prohibited from containing RCU read-side critical
- * sections.
- */
-void rcu_scheduler_starting(void)
-{
- WARN_ON(num_online_cpus() != 1);
- WARN_ON(nr_context_switches() > 0);
- rcu_scheduler_active = 1;
-}
-
-/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
*/
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 9f6d9ff2572..38729d3cd23 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -44,9 +44,9 @@ struct rcu_ctrlblk {
};
/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_ctrlblk = {
- .donetail = &rcu_ctrlblk.rcucblist,
- .curtail = &rcu_ctrlblk.rcucblist,
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+ .donetail = &rcu_sched_ctrlblk.rcucblist,
+ .curtail = &rcu_sched_ctrlblk.rcucblist,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
@@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
#ifdef CONFIG_NO_HZ
static long rcu_dynticks_nesting = 1;
@@ -108,7 +113,8 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
*/
void rcu_sched_qs(int cpu)
{
- if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
+ if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
+ rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
}
@@ -173,7 +179,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
*/
static void rcu_process_callbacks(struct softirq_action *unused)
{
- __rcu_process_callbacks(&rcu_ctrlblk);
+ __rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
}
@@ -187,7 +193,8 @@ static void rcu_process_callbacks(struct softirq_action *unused)
*
* Cool, huh? (Due to Josh Triplett.)
*
- * But we want to make this a static inline later.
+ * But we want to make this a static inline later. The cond_resched()
+ * currently makes this problematic.
*/
void synchronize_sched(void)
{
@@ -195,12 +202,6 @@ void synchronize_sched(void)
}
EXPORT_SYMBOL_GPL(synchronize_sched);
-void synchronize_rcu_bh(void)
-{
- synchronize_sched();
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
/*
* Helper function for call_rcu() and call_rcu_bh().
*/
@@ -226,7 +227,7 @@ static void __call_rcu(struct rcu_head *head,
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
- __call_rcu(head, func, &rcu_ctrlblk);
+ __call_rcu(head, func, &rcu_sched_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu);
@@ -244,11 +245,13 @@ void rcu_barrier(void)
{
struct rcu_synchronize rcu;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
@@ -256,11 +259,13 @@ void rcu_barrier_bh(void)
{
struct rcu_synchronize rcu;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
@@ -268,11 +273,13 @@ void rcu_barrier_sched(void)
{
struct rcu_synchronize rcu;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
@@ -280,3 +287,5 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
+
+#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
new file mode 100644
index 00000000000..d223a92bc74
--- /dev/null
+++ b/kernel/rcutiny_plugin.h
@@ -0,0 +1,39 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions that provide either classic
+ * or preemptable semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+#include <linux/kernel_stat.h>
+
+/*
+ * During boot, we forgive RCU lockdep issues. After this function is
+ * invoked, we start taking RCU lockdep issues seriously.
+ */
+void rcu_scheduler_starting(void)
+{
+ WARN_ON(nr_context_switches() > 0);
+ rcu_scheduler_active = 1;
+}
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83e..6535ac8bc6a 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void)
{
struct rcu_bh_torture_synchronize rcu;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
static struct rcu_torture_ops rcu_bh_ops = {
@@ -669,7 +671,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
.fqs = rcu_sched_force_quiescent_state,
- .stats = rcu_expedited_torture_stats,
+ .stats = NULL,
.irq_capable = 1,
.name = "sched_expedited"
};
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 3ec8160fc75..d4437345706 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,6 +46,7 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
+#include <linux/kernel_stat.h>
#include "rcutree.h"
@@ -53,8 +54,8 @@
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
-#define RCU_STATE_INITIALIZER(name) { \
- .level = { &name.node[0] }, \
+#define RCU_STATE_INITIALIZER(structname) { \
+ .level = { &structname.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
@@ -65,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
.signaled = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
+ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
.orphan_cbs_list = NULL, \
- .orphan_cbs_tail = &name.orphan_cbs_list, \
+ .orphan_cbs_tail = &structname.orphan_cbs_list, \
.orphan_qlen = 0, \
- .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
+ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
+ .name = #structname, \
}
struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
@@ -80,6 +82,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@@ -97,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
*/
void rcu_sched_qs(int cpu)
{
- struct rcu_data *rdp;
+ struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
- rdp = &per_cpu(rcu_sched_data, cpu);
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
- rcu_preempt_note_context_switch(cpu);
}
void rcu_bh_qs(int cpu)
{
- struct rcu_data *rdp;
+ struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
- rdp = &per_cpu(rcu_bh_data, cpu);
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
}
+/*
+ * Note a context switch. This is a quiescent state for RCU-sched,
+ * and requires special handling for preemptible RCU.
+ */
+void rcu_note_context_switch(int cpu)
+{
+ rcu_sched_qs(cpu);
+ rcu_preempt_note_context_switch(cpu);
+}
+
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = 1,
@@ -438,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+int rcu_cpu_stall_panicking __read_mostly;
+
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
rsp->gp_start = jiffies;
@@ -470,7 +484,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
/* OK, time to rat on our buddy... */
- printk(KERN_ERR "INFO: RCU detected CPU stalls:");
+ printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
+ rsp->name);
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave(&rnp->lock, flags);
rcu_print_task_stall(rnp);
@@ -481,7 +496,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
if (rnp->qsmask & (1UL << cpu))
printk(" %d", rnp->grplo + cpu);
}
- printk(" (detected by %d, t=%ld jiffies)\n",
+ printk("} (detected by %d, t=%ld jiffies)\n",
smp_processor_id(), (long)(jiffies - rsp->gp_start));
trigger_all_cpu_backtrace();
@@ -497,8 +512,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
unsigned long flags;
struct rcu_node *rnp = rcu_get_root(rsp);
- printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
- smp_processor_id(), jiffies - rsp->gp_start);
+ printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
+ rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
trigger_all_cpu_backtrace();
raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -515,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
long delta;
struct rcu_node *rnp;
+ if (rcu_cpu_stall_panicking)
+ return;
delta = jiffies - rsp->jiffies_stall;
rnp = rdp->mynode;
if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
@@ -529,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
}
}
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+ rcu_cpu_stall_panicking = 1;
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+ .notifier_call = rcu_panic,
+};
+
+static void __init check_cpu_stall_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+}
+
#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -539,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
}
+static void __init check_cpu_stall_init(void)
+{
+}
+
#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
@@ -1125,8 +1161,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
*/
void rcu_check_callbacks(int cpu, int user)
{
- if (!rcu_pending(cpu))
- return; /* if nothing for RCU to do. */
if (user ||
(idle_cpu(cpu) && rcu_scheduler_active &&
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
@@ -1158,7 +1192,8 @@ void rcu_check_callbacks(int cpu, int user)
rcu_bh_qs(cpu);
}
rcu_preempt_check_callbacks(cpu);
- raise_softirq(RCU_SOFTIRQ);
+ if (rcu_pending(cpu))
+ raise_softirq(RCU_SOFTIRQ);
}
#ifdef CONFIG_SMP
@@ -1236,11 +1271,11 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
break; /* grace period idle or initializing, ignore. */
case RCU_SAVE_DYNTICK:
-
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
break; /* So gcc recognizes the dead code. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+
/* Record dyntick-idle state. */
force_qs_rnp(rsp, dyntick_save_progress_counter);
raw_spin_lock(&rnp->lock); /* irqs already disabled */
@@ -1449,11 +1484,13 @@ void synchronize_sched(void)
if (rcu_blocking_is_gp())
return;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
@@ -1473,11 +1510,13 @@ void synchronize_rcu_bh(void)
if (rcu_blocking_is_gp())
return;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_bh(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
@@ -1498,8 +1537,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
check_cpu_stall(rsp, rdp);
/* Is the RCU core waiting for a quiescent state from this CPU? */
- if (rdp->qs_pending) {
+ if (rdp->qs_pending && !rdp->passed_quiesc) {
+
+ /*
+ * If force_quiescent_state() coming soon and this CPU
+ * needs a quiescent state, and this is either RCU-sched
+ * or RCU-bh, force a local reschedule.
+ */
rdp->n_rp_qs_pending++;
+ if (!rdp->preemptable &&
+ ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
+ jiffies))
+ set_need_resched();
+ } else if (rdp->qs_pending && rdp->passed_quiesc) {
+ rdp->n_rp_report_qs++;
return 1;
}
@@ -1767,6 +1818,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
}
/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process. Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system). After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections. This function also enables RCU lockdep checking.
+ */
+void rcu_scheduler_starting(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+ WARN_ON(nr_context_switches() > 0);
+ rcu_scheduler_active = 1;
+}
+
+/*
* Compute the per-level fanout, either using the exact fanout specified
* or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
*/
@@ -1849,6 +1915,14 @@ static void __init rcu_init_one(struct rcu_state *rsp)
INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
}
}
+
+ rnp = rsp->level[NUM_RCU_LVLS - 1];
+ for_each_possible_cpu(i) {
+ while (i > rnp->grphi)
+ rnp++;
+ rsp->rda[i]->mynode = rnp;
+ rcu_boot_init_percpu_data(i, rsp);
+ }
}
/*
@@ -1859,19 +1933,11 @@ static void __init rcu_init_one(struct rcu_state *rsp)
#define RCU_INIT_FLAVOR(rsp, rcu_data) \
do { \
int i; \
- int j; \
- struct rcu_node *rnp; \
\
- rcu_init_one(rsp); \
- rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
- j = 0; \
for_each_possible_cpu(i) { \
- if (i > rnp[j].grphi) \
- j++; \
- per_cpu(rcu_data, i).mynode = &rnp[j]; \
(rsp)->rda[i] = &per_cpu(rcu_data, i); \
- rcu_boot_init_percpu_data(i, rsp); \
} \
+ rcu_init_one(rsp); \
} while (0)
void __init rcu_init(void)
@@ -1879,12 +1945,6 @@ void __init rcu_init(void)
int cpu;
rcu_bootup_announce();
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
- printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-#if NUM_RCU_LVL_4 != 0
- printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n");
-#endif /* #if NUM_RCU_LVL_4 != 0 */
RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
__rcu_init_preempt();
@@ -1898,6 +1958,7 @@ void __init rcu_init(void)
cpu_notifier(rcu_cpu_notify, 0);
for_each_online_cpu(cpu)
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
+ check_cpu_stall_init();
}
#include "rcutree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4a525a30e08..14c040b18ed 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -223,6 +223,7 @@ struct rcu_data {
/* 5) __rcu_pending() statistics. */
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
unsigned long n_rp_qs_pending;
+ unsigned long n_rp_report_qs;
unsigned long n_rp_cb_ready;
unsigned long n_rp_cpu_needs_gp;
unsigned long n_rp_gp_completed;
@@ -326,6 +327,7 @@ struct rcu_state {
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+ char *name; /* Name of structure. */
};
/* Return values for rcu_preempt_offline_tasks(). */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 79b53bda894..0e4f420245d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -26,6 +26,45 @@
#include <linux/delay.h>
+/*
+ * Check the RCU kernel configuration parameters and print informative
+ * messages about anything out of the ordinary. If you like #ifdef, you
+ * will love this function.
+ */
+static void __init rcu_bootup_announce_oddness(void)
+{
+#ifdef CONFIG_RCU_TRACE
+ printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
+#endif
+#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
+ printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+ CONFIG_RCU_FANOUT);
+#endif
+#ifdef CONFIG_RCU_FANOUT_EXACT
+ printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
+#endif
+#ifdef CONFIG_RCU_FAST_NO_HZ
+ printk(KERN_INFO
+ "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+#endif
+#ifdef CONFIG_PROVE_RCU
+ printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
+ printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
+#endif
+#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
+ printk(KERN_INFO
+ "\tRCU-based detection of stalled CPUs is disabled.\n");
+#endif
+#ifndef CONFIG_RCU_CPU_STALL_VERBOSE
+ printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
+#endif
+#if NUM_RCU_LVL_4 != 0
+ printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
+#endif
+}
+
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
@@ -38,8 +77,8 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
*/
static void __init rcu_bootup_announce(void)
{
- printk(KERN_INFO
- "Experimental preemptable hierarchical RCU implementation.\n");
+ printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
+ rcu_bootup_announce_oddness();
}
/*
@@ -75,13 +114,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
* that this just means that the task currently running on the CPU is
* not in a quiescent state. There might be any number of tasks blocked
* while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
*/
static void rcu_preempt_qs(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
rdp->passed_quiesc_completed = rdp->gpnum - 1;
barrier();
rdp->passed_quiesc = 1;
+ current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
}
/*
@@ -144,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu)
* grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period.
*/
- rcu_preempt_qs(cpu);
local_irq_save(flags);
- t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+ rcu_preempt_qs(cpu);
local_irq_restore(flags);
}
@@ -236,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
special = t->rcu_read_unlock_special;
if (special & RCU_READ_UNLOCK_NEED_QS) {
- t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
rcu_preempt_qs(smp_processor_id());
}
@@ -473,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu)
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0) {
- t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
rcu_preempt_qs(cpu);
return;
}
@@ -515,11 +557,13 @@ void synchronize_rcu(void)
if (!rcu_scheduler_active)
return;
+ init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
@@ -754,6 +798,7 @@ void exit_rcu(void)
static void __init rcu_bootup_announce(void)
{
printk(KERN_INFO "Hierarchical RCU implementation.\n");
+ rcu_bootup_announce_oddness();
}
/*
@@ -1008,6 +1053,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
int rcu_needs_cpu(int cpu)
{
int c = 0;
+ int snap;
+ int snap_nmi;
int thatcpu;
/* Check for being in the holdoff period. */
@@ -1015,12 +1062,18 @@ int rcu_needs_cpu(int cpu)
return rcu_needs_cpu_quick_check(cpu);
/* Don't bother unless we are the last non-dyntick-idle CPU. */
- for_each_cpu_not(thatcpu, nohz_cpu_mask)
- if (thatcpu != cpu) {
+ for_each_online_cpu(thatcpu) {
+ if (thatcpu == cpu)
+ continue;
+ snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
+ snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+ smp_mb(); /* Order sampling of snap with end of grace period. */
+ if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
per_cpu(rcu_dyntick_drain, cpu) = 0;
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
return rcu_needs_cpu_quick_check(cpu);
}
+ }
/* Check and update the rcu_dyntick_drain sequencing. */
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d45db2e35d2..36c95b45738 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -241,11 +241,13 @@ static const struct file_operations rcugp_fops = {
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
{
seq_printf(m, "%3d%cnp=%ld "
- "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+ "qsp=%ld rpq=%ld cbr=%ld cng=%ld "
+ "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->n_rcu_pending,
rdp->n_rp_qs_pending,
+ rdp->n_rp_report_qs,
rdp->n_rp_cb_ready,
rdp->n_rp_cpu_needs_gp,
rdp->n_rp_gp_completed,
diff --git a/kernel/relay.c b/kernel/relay.c
index 3d97f282161..c7cf397fb92 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -539,7 +539,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
"relay_hotcpu_callback: cpu %d buffer "
"creation failed\n", hotcpu);
mutex_unlock(&relay_channels_mutex);
- return NOTIFY_BAD;
+ return notifier_from_errno(-ENOMEM);
}
}
mutex_unlock(&relay_channels_mutex);
@@ -1231,8 +1231,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
size_t read_subbuf = read_start / subbuf_size;
size_t padding = rbuf->padding[read_subbuf];
size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
- struct page *pages[PIPE_BUFFERS];
- struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages[PIPE_DEF_BUFFERS];
+ struct partial_page partial[PIPE_DEF_BUFFERS];
struct splice_pipe_desc spd = {
.pages = pages,
.nr_pages = 0,
@@ -1245,6 +1245,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
return 0;
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
/*
* Adjust read len, if longer than what is available
@@ -1255,7 +1257,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
pidx = (read_start / PAGE_SIZE) % subbuf_pages;
poff = read_start & ~PAGE_MASK;
- nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS);
+ nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);
for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
unsigned int this_len, this_end, private;
@@ -1289,16 +1291,19 @@ static ssize_t subbuf_splice_actor(struct file *in,
}
}
+ ret = 0;
if (!spd.nr_pages)
- return 0;
+ goto out;
ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
if (ret < 0 || ret < total_len)
- return ret;
+ goto out;
if (read_start + ret == nonpad_end)
ret += padding;
+out:
+ splice_shrink_spd(pipe, &spd);
return ret;
}
diff --git a/kernel/resource.c b/kernel/resource.c
index 9c358e26353..7b36976e5de 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/pfn.h>
@@ -681,6 +682,8 @@ resource_size_t resource_alignment(struct resource *res)
* release_region releases a matching busy region.
*/
+static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait);
+
/**
* __request_region - create a new busy resource region
* @parent: parent resource descriptor
@@ -693,6 +696,7 @@ struct resource * __request_region(struct resource *parent,
resource_size_t start, resource_size_t n,
const char *name, int flags)
{
+ DECLARE_WAITQUEUE(wait, current);
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
@@ -717,7 +721,15 @@ struct resource * __request_region(struct resource *parent,
if (!(conflict->flags & IORESOURCE_BUSY))
continue;
}
-
+ if (conflict->flags & flags & IORESOURCE_MUXED) {
+ add_wait_queue(&muxed_resource_wait, &wait);
+ write_unlock(&resource_lock);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+ remove_wait_queue(&muxed_resource_wait, &wait);
+ write_lock(&resource_lock);
+ continue;
+ }
/* Uhhuh, that didn't work out.. */
kfree(res);
res = NULL;
@@ -791,6 +803,8 @@ void __release_region(struct resource *parent, resource_size_t start,
break;
*p = res->sibling;
write_unlock(&resource_lock);
+ if (res->flags & IORESOURCE_MUXED)
+ wake_up(&muxed_resource_wait);
kfree(res);
return;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d051c7517f..d4840814250 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -55,9 +55,9 @@
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/percpu.h>
-#include <linux/kthread.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/stop_machine.h>
#include <linux/sysctl.h>
#include <linux/syscalls.h>
#include <linux/times.h>
@@ -503,8 +503,11 @@ struct rq {
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ
+ u64 nohz_stamp;
unsigned char in_nohz_recently;
#endif
+ unsigned int skip_clock_update;
+
/* capture load from *all* tasks on this cpu: */
struct load_weight load;
unsigned long nr_load_updates;
@@ -546,15 +549,13 @@ struct rq {
int post_schedule;
int active_balance;
int push_cpu;
+ struct cpu_stop_work active_balance_work;
/* cpu of this runqueue: */
int cpu;
int online;
unsigned long avg_load_per_task;
- struct task_struct *migration_thread;
- struct list_head migration_queue;
-
u64 rt_avg;
u64 age_stamp;
u64 idle_stamp;
@@ -602,6 +603,13 @@ static inline
void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+
+ /*
+ * A queue event has occurred, and we're going to schedule. In
+ * this case, we can save a useless back to back clock update.
+ */
+ if (test_tsk_need_resched(p))
+ rq->skip_clock_update = 1;
}
static inline int cpu_of(struct rq *rq)
@@ -636,7 +644,8 @@ static inline int cpu_of(struct rq *rq)
inline void update_rq_clock(struct rq *rq)
{
- rq->clock = sched_clock_cpu(cpu_of(rq));
+ if (!rq->skip_clock_update)
+ rq->clock = sched_clock_cpu(cpu_of(rq));
}
/*
@@ -914,16 +923,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
/*
- * Check whether the task is waking, we use this to synchronize against
- * ttwu() so that task_cpu() reports a stable number.
- *
- * We need to make an exception for PF_STARTING tasks because the fork
- * path might require task_rq_lock() to work, eg. it can call
- * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ * Check whether the task is waking, we use this to synchronize ->cpus_allowed
+ * against ttwu().
*/
static inline int task_is_waking(struct task_struct *p)
{
- return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+ return unlikely(p->state == TASK_WAKING);
}
/*
@@ -936,11 +941,9 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
struct rq *rq;
for (;;) {
- while (task_is_waking(p))
- cpu_relax();
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_is_waking(p)))
+ if (likely(rq == task_rq(p)))
return rq;
raw_spin_unlock(&rq->lock);
}
@@ -957,25 +960,15 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
struct rq *rq;
for (;;) {
- while (task_is_waking(p))
- cpu_relax();
local_irq_save(*flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_is_waking(p)))
+ if (likely(rq == task_rq(p)))
return rq;
raw_spin_unlock_irqrestore(&rq->lock, *flags);
}
}
-void task_rq_unlock_wait(struct task_struct *p)
-{
- struct rq *rq = task_rq(p);
-
- smp_mb(); /* spin-unlock-wait is not a full memory barrier */
- raw_spin_unlock_wait(&rq->lock);
-}
-
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
@@ -1239,6 +1232,17 @@ void wake_up_idle_cpu(int cpu)
if (!tsk_is_polling(rq->idle))
smp_send_reschedule(cpu);
}
+
+int nohz_ratelimit(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ u64 diff = rq->clock - rq->nohz_stamp;
+
+ rq->nohz_stamp = rq->clock;
+
+ return diff < (NSEC_PER_SEC / HZ) >> 1;
+}
+
#endif /* CONFIG_NO_HZ */
static u64 sched_avg_period(void)
@@ -1781,8 +1785,6 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
}
}
- update_rq_clock(rq1);
- update_rq_clock(rq2);
}
/*
@@ -1813,7 +1815,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
}
#endif
-static void calc_load_account_active(struct rq *this_rq);
+static void calc_load_account_idle(struct rq *this_rq);
static void update_sysctl(void);
static int get_update_sysctl_factor(void);
@@ -1870,62 +1872,43 @@ static void set_load_weight(struct task_struct *p)
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
-static void update_avg(u64 *avg, u64 sample)
-{
- s64 diff = sample - *avg;
- *avg += diff >> 3;
-}
-
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
- if (wakeup)
- p->se.start_runtime = p->se.sum_exec_runtime;
-
+ update_rq_clock(rq);
sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p, wakeup, head);
+ p->sched_class->enqueue_task(rq, p, flags);
p->se.on_rq = 1;
}
-static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
- if (sleep) {
- if (p->se.last_wakeup) {
- update_avg(&p->se.avg_overlap,
- p->se.sum_exec_runtime - p->se.last_wakeup);
- p->se.last_wakeup = 0;
- } else {
- update_avg(&p->se.avg_wakeup,
- sysctl_sched_wakeup_granularity);
- }
- }
-
+ update_rq_clock(rq);
sched_info_dequeued(p);
- p->sched_class->dequeue_task(rq, p, sleep);
+ p->sched_class->dequeue_task(rq, p, flags);
p->se.on_rq = 0;
}
/*
* activate_task - move a task to the runqueue.
*/
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
- enqueue_task(rq, p, wakeup, false);
+ enqueue_task(rq, p, flags);
inc_nr_running(rq);
}
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
- dequeue_task(rq, p, sleep);
+ dequeue_task(rq, p, flags);
dec_nr_running(rq);
}
@@ -2054,21 +2037,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
__set_task_cpu(p, new_cpu);
}
-struct migration_req {
- struct list_head list;
-
+struct migration_arg {
struct task_struct *task;
int dest_cpu;
-
- struct completion done;
};
+static int migration_cpu_stop(void *data);
+
/*
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
-static int
-migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+static bool migrate_task(struct task_struct *p, int dest_cpu)
{
struct rq *rq = task_rq(p);
@@ -2076,58 +2056,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
* If the task is not on a runqueue (and not running), then
* the next wake-up will properly place the task.
*/
- if (!p->se.on_rq && !task_running(rq, p))
- return 0;
-
- init_completion(&req->done);
- req->task = p;
- req->dest_cpu = dest_cpu;
- list_add(&req->list, &rq->migration_queue);
-
- return 1;
-}
-
-/*
- * wait_task_context_switch - wait for a thread to complete at least one
- * context switch.
- *
- * @p must not be current.
- */
-void wait_task_context_switch(struct task_struct *p)
-{
- unsigned long nvcsw, nivcsw, flags;
- int running;
- struct rq *rq;
-
- nvcsw = p->nvcsw;
- nivcsw = p->nivcsw;
- for (;;) {
- /*
- * The runqueue is assigned before the actual context
- * switch. We need to take the runqueue lock.
- *
- * We could check initially without the lock but it is
- * very likely that we need to take the lock in every
- * iteration.
- */
- rq = task_rq_lock(p, &flags);
- running = task_running(rq, p);
- task_rq_unlock(rq, &flags);
-
- if (likely(!running))
- break;
- /*
- * The switch count is incremented before the actual
- * context switch. We thus wait for two switches to be
- * sure at least one completed.
- */
- if ((p->nvcsw - nvcsw) > 1)
- break;
- if ((p->nivcsw - nivcsw) > 1)
- break;
-
- cpu_relax();
- }
+ return p->se.on_rq || task_running(rq, p);
}
/*
@@ -2185,7 +2114,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
- trace_sched_wait_task(rq, p);
+ trace_sched_wait_task(p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
ncsw = 0;
@@ -2283,6 +2212,9 @@ void task_oncpu_function_call(struct task_struct *p,
}
#ifdef CONFIG_SMP
+/*
+ * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+ */
static int select_fallback_rq(int cpu, struct task_struct *p)
{
int dest_cpu;
@@ -2299,12 +2231,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
return dest_cpu;
/* No more Mr. Nice Guy. */
- if (dest_cpu >= nr_cpu_ids) {
- rcu_read_lock();
- cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
- rcu_read_unlock();
- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
-
+ if (unlikely(dest_cpu >= nr_cpu_ids)) {
+ dest_cpu = cpuset_cpus_allowed_fallback(p);
/*
* Don't tell them about moving exiting tasks or
* kernel threads (both mm NULL), since they never
@@ -2321,17 +2249,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}
/*
- * Gets called from 3 sites (exec, fork, wakeup), since it is called without
- * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
- * by:
- *
- * exec: is unstable, retry loop
- * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
+ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
*/
static inline
-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
{
- int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+ int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
/*
* In order not to call set_task_cpu() on a blocking task we need
@@ -2349,6 +2272,12 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
return cpu;
}
+
+static void update_avg(u64 *avg, u64 sample)
+{
+ s64 diff = sample - *avg;
+ *avg += diff >> 3;
+}
#endif
/***
@@ -2370,16 +2299,13 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
{
int cpu, orig_cpu, this_cpu, success = 0;
unsigned long flags;
+ unsigned long en_flags = ENQUEUE_WAKEUP;
struct rq *rq;
- if (!sched_feat(SYNC_WAKEUPS))
- wake_flags &= ~WF_SYNC;
-
this_cpu = get_cpu();
smp_wmb();
rq = task_rq_lock(p, &flags);
- update_rq_clock(rq);
if (!(p->state & state))
goto out;
@@ -2399,28 +2325,26 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
*
* First fix up the nr_uninterruptible count:
*/
- if (task_contributes_to_load(p))
- rq->nr_uninterruptible--;
+ if (task_contributes_to_load(p)) {
+ if (likely(cpu_online(orig_cpu)))
+ rq->nr_uninterruptible--;
+ else
+ this_rq()->nr_uninterruptible--;
+ }
p->state = TASK_WAKING;
- if (p->sched_class->task_waking)
+ if (p->sched_class->task_waking) {
p->sched_class->task_waking(rq, p);
+ en_flags |= ENQUEUE_WAKING;
+ }
- __task_rq_unlock(rq);
-
- cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
- if (cpu != orig_cpu) {
- /*
- * Since we migrate the task without holding any rq->lock,
- * we need to be careful with task_rq_lock(), since that
- * might end up locking an invalid rq.
- */
+ cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
set_task_cpu(p, cpu);
- }
+ __task_rq_unlock(rq);
rq = cpu_rq(cpu);
raw_spin_lock(&rq->lock);
- update_rq_clock(rq);
/*
* We migrated the task without holding either rq->lock, however
@@ -2448,36 +2372,20 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
out_activate:
#endif /* CONFIG_SMP */
- schedstat_inc(p, se.nr_wakeups);
+ schedstat_inc(p, se.statistics.nr_wakeups);
if (wake_flags & WF_SYNC)
- schedstat_inc(p, se.nr_wakeups_sync);
+ schedstat_inc(p, se.statistics.nr_wakeups_sync);
if (orig_cpu != cpu)
- schedstat_inc(p, se.nr_wakeups_migrate);
+ schedstat_inc(p, se.statistics.nr_wakeups_migrate);
if (cpu == this_cpu)
- schedstat_inc(p, se.nr_wakeups_local);
+ schedstat_inc(p, se.statistics.nr_wakeups_local);
else
- schedstat_inc(p, se.nr_wakeups_remote);
- activate_task(rq, p, 1);
+ schedstat_inc(p, se.statistics.nr_wakeups_remote);
+ activate_task(rq, p, en_flags);
success = 1;
- /*
- * Only attribute actual wakeups done by this task.
- */
- if (!in_interrupt()) {
- struct sched_entity *se = &current->se;
- u64 sample = se->sum_exec_runtime;
-
- if (se->last_wakeup)
- sample -= se->last_wakeup;
- else
- sample -= se->start_runtime;
- update_avg(&se->avg_wakeup, sample);
-
- se->last_wakeup = se->sum_exec_runtime;
- }
-
out_running:
- trace_sched_wakeup(rq, p, success);
+ trace_sched_wakeup(p, success);
check_preempt_curr(rq, p, wake_flags);
p->state = TASK_RUNNING;
@@ -2537,42 +2445,9 @@ static void __sched_fork(struct task_struct *p)
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
- p->se.last_wakeup = 0;
- p->se.avg_overlap = 0;
- p->se.start_runtime = 0;
- p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
#ifdef CONFIG_SCHEDSTATS
- p->se.wait_start = 0;
- p->se.wait_max = 0;
- p->se.wait_count = 0;
- p->se.wait_sum = 0;
-
- p->se.sleep_start = 0;
- p->se.sleep_max = 0;
- p->se.sum_sleep_runtime = 0;
-
- p->se.block_start = 0;
- p->se.block_max = 0;
- p->se.exec_max = 0;
- p->se.slice_max = 0;
-
- p->se.nr_migrations_cold = 0;
- p->se.nr_failed_migrations_affine = 0;
- p->se.nr_failed_migrations_running = 0;
- p->se.nr_failed_migrations_hot = 0;
- p->se.nr_forced_migrations = 0;
-
- p->se.nr_wakeups = 0;
- p->se.nr_wakeups_sync = 0;
- p->se.nr_wakeups_migrate = 0;
- p->se.nr_wakeups_local = 0;
- p->se.nr_wakeups_remote = 0;
- p->se.nr_wakeups_affine = 0;
- p->se.nr_wakeups_affine_attempts = 0;
- p->se.nr_wakeups_passive = 0;
- p->se.nr_wakeups_idle = 0;
-
+ memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
INIT_LIST_HEAD(&p->rt.run_list);
@@ -2593,11 +2468,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
__sched_fork(p);
/*
- * We mark the process as waking here. This guarantees that
+ * We mark the process as running here. This guarantees that
* nobody will actually run it, and a signal or other external
* event cannot wake it up and insert it on the runqueue either.
*/
- p->state = TASK_WAKING;
+ p->state = TASK_RUNNING;
/*
* Revert to default priority/policy on fork if requested.
@@ -2664,31 +2539,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
int cpu __maybe_unused = get_cpu();
#ifdef CONFIG_SMP
+ rq = task_rq_lock(p, &flags);
+ p->state = TASK_WAKING;
+
/*
* Fork balancing, do it here and not earlier because:
* - cpus_allowed can change in the fork path
* - any previously selected cpu might disappear through hotplug
*
- * We still have TASK_WAKING but PF_STARTING is gone now, meaning
- * ->cpus_allowed is stable, we have preemption disabled, meaning
- * cpu_online_mask is stable.
+ * We set TASK_WAKING so that select_task_rq() can drop rq->lock
+ * without people poking at ->cpus_allowed.
*/
- cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+ cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
set_task_cpu(p, cpu);
-#endif
-
- /*
- * Since the task is not on the rq and we still have TASK_WAKING set
- * nobody else will migrate this task.
- */
- rq = cpu_rq(cpu);
- raw_spin_lock_irqsave(&rq->lock, flags);
- BUG_ON(p->state != TASK_WAKING);
p->state = TASK_RUNNING;
- update_rq_clock(rq);
+ task_rq_unlock(rq, &flags);
+#endif
+
+ rq = task_rq_lock(p, &flags);
activate_task(rq, p, 0);
- trace_sched_wakeup_new(rq, p, 1);
+ trace_sched_wakeup_new(p, 1);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
@@ -2908,7 +2779,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_sched_switch(rq, prev, next);
+ trace_sched_switch(prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -3025,6 +2896,61 @@ static unsigned long calc_load_update;
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun);
+static long calc_load_fold_active(struct rq *this_rq)
+{
+ long nr_active, delta = 0;
+
+ nr_active = this_rq->nr_running;
+ nr_active += (long) this_rq->nr_uninterruptible;
+
+ if (nr_active != this_rq->calc_load_active) {
+ delta = nr_active - this_rq->calc_load_active;
+ this_rq->calc_load_active = nr_active;
+ }
+
+ return delta;
+}
+
+#ifdef CONFIG_NO_HZ
+/*
+ * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
+ *
+ * When making the ILB scale, we should try to pull this in as well.
+ */
+static atomic_long_t calc_load_tasks_idle;
+
+static void calc_load_account_idle(struct rq *this_rq)
+{
+ long delta;
+
+ delta = calc_load_fold_active(this_rq);
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks_idle);
+}
+
+static long calc_load_fold_idle(void)
+{
+ long delta = 0;
+
+ /*
+ * Its got a race, we don't care...
+ */
+ if (atomic_long_read(&calc_load_tasks_idle))
+ delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
+
+ return delta;
+}
+#else
+static void calc_load_account_idle(struct rq *this_rq)
+{
+}
+
+static inline long calc_load_fold_idle(void)
+{
+ return 0;
+}
+#endif
+
/**
* get_avenrun - get the load average array
* @loads: pointer to dest load array
@@ -3071,20 +2997,22 @@ void calc_global_load(void)
}
/*
- * Either called from update_cpu_load() or from a cpu going idle
+ * Called from update_cpu_load() to periodically update this CPU's
+ * active count.
*/
static void calc_load_account_active(struct rq *this_rq)
{
- long nr_active, delta;
+ long delta;
- nr_active = this_rq->nr_running;
- nr_active += (long) this_rq->nr_uninterruptible;
+ if (time_before(jiffies, this_rq->calc_load_update))
+ return;
- if (nr_active != this_rq->calc_load_active) {
- delta = nr_active - this_rq->calc_load_active;
- this_rq->calc_load_active = nr_active;
+ delta = calc_load_fold_active(this_rq);
+ delta += calc_load_fold_idle();
+ if (delta)
atomic_long_add(delta, &calc_load_tasks);
- }
+
+ this_rq->calc_load_update += LOAD_FREQ;
}
/*
@@ -3116,10 +3044,7 @@ static void update_cpu_load(struct rq *this_rq)
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
}
- if (time_after_eq(jiffies, this_rq->calc_load_update)) {
- this_rq->calc_load_update += LOAD_FREQ;
- calc_load_account_active(this_rq);
- }
+ calc_load_account_active(this_rq);
}
#ifdef CONFIG_SMP
@@ -3131,44 +3056,27 @@ static void update_cpu_load(struct rq *this_rq)
void sched_exec(void)
{
struct task_struct *p = current;
- struct migration_req req;
- int dest_cpu, this_cpu;
unsigned long flags;
struct rq *rq;
-
-again:
- this_cpu = get_cpu();
- dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
- if (dest_cpu == this_cpu) {
- put_cpu();
- return;
- }
+ int dest_cpu;
rq = task_rq_lock(p, &flags);
- put_cpu();
+ dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+ if (dest_cpu == smp_processor_id())
+ goto unlock;
/*
* select_task_rq() can race against ->cpus_allowed
*/
- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
- || unlikely(!cpu_active(dest_cpu))) {
- task_rq_unlock(rq, &flags);
- goto again;
- }
+ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
+ likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+ struct migration_arg arg = { p, dest_cpu };
- /* force the process onto the specified CPU */
- if (migrate_task(p, dest_cpu, &req)) {
- /* Need to wait for migration thread (might exit: take ref). */
- struct task_struct *mt = rq->migration_thread;
-
- get_task_struct(mt);
task_rq_unlock(rq, &flags);
- wake_up_process(mt);
- put_task_struct(mt);
- wait_for_completion(&req.done);
-
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
return;
}
+unlock:
task_rq_unlock(rq, &flags);
}
@@ -3640,23 +3548,9 @@ static inline void schedule_debug(struct task_struct *prev)
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
- if (prev->state == TASK_RUNNING) {
- u64 runtime = prev->se.sum_exec_runtime;
-
- runtime -= prev->se.prev_sum_exec_runtime;
- runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-
- /*
- * In order to avoid avg_overlap growing stale when we are
- * indeed overlapping and hence not getting put to sleep, grow
- * the avg_overlap on preemption.
- *
- * We use the average preemption runtime because that
- * correlates to the amount of cache footprint a task can
- * build up.
- */
- update_avg(&prev->se.avg_overlap, runtime);
- }
+ if (prev->se.on_rq)
+ update_rq_clock(rq);
+ rq->skip_clock_update = 0;
prev->sched_class->put_prev_task(rq, prev);
}
@@ -3706,7 +3600,7 @@ need_resched:
preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- rcu_sched_qs(cpu);
+ rcu_note_context_switch(cpu);
prev = rq->curr;
switch_count = &prev->nivcsw;
@@ -3719,14 +3613,13 @@ need_resched_nonpreemptible:
hrtick_clear(rq);
raw_spin_lock_irq(&rq->lock);
- update_rq_clock(rq);
clear_tsk_need_resched(prev);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev)))
prev->state = TASK_RUNNING;
else
- deactivate_task(rq, prev, 1);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
switch_count = &prev->nvcsw;
}
@@ -3950,6 +3843,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
{
__wake_up_common(q, mode, 1, 0, NULL);
}
+EXPORT_SYMBOL_GPL(__wake_up_locked);
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
{
@@ -4049,8 +3943,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
- wait.flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(&x->wait, &wait);
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
@@ -4293,7 +4186,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
BUG_ON(prio < 0 || prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
- update_rq_clock(rq);
oldprio = p->prio;
prev_class = p->sched_class;
@@ -4314,7 +4206,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
if (on_rq) {
- enqueue_task(rq, p, 0, oldprio < prio);
+ enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
check_class_changed(rq, p, prev_class, oldprio, running);
}
@@ -4336,7 +4228,6 @@ void set_user_nice(struct task_struct *p, long nice)
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
- update_rq_clock(rq);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -4358,7 +4249,7 @@ void set_user_nice(struct task_struct *p, long nice)
delta = p->prio - old_prio;
if (on_rq) {
- enqueue_task(rq, p, 0, false);
+ enqueue_task(rq, p, 0);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -4619,7 +4510,6 @@ recheck:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
- update_rq_clock(rq);
on_rq = p->se.on_rq;
running = task_current(rq, p);
if (on_rq)
@@ -5356,17 +5246,15 @@ static inline void sched_init_granularity(void)
/*
* This is how migration works:
*
- * 1) we queue a struct migration_req structure in the source CPU's
- * runqueue and wake up that CPU's migration thread.
- * 2) we down() the locked semaphore => thread blocks.
- * 3) migration thread wakes up (implicitly it forces the migrated
- * thread off the CPU)
- * 4) it gets the migration request and checks whether the migrated
- * task is still in the wrong runqueue.
- * 5) if it's in the wrong runqueue then the migration thread removes
+ * 1) we invoke migration_cpu_stop() on the target CPU using
+ * stop_one_cpu().
+ * 2) stopper starts to run (implicitly forcing the migrated thread
+ * off the CPU)
+ * 3) it checks whether the migrated task is still in the wrong runqueue.
+ * 4) if it's in the wrong runqueue then the migration thread removes
* it and puts it into the right queue.
- * 6) migration thread up()s the semaphore.
- * 7) we wake up and the migration is done.
+ * 5) stopper completes and stop_one_cpu() returns and the migration
+ * is done.
*/
/*
@@ -5380,12 +5268,23 @@ static inline void sched_init_granularity(void)
*/
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
- struct migration_req req;
unsigned long flags;
struct rq *rq;
+ unsigned int dest_cpu;
int ret = 0;
+ /*
+ * Serialize against TASK_WAKING so that ttwu() and wunt() can
+ * drop the rq->lock and still rely on ->cpus_allowed.
+ */
+again:
+ while (task_is_waking(p))
+ cpu_relax();
rq = task_rq_lock(p, &flags);
+ if (task_is_waking(p)) {
+ task_rq_unlock(rq, &flags);
+ goto again;
+ }
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
@@ -5409,15 +5308,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
+ dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+ if (migrate_task(p, dest_cpu)) {
+ struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
- struct task_struct *mt = rq->migration_thread;
-
- get_task_struct(mt);
task_rq_unlock(rq, &flags);
- wake_up_process(mt);
- put_task_struct(mt);
- wait_for_completion(&req.done);
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
}
@@ -5475,98 +5371,49 @@ fail:
return ret;
}
-#define RCU_MIGRATION_IDLE 0
-#define RCU_MIGRATION_NEED_QS 1
-#define RCU_MIGRATION_GOT_QS 2
-#define RCU_MIGRATION_MUST_SYNC 3
-
/*
- * migration_thread - this is a highprio system thread that performs
- * thread migration by bumping thread off CPU then 'pushing' onto
- * another runqueue.
+ * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * and performs thread migration by bumping thread off CPU then
+ * 'pushing' onto another runqueue.
*/
-static int migration_thread(void *data)
-{
- int badcpu;
- int cpu = (long)data;
- struct rq *rq;
-
- rq = cpu_rq(cpu);
- BUG_ON(rq->migration_thread != current);
-
- set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop()) {
- struct migration_req *req;
- struct list_head *head;
-
- raw_spin_lock_irq(&rq->lock);
-
- if (cpu_is_offline(cpu)) {
- raw_spin_unlock_irq(&rq->lock);
- break;
- }
-
- if (rq->active_balance) {
- active_load_balance(rq, cpu);
- rq->active_balance = 0;
- }
-
- head = &rq->migration_queue;
-
- if (list_empty(head)) {
- raw_spin_unlock_irq(&rq->lock);
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
- continue;
- }
- req = list_entry(head->next, struct migration_req, list);
- list_del_init(head->next);
-
- if (req->task != NULL) {
- raw_spin_unlock(&rq->lock);
- __migrate_task(req->task, cpu, req->dest_cpu);
- } else if (likely(cpu == (badcpu = smp_processor_id()))) {
- req->dest_cpu = RCU_MIGRATION_GOT_QS;
- raw_spin_unlock(&rq->lock);
- } else {
- req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
- raw_spin_unlock(&rq->lock);
- WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
- }
- local_irq_enable();
-
- complete(&req->done);
- }
- __set_current_state(TASK_RUNNING);
-
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
+static int migration_cpu_stop(void *data)
{
- int ret;
+ struct migration_arg *arg = data;
+ /*
+ * The original target cpu might have gone down and we might
+ * be on another cpu but it doesn't matter.
+ */
local_irq_disable();
- ret = __migrate_task(p, src_cpu, dest_cpu);
+ __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
local_irq_enable();
- return ret;
+ return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
/*
* Figure out where task on dead CPU should go, use force if necessary.
*/
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
- int dest_cpu;
+ struct rq *rq = cpu_rq(dead_cpu);
+ int needs_cpu, uninitialized_var(dest_cpu);
+ unsigned long flags;
-again:
- dest_cpu = select_fallback_rq(dead_cpu, p);
+ local_irq_save(flags);
- /* It can have affinity changed while we were choosing. */
- if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
- goto again;
+ raw_spin_lock(&rq->lock);
+ needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
+ if (needs_cpu)
+ dest_cpu = select_fallback_rq(dead_cpu, p);
+ raw_spin_unlock(&rq->lock);
+ /*
+ * It can only fail if we race with set_cpus_allowed(),
+ * in the racer should migrate the task anyway.
+ */
+ if (needs_cpu)
+ __migrate_task(p, dead_cpu, dest_cpu);
+ local_irq_restore(flags);
}
/*
@@ -5630,7 +5477,6 @@ void sched_idle_next(void)
__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
- update_rq_clock(rq);
activate_task(rq, p, 0);
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -5685,7 +5531,6 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
for ( ; ; ) {
if (!rq->nr_running)
break;
- update_rq_clock(rq);
next = pick_next_task(rq);
if (!next)
break;
@@ -5908,35 +5753,20 @@ static void set_rq_offline(struct rq *rq)
static int __cpuinit
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
- struct task_struct *p;
int cpu = (long)hcpu;
unsigned long flags;
- struct rq *rq;
+ struct rq *rq = cpu_rq(cpu);
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
- if (IS_ERR(p))
- return NOTIFY_BAD;
- kthread_bind(p, cpu);
- /* Must be high prio: stop_machine expects to yield to it. */
- rq = task_rq_lock(p, &flags);
- __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
- task_rq_unlock(rq, &flags);
- get_task_struct(p);
- cpu_rq(cpu)->migration_thread = p;
rq->calc_load_update = calc_load_update;
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- /* Strictly unnecessary, as first user will wake it. */
- wake_up_process(cpu_rq(cpu)->migration_thread);
-
/* Update our root-domain */
- rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -5947,61 +5777,24 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- if (!cpu_rq(cpu)->migration_thread)
- break;
- /* Unbind it from offline cpu so it can run. Fall thru. */
- kthread_bind(cpu_rq(cpu)->migration_thread,
- cpumask_any(cpu_online_mask));
- kthread_stop(cpu_rq(cpu)->migration_thread);
- put_task_struct(cpu_rq(cpu)->migration_thread);
- cpu_rq(cpu)->migration_thread = NULL;
- break;
-
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
migrate_live_tasks(cpu);
- rq = cpu_rq(cpu);
- kthread_stop(rq->migration_thread);
- put_task_struct(rq->migration_thread);
- rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
raw_spin_lock_irq(&rq->lock);
- update_rq_clock(rq);
deactivate_task(rq, rq->idle, 0);
__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
rq->idle->sched_class = &idle_sched_class;
migrate_dead_tasks(cpu);
raw_spin_unlock_irq(&rq->lock);
- cpuset_unlock();
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);
calc_global_load_remove(rq);
- /*
- * No need to migrate the tasks: it was best-effort if
- * they didn't take sched_hotcpu_mutex. Just wake up
- * the requestors.
- */
- raw_spin_lock_irq(&rq->lock);
- while (!list_empty(&rq->migration_queue)) {
- struct migration_req *req;
-
- req = list_entry(rq->migration_queue.next,
- struct migration_req, list);
- list_del_init(&req->list);
- raw_spin_unlock_irq(&rq->lock);
- complete(&req->done);
- raw_spin_lock_irq(&rq->lock);
- }
- raw_spin_unlock_irq(&rq->lock);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
/* Update our root-domain */
- rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -6332,6 +6125,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
+ for (tmp = sd; tmp; tmp = tmp->parent)
+ tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
+
/* Remove the sched domains which do not contribute to scheduling. */
for (tmp = sd; tmp; ) {
struct sched_domain *parent = tmp->parent;
@@ -7815,10 +7611,8 @@ void __init sched_init(void)
rq->push_cpu = 0;
rq->cpu = i;
rq->online = 0;
- rq->migration_thread = NULL;
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
- INIT_LIST_HEAD(&rq->migration_queue);
rq_attach_root(rq, &def_root_domain);
#endif
init_rq_hrtick(rq);
@@ -7919,7 +7713,6 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
{
int on_rq;
- update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
deactivate_task(rq, p, 0);
@@ -7946,9 +7739,9 @@ void normalize_rt_tasks(void)
p->se.exec_start = 0;
#ifdef CONFIG_SCHEDSTATS
- p->se.wait_start = 0;
- p->se.sleep_start = 0;
- p->se.block_start = 0;
+ p->se.statistics.wait_start = 0;
+ p->se.statistics.sleep_start = 0;
+ p->se.statistics.block_start = 0;
#endif
if (!rt_task(p)) {
@@ -7975,9 +7768,9 @@ void normalize_rt_tasks(void)
#endif /* CONFIG_MAGIC_SYSRQ */
-#ifdef CONFIG_IA64
+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
/*
- * These functions are only useful for the IA64 MCA handling.
+ * These functions are only useful for the IA64 MCA handling, or kdb.
*
* They can only be called when the whole system has been
* stopped - every CPU needs to be quiescent, and no scheduling
@@ -7997,6 +7790,9 @@ struct task_struct *curr_task(int cpu)
return cpu_curr(cpu);
}
+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
+
+#ifdef CONFIG_IA64
/**
* set_curr_task - set the current task for a given cpu.
* @cpu: the processor in question.
@@ -8281,8 +8077,6 @@ void sched_move_task(struct task_struct *tsk)
rq = task_rq_lock(tsk, &flags);
- update_rq_clock(rq);
-
running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
@@ -8301,7 +8095,7 @@ void sched_move_task(struct task_struct *tsk)
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
if (on_rq)
- enqueue_task(rq, tsk, 0, false);
+ enqueue_task(rq, tsk, 0);
task_rq_unlock(rq, &flags);
}
@@ -9115,43 +8909,32 @@ struct cgroup_subsys cpuacct_subsys = {
#ifndef CONFIG_SMP
-int rcu_expedited_torture_stats(char *page)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
-
void synchronize_sched_expedited(void)
{
+ barrier();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#else /* #ifndef CONFIG_SMP */
-static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
-static DEFINE_MUTEX(rcu_sched_expedited_mutex);
+static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-#define RCU_EXPEDITED_STATE_POST -2
-#define RCU_EXPEDITED_STATE_IDLE -1
-
-static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
-
-int rcu_expedited_torture_stats(char *page)
+static int synchronize_sched_expedited_cpu_stop(void *data)
{
- int cnt = 0;
- int cpu;
-
- cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
- for_each_online_cpu(cpu) {
- cnt += sprintf(&page[cnt], " %d:%d",
- cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
- }
- cnt += sprintf(&page[cnt], "\n");
- return cnt;
+ /*
+ * There must be a full memory barrier on each affected CPU
+ * between the time that try_stop_cpus() is called and the
+ * time that it returns.
+ *
+ * In the current initial implementation of cpu_stop, the
+ * above condition is already met when the control reaches
+ * this point and the following smp_mb() is not strictly
+ * necessary. Do smp_mb() anyway for documentation and
+ * robustness against future implementation changes.
+ */
+ smp_mb(); /* See above comment block. */
+ return 0;
}
-EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
-
-static long synchronize_sched_expedited_count;
/*
* Wait for an rcu-sched grace period to elapse, but use "big hammer"
@@ -9165,18 +8948,14 @@ static long synchronize_sched_expedited_count;
*/
void synchronize_sched_expedited(void)
{
- int cpu;
- unsigned long flags;
- bool need_full_sync = 0;
- struct rq *rq;
- struct migration_req *req;
- long snap;
- int trycount = 0;
+ int snap, trycount = 0;
smp_mb(); /* ensure prior mod happens before capturing snap. */
- snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+ snap = atomic_read(&synchronize_sched_expedited_count) + 1;
get_online_cpus();
- while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+ while (try_stop_cpus(cpu_online_mask,
+ synchronize_sched_expedited_cpu_stop,
+ NULL) == -EAGAIN) {
put_online_cpus();
if (trycount++ < 10)
udelay(trycount * num_online_cpus());
@@ -9184,41 +8963,15 @@ void synchronize_sched_expedited(void)
synchronize_sched();
return;
}
- if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+ if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
smp_mb(); /* ensure test happens before caller kfree */
return;
}
get_online_cpus();
}
- rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
- for_each_online_cpu(cpu) {
- rq = cpu_rq(cpu);
- req = &per_cpu(rcu_migration_req, cpu);
- init_completion(&req->done);
- req->task = NULL;
- req->dest_cpu = RCU_MIGRATION_NEED_QS;
- raw_spin_lock_irqsave(&rq->lock, flags);
- list_add(&req->list, &rq->migration_queue);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- wake_up_process(rq->migration_thread);
- }
- for_each_online_cpu(cpu) {
- rcu_expedited_state = cpu;
- req = &per_cpu(rcu_migration_req, cpu);
- rq = cpu_rq(cpu);
- wait_for_completion(&req->done);
- raw_spin_lock_irqsave(&rq->lock, flags);
- if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
- need_full_sync = 1;
- req->dest_cpu = RCU_MIGRATION_IDLE;
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- }
- rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
- synchronize_sched_expedited_count++;
- mutex_unlock(&rcu_sched_expedited_mutex);
+ atomic_inc(&synchronize_sched_expedited_count);
+ smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
put_online_cpus();
- if (need_full_sync)
- synchronize_sched();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 5b496132c28..906a0f718cb 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -41,6 +41,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
* (NSEC_PER_SEC / HZ);
}
+EXPORT_SYMBOL_GPL(sched_clock);
static __read_mostly int sched_clock_running;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 19be00ba612..35565395d00 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -70,16 +70,16 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu,
PN(se->vruntime);
PN(se->sum_exec_runtime);
#ifdef CONFIG_SCHEDSTATS
- PN(se->wait_start);
- PN(se->sleep_start);
- PN(se->block_start);
- PN(se->sleep_max);
- PN(se->block_max);
- PN(se->exec_max);
- PN(se->slice_max);
- PN(se->wait_max);
- PN(se->wait_sum);
- P(se->wait_count);
+ PN(se->statistics.wait_start);
+ PN(se->statistics.sleep_start);
+ PN(se->statistics.block_start);
+ PN(se->statistics.sleep_max);
+ PN(se->statistics.block_max);
+ PN(se->statistics.exec_max);
+ PN(se->statistics.slice_max);
+ PN(se->statistics.wait_max);
+ PN(se->statistics.wait_sum);
+ P(se->statistics.wait_count);
#endif
P(se->load.weight);
#undef PN
@@ -104,7 +104,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
SPLIT_NS(p->se.vruntime),
SPLIT_NS(p->se.sum_exec_runtime),
- SPLIT_NS(p->se.sum_sleep_runtime));
+ SPLIT_NS(p->se.statistics.sum_sleep_runtime));
#else
SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
@@ -175,11 +175,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
-#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
- {
- uid_t uid = cfs_rq->tg->uid;
- SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
- }
#else
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
@@ -386,15 +381,9 @@ __initcall(init_sched_debug_procfs);
void proc_sched_show_task(struct task_struct *p, struct seq_file *