diff options
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2010-11-24 12:57:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-25 06:50:45 +0900
commite9959f0f37160e1f5351af828cc981712b5066c1 (patch)
parenta42c390cfa0c2612459d7226ba11612847ca3a64 (diff)
mm/page_alloc.c: fix build_all_zonelist() where percpu_alloc() is wrongly called under stop_machine_run()
During memory hotplug, build_allzonelists() may be called under stop_machine_run(). In this function, setup_zone_pageset() is called. But it's bug because it will do page allocation under stop_machine_run(). Here is a report from Alok Kataria. BUG: sleeping function called from invalid context at kernel/mutex.c:94 in_atomic(): 0, irqs_disabled(): 1, pid: 4, name: migration/0 Pid: 4, comm: migration/0 Not tainted #1 Call Trace: [<ffffffff8103d12b>] __might_sleep+0xeb/0xf0 [<ffffffff81468245>] mutex_lock+0x24/0x50 [<ffffffff8110eaa6>] pcpu_alloc+0x6d/0x7ee [<ffffffff81048888>] ? load_balance+0xbe/0x60e [<ffffffff8103a1b3>] ? rt_se_boosted+0x21/0x2f [<ffffffff8103e1cf>] ? dequeue_rt_stack+0x18b/0x1ed [<ffffffff8110f237>] __alloc_percpu+0x10/0x12 [<ffffffff81465e22>] setup_zone_pageset+0x38/0xbe [<ffffffff810d6d81>] ? build_zonelists_node.clone.58+0x79/0x8c [<ffffffff81452539>] __build_all_zonelists+0x419/0x46c [<ffffffff8108ef01>] ? cpu_stopper_thread+0xb2/0x198 [<ffffffff8108f075>] stop_machine_cpu_stop+0x8e/0xc5 [<ffffffff8108efe7>] ? stop_machine_cpu_stop+0x0/0xc5 [<ffffffff8108ef57>] cpu_stopper_thread+0x108/0x198 [<ffffffff81467a37>] ? schedule+0x5b2/0x5cc [<ffffffff8108ee4f>] ? cpu_stopper_thread+0x0/0x198 [<ffffffff81065f29>] kthread+0x7f/0x87 [<ffffffff8100aae4>] kernel_thread_helper+0x4/0x10 [<ffffffff81065eaa>] ? kthread+0x0/0x87 [<ffffffff8100aae0>] ? kernel_thread_helper+0x0/0x10 Built 5 zonelists in Node order, mobility grouping on. Total pages: 289456 Policy zone: Normal This patch tries to fix the issue by moving setup_zone_pageset() out from stop_machine_run(). It's obviously not necessary to be called under stop_machine_run(). [akpm@linux-foundation.org: remove unneeded local] Reported-by: Alok Kataria <akataria@vmware.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Tejun Heo <tj@kernel.org> Cc: Petr Vandrovec <petr@vmware.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 files changed, 5 insertions, 9 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07a654486f7..e4092704c1a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3008,14 +3008,6 @@ static __init_refok int __build_all_zonelists(void *data)
- /* Setup real pagesets for the new zone */
- if (data) {
- struct zone *zone = data;
- setup_zone_pageset(zone);
- }
* Initialize the boot_pagesets that are going to be used
* for bootstrapping processors. The real pagesets for
@@ -3064,7 +3056,11 @@ void build_all_zonelists(void *data)
} else {
/* we have to stop all cpus to guarantee there is no user
of zonelist */
- stop_machine(__build_all_zonelists, data, NULL);
+ if (data)
+ setup_zone_pageset((struct zone *)data);
+ stop_machine(__build_all_zonelists, NULL, NULL);
/* cpuset refresh routine should be here */
vm_total_pages = nr_free_pagecache_pages();