aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-06-28 19:46:47 +0200
committerIngo Molnar <mingo@kernel.org>2019-06-28 19:46:47 +0200
commit83086d654dd08c0f57381522e6819f421677706e (patch)
treebcb3d6f66916134de1a86c67db8a5829e3885ef3
parent556e2f6020bf90f63c5dd65e9a2254be6db3185b (diff)
parentb989ff070574ad8b8621d866de0a8e9a65d42c80 (diff)
downloadtip-core/rcu.tar.gz
tip-core/rcu.tar.xz
tip-core/rcu.zip
Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcucore/rcu
Pull rcu/next + tools/memory-model changes from Paul E. McKenney: - RCU flavor consolidation cleanups and optmizations - Documentation updates - Miscellaneous fixes - SRCU updates - RCU-sync flavor consolidation - Torture-test updates - Linux-kernel memory-consistency-model updates, most notably the addition of plain C-language accesses Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/RCU/rcuref.txt21
-rw-r--r--Documentation/RCU/stallwarn.txt2
-rw-r--r--Documentation/RCU/whatisRCU.txt8
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/atomic_t.txt17
-rw-r--r--Documentation/core-api/circular-buffers.rst2
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt2
-rw-r--r--include/linux/lockdep.h7
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/percpu-rwsem.h10
-rw-r--r--include/linux/rcu_sync.h40
-rw-r--r--include/linux/rcupdate.h21
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/srcutree.h14
-rw-r--r--include/linux/torture.h2
-rw-r--r--kernel/cgroup/cgroup.c3
-rw-r--r--kernel/events/uprobes.c4
-rw-r--r--kernel/locking/locktorture.c2
-rw-r--r--kernel/locking/percpu-rwsem.c2
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/rcu/rcu.h5
-rw-r--r--kernel/rcu/rcutorture.c96
-rw-r--r--kernel/rcu/srcutree.c69
-rw-r--r--kernel/rcu/sync.c214
-rw-r--r--kernel/rcu/tree.c164
-rw-r--r--kernel/rcu/tree.h6
-rw-r--r--kernel/rcu/tree_exp.h53
-rw-r--r--kernel/rcu/tree_plugin.h195
-rw-r--r--kernel/rcu/tree_stall.h4
-rw-r--r--kernel/rcu/update.c13
-rw-r--r--kernel/torture.c23
-rw-r--r--tools/include/linux/rcu.h4
-rw-r--r--tools/memory-model/linux-kernel.bell6
-rw-r--r--tools/memory-model/linux-kernel.cat102
-rw-r--r--tools/memory-model/linux-kernel.def1
-rw-r--r--tools/memory-model/litmus-tests/MP+poonceonces.litmus2
-rw-r--r--tools/memory-model/litmus-tests/README2
-rw-r--r--tools/memory-model/lock.cat2
-rw-r--r--tools/memory-model/scripts/README4
-rwxr-xr-xtools/memory-model/scripts/checkalllitmus.sh2
-rwxr-xr-xtools/memory-model/scripts/checklitmus.sh2
-rw-r--r--tools/memory-model/scripts/parseargs.sh2
-rw-r--r--tools/memory-model/scripts/runlitmushist.sh2
-rw-r--r--tools/testing/radix-tree/linux/rcupdate.h2
-rw-r--r--tools/testing/selftests/rcutorture/Makefile3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh5
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot3
61 files changed, 845 insertions, 466 deletions
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
index 613033ff2b9b..5e6429d66c24 100644
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -12,6 +12,7 @@ please read on.
Reference counting on elements of lists which are protected by traditional
reader/writer spinlocks or semaphores are straightforward:
+CODE LISTING A:
1. 2.
add() search_and_reference()
{ {
@@ -28,7 +29,8 @@ add() search_and_reference()
release_referenced() delete()
{ {
... write_lock(&list_lock);
- atomic_dec(&el->rc, relfunc) ...
+ if(atomic_dec_and_test(&el->rc)) ...
+ kfree(el);
... remove_element
} write_unlock(&list_lock);
...
@@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which
has already been deleted from the list/array. Use atomic_inc_not_zero()
in this scenario as follows:
+CODE LISTING B:
1. 2.
add() search_and_reference()
{ {
@@ -79,6 +82,7 @@ search_and_reference() code path. In such cases, the
atomic_dec_and_test() may be moved from delete() to el_free()
as follows:
+CODE LISTING C:
1. 2.
add() search_and_reference()
{ {
@@ -114,6 +118,17 @@ element can therefore safely be freed. This in turn guarantees that if
any reader finds the element, that reader may safely acquire a reference
without checking the value of the reference counter.
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on. Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
In cases where delete() can sleep, synchronize_rcu() can be called from
delete(), so that el_free() can be subsumed into delete as follows:
@@ -130,3 +145,7 @@ delete()
kfree(el);
...
}
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 1ab70c37921f..13e88fc00f01 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout
This boot/sysfs parameter controls the RCU-tasks stall warning
interval. A value of zero or less suppresses RCU-tasks stall
warnings. A positive value sets the stall-warning interval
- in jiffies. An RCU-tasks stall warning starts with the line:
+ in seconds. An RCU-tasks stall warning starts with the line:
INFO: rcu_tasks detected stalls on tasks:
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..7e1a8721637a 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -212,7 +212,7 @@ synchronize_rcu()
rcu_assign_pointer()
- typeof(p) rcu_assign_pointer(p, typeof(p) v);
+ void rcu_assign_pointer(p, typeof(p) v);
Yes, rcu_assign_pointer() -is- implemented as a macro, though it
would be cool to be able to declare a function in this manner.
@@ -220,9 +220,9 @@ rcu_assign_pointer()
The updater uses this function to assign a new value to an
RCU-protected pointer, in order to safely communicate the change
- in value from the updater to the reader. This function returns
- the new value, and also executes any memory-barrier instructions
- required for a given CPU architecture.
+ in value from the updater to the reader. This macro does not
+ evaluate to an rvalue, but it does execute any memory-barrier
+ instructions required for a given CPU architecture.
Perhaps just as important, it serves to document (1) which
pointers are protected by RCU and (2) the point at which a
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..b96fd15c7316 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3752,6 +3752,12 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
rcutree.rcu_fanout_exact= [KNL]
Disable autobalancing of the rcu_node combining
tree. This is used by rcutorture, and might
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index dca3fb0554db..b3afe69d03a1 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -187,8 +187,14 @@ The barriers:
smp_mb__{before,after}_atomic()
-only apply to the RMW ops and can be used to augment/upgrade the ordering
-inherent to the used atomic op. These barriers provide a full smp_mb().
+only apply to the RMW atomic ops and can be used to augment/upgrade the
+ordering inherent to the op. These barriers act almost like a full smp_mb():
+smp_mb__before_atomic() orders all earlier accesses against the RMW op
+itself and all accesses following it, and smp_mb__after_atomic() orders all
+later accesses against the RMW op and all accesses preceding it. However,
+accesses between the smp_mb__{before,after}_atomic() and the RMW op are not
+ordered, so it is advisable to place the barrier right next to the RMW atomic
+op whenever possible.
These helper barriers exist because architectures have varying implicit
ordering on their SMP atomic primitives. For example our TSO architectures
@@ -212,7 +218,9 @@ Further, while something like:
atomic_dec(&X);
is a 'typical' RELEASE pattern, the barrier is strictly stronger than
-a RELEASE. Similarly for something like:
+a RELEASE because it orders preceding instructions against both the read
+and write parts of the atomic_dec(), and against all following instructions
+as well. Similarly, something like:
atomic_inc(&X);
smp_mb__after_atomic();
@@ -244,7 +252,8 @@ strictly stronger than ACQUIRE. As illustrated:
This should not happen; but a hypothetical atomic_inc_acquire() --
(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
-since then:
+because it would not order the W part of the RMW against the following
+WRITE_ONCE. Thus:
P1 P2
diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..50966f66e398 100644
--- a/Documentation/core-api/circular-buffers.rst
+++ b/Documentation/core-api/circular-buffers.rst
@@ -3,7 +3,7 @@ Circular Buffers
================
:Author: David Howells <dhowells@redhat.com>
-:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: Paul E. McKenney <paulmck@linux.ibm.com>
Linux provides a number of features that can be used to implement circular
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f70ebcdfe592..e4e07c8ab89e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,7 +3,7 @@
============================
By: David Howells <dhowells@redhat.com>
- Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index db0b9d8619f1..5f3c74dcad43 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -24,7 +24,7 @@ Documentation/memory-barriers.txt
=========================
저자: David Howells <dhowells@redhat.com>
- Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6e2377e6c1d6..e8eef38b2213 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -632,11 +632,18 @@ do { \
"IRQs not disabled as expected\n"); \
} while (0)
+#define lockdep_assert_in_irq() do { \
+ WARN_ONCE(debug_locks && !current->lockdep_recursion && \
+ !current->hardirq_context, \
+ "Not in hardirq as expected\n"); \
+ } while (0)
+
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
+# define lockdep_assert_in_irq() do { } while (0)
#endif
#ifdef CONFIG_LOCKDEP
diff --git a/include/linux/module.h b/include/linux/module.h
index 188998d3dca9..1455812dd325 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
+#include <linux/srcu.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -450,6 +451,10 @@ struct module {
unsigned int num_tracepoints;
tracepoint_ptr_t *tracepoints_ptrs;
#endif
+#ifdef CONFIG_TREE_SRCU
+ unsigned int num_srcu_structs;
+ struct srcu_struct **srcu_struct_ptrs;
+#endif
#ifdef CONFIG_BPF_EVENTS
unsigned int num_bpf_raw_events;
struct bpf_raw_event_map *bpf_raw_events;
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 03cb4b6f842e..2809b44cbbee 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -17,14 +17,18 @@ struct percpu_rw_semaphore {
int readers_block;
};
-#define DEFINE_STATIC_PERCPU_RWSEM(name) \
+#define __DEFINE_PERCPU_RWSEM(name, is_static) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
-static struct percpu_rw_semaphore name = { \
- .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
+is_static struct percpu_rw_semaphore name = { \
+ .rss = __RCU_SYNC_INITIALIZER(name.rss), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
}
+#define DEFINE_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, /* not static */)
+#define DEFINE_STATIC_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, static)
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 6fc53a1345b3..9b83865d24f9 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -13,62 +13,44 @@
#include <linux/wait.h>
#include <linux/rcupdate.h>
-enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
-
/* Structure to mediate between updaters and fastpath-using readers. */
struct rcu_sync {
int gp_state;
int gp_count;
wait_queue_head_t gp_wait;
- int cb_state;
struct rcu_head cb_head;
-
- enum rcu_sync_type gp_type;
};
-extern void rcu_sync_lockdep_assert(struct rcu_sync *);
-
/**
* rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
- * Returns true if readers are permitted to use their fastpaths.
- * Must be invoked within an RCU read-side critical section whose
- * flavor matches that of the rcu_sync struture.
+ * Returns true if readers are permitted to use their fastpaths. Must be
+ * invoked within some flavor of RCU read-side critical section.
*/
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
-#ifdef CONFIG_PROVE_RCU
- rcu_sync_lockdep_assert(rsp);
-#endif
- return !rsp->gp_state; /* GP_IDLE */
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
+ !rcu_read_lock_bh_held() &&
+ !rcu_read_lock_sched_held(),
+ "suspicious rcu_sync_is_idle() usage");
+ return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
}
-extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
-#define __RCU_SYNC_INITIALIZER(name, type) { \
+#define __RCU_SYNC_INITIALIZER(name) { \
.gp_state = 0, \
.gp_count = 0, \
.gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
- .cb_state = 0, \
- .gp_type = type, \
}
-#define __DEFINE_RCU_SYNC(name, type) \
- struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type)
-
-#define DEFINE_RCU_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_SYNC)
-
-#define DEFINE_RCU_SCHED_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
-
-#define DEFINE_RCU_BH_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+#define DEFINE_RCU_SYNC(name) \
+ struct rcu_sync name = __RCU_SYNC_INITIALIZER(name)
#endif /* _LINUX_RCU_SYNC_H_ */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b25d20822e75..8f7167478c1d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -365,16 +365,15 @@ static inline void rcu_preempt_sleep_check(void) { }
* other macros that it invokes.
*/
#define rcu_assign_pointer(p, v) \
-({ \
+do { \
uintptr_t _r_a_p__v = (uintptr_t)(v); \
- rcu_check_sparse(p, __rcu); \
+ rcu_check_sparse(p, __rcu); \
\
if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
else \
smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
- _r_a_p__v; \
-})
+} while (0)
/**
* rcu_swap_protected() - swap an RCU and a regular pointer
@@ -586,7 +585,7 @@ static inline void rcu_preempt_sleep_check(void) { }
* read-side critical sections may be preempted and they may also block, but
* only when acquiring spinlocks that are subject to priority inheritance.
*/
-static inline void rcu_read_lock(void)
+static __always_inline void rcu_read_lock(void)
{
__rcu_read_lock();
__acquire(RCU);
@@ -803,7 +802,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
- * @rcu_head: the name of the struct rcu_head within the type of @ptr.
+ * @rhf: the name of the struct rcu_head within the type of @ptr.
*
* Many rcu callbacks functions just call kfree() on the base structure.
* These functions are trivial, but their size adds up, and furthermore
@@ -826,9 +825,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.
*/
-#define kfree_rcu(ptr, rcu_head) \
- __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
-
+#define kfree_rcu(ptr, rhf) \
+do { \
+ typeof (ptr) ___p = (ptr); \
+ \
+ if (___p) \
+ __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+} while (0)
/*
* Place this after a lock-acquisition primitive to guarantee that
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..942a44c1b8eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -565,7 +565,7 @@ union rcu_special {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
- u8 pad; /* No garbage from compiler! */
+ u8 deferred_qs;
} b; /* Bits. */
u32 s; /* Set of bits. */
};
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 7f7c8c050f63..9cfcc8a756ae 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -120,9 +120,17 @@ struct srcu_struct {
*
* See include/linux/percpu-defs.h for the rules on per-CPU variables.
*/
-#define __DEFINE_SRCU(name, is_static) \
- static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
- is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data)
+#ifdef MODULE
+# define __DEFINE_SRCU(name, is_static) \
+ is_static struct srcu_struct name; \
+ struct srcu_struct * const __srcu_struct_##name \
+ __section("___srcu_struct_ptrs") = &name
+#else
+# define __DEFINE_SRCU(name, is_static) \
+ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
+ is_static struct srcu_struct name = \
+ __SRCU_STRUCT_INIT(name, name##_srcu_data)
+#endif
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 23d80db426d7..a620118385bb 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void));
/* Task stuttering, which forces load/no-load transitions. */
bool stutter_wait(const char *title);
-int torture_stutter_init(int s);
+int torture_stutter_init(int s, int sgap);
/* Initialization and cleanup. */
bool torture_init_begin(char *ttype, int v);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bf9dbffd46b1..cdbeff87fa99 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
#define cgroup_assert_mutex_or_rcu_locked() \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -5666,7 +5666,6 @@ int __init cgroup_init(void)
int ssid;
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
- BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..97c367f0a9aa 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -46,7 +46,7 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
-static struct percpu_rw_semaphore dup_mmap_sem;
+DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
@@ -2302,7 +2302,5 @@ void __init uprobes_init(void)
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
- BUG_ON(percpu_init_rwsem(&dup_mmap_sem));
-
BUG_ON(register_die_notifier(&uprobe_exception_nb));
}
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 80a463d31a8d..c513031cd7e3 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -975,7 +975,7 @@ static int __init lock_torture_init(void)
goto unwind;
}
if (stutter > 0) {
- firsterr = torture_stutter_init(stutter);
+ firsterr = torture_stutter_init(stutter, stutter);
if (firsterr)
goto unwind;
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index b6a9cc62099a..364d38a0c444 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
- rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+ rcu_sync_init(&sem->rss);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
sem->readers_block = 0;
diff --git a/kernel/module.c b/kernel/module.c
index 80c7c09584cf..a2cee14a83f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3083,6 +3083,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->tracepoints_ptrs),
&mod->num_tracepoints);
#endif
+#ifdef CONFIG_TREE_SRCU
+ mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs",
+ sizeof(*mod->srcu_struct_ptrs),
+ &mod->num_srcu_structs);
+#endif
#ifdef CONFIG_BPF_EVENTS
mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
sizeof(*mod->bpf_raw_events),
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 390aab20115e..5290b01de534 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -446,6 +446,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
enum rcutorture_type {
RCU_FLAVOR,
RCU_TASKS_FLAVOR,
+ RCU_TRIVIAL_FLAVOR,
SRCU_FLAVOR,
INVALID_RCU_FLAVOR
};
@@ -479,6 +480,10 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#endif
#endif
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
+long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
+#endif
+
#ifdef CONFIG_TINY_SRCU
static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index efaa5b3f4d3f..fce4e7e6f502 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -299,6 +299,7 @@ struct rcu_torture_ops {
int irq_capable;
int can_boost;
int extendables;
+ int slow_gps;
const char *name;
};
@@ -667,9 +668,51 @@ static struct rcu_torture_ops tasks_ops = {
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
+ .slow_gps = 1,
.name = "tasks"
};
+/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+ WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+ }
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+ preempt_disable();
+ return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+ preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+ .ttype = RCU_TRIVIAL_FLAVOR,
+ .init = rcu_sync_torture_init,
+ .readlock = rcu_torture_read_lock_trivial,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock_trivial,
+ .get_gp_seq = rcu_no_completed,
+ .sync = synchronize_rcu_trivial,
+ .exp_sync = synchronize_rcu_trivial,
+ .fqs = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "trivial"
+};
+
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -1010,10 +1053,17 @@ rcu_torture_writer(void *arg)
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
- if (stutter_wait("rcu_torture_writer"))
+ if (stutter_wait("rcu_torture_writer") &&
+ !READ_ONCE(rcu_fwd_cb_nodelay) &&
+ !cur_ops->slow_gps &&
+ !torture_must_stop())
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
- if (list_empty(&rcu_tortures[i].rtort_free))
- WARN_ON_ONCE(1);
+ if (list_empty(&rcu_tortures[i].rtort_free) &&
+ rcu_access_pointer(rcu_torture_current) !=
+ &rcu_tortures[i]) {
+ rcu_ftrace_dump(DUMP_ALL);
+ WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
+ }
} while (!torture_must_stop());
/* Reset expediting back to unexpedited. */
if (expediting > 0)
@@ -1358,8 +1408,9 @@ rcu_torture_stats_print(void)
}
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
- pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
+ pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
+ rcu_torture_current ? "ver" : "VER",
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
@@ -1661,6 +1712,17 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
}
+// Give the scheduler a chance, even on nohz_full CPUs.
+static void rcu_torture_fwd_prog_cond_resched(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+ if (need_resched())
+ schedule();
+ } else {
+ cond_resched();
+ }
+}
+
/*
* Free all callbacks on the rcu_fwd_cb_head list, either because the
* test is over or because we hit an OOM event.
@@ -1674,16 +1736,18 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
for (;;) {
spin_lock_irqsave(&rcu_fwd_lock, flags);
rfcp = rcu_fwd_cb_head;
- if (!rfcp)
+ if (!rfcp) {
+ spin_unlock_irqrestore(&rcu_fwd_lock, flags);
break;
+ }
rcu_fwd_cb_head = rfcp->rfc_next;
if (!rcu_fwd_cb_head)
rcu_fwd_cb_tail = &rcu_fwd_cb_head;
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
kfree(rfcp);
freed++;
+ rcu_torture_fwd_prog_cond_resched();
}
- spin_unlock_irqrestore(&rcu_fwd_lock, flags);
return freed;
}
@@ -1707,6 +1771,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
}
/* Tight loop containing cond_resched(). */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+ cur_ops->sync(); /* Later readers see above write. */
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 0);
cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
@@ -1724,7 +1790,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
udelay(10);
cur_ops->readunlock(idx);
if (!fwd_progress_need_resched || need_resched())
- cond_resched();
+ rcu_torture_fwd_prog_cond_resched();
}
(*tested_tries)++;
if (!time_before(jiffies, stopat) &&
@@ -1745,6 +1811,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
WARN_ON(READ_ONCE(fcs.stop) != 2);
destroy_rcu_head_on_stack(&fcs.rh);
}
+ schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
/* Carry out call_rcu() forward-progress testing. */
@@ -1765,6 +1833,8 @@ static void rcu_torture_fwd_prog_cr(void)
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
+ if (!cur_ops->call)
+ return; /* Can't do call_rcu() fwd prog without ->call. */
/* Loop continuously posting RCU callbacks. */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
@@ -1805,7 +1875,7 @@ static void rcu_torture_fwd_prog_cr(void)
rfcp->rfc_gps = 0;
}
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
- cond_resched();
+ rcu_torture_fwd_prog_cond_resched();
}
stoppedat = jiffies;
n_launders_cb_snap = READ_ONCE(n_launders_cb);
@@ -1814,7 +1884,6 @@ static void rcu_torture_fwd_prog_cr(void)
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree();
- WRITE_ONCE(rcu_fwd_cb_nodelay, false);
if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) {
WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
@@ -1825,6 +1894,8 @@ static void rcu_torture_fwd_prog_cr(void)
n_max_gps, n_max_cbs, cver, gps);
rcu_torture_fwd_cb_hist();
}
+ schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
@@ -2240,7 +2311,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
- &busted_srcud_ops, &tasks_ops,
+ &busted_srcud_ops, &tasks_ops, &trivial_ops,
};
if (!torture_init_begin(torture_type, verbose))
@@ -2363,7 +2434,10 @@ rcu_torture_init(void)
if (stutter < 0)
stutter = 0;
if (stutter) {
- firsterr = torture_stutter_init(stutter * HZ);
+ int t;
+
+ t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
+ firsterr = torture_stutter_init(stutter * HZ, t);
if (firsterr)
goto unwind;
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9b761e546de8..cf0e886314f2 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -831,8 +831,8 @@ static void srcu_leak_callback(struct rcu_head *rhp)
* srcu_read_lock(), and srcu_read_unlock() that are all passed the same
* srcu_struct structure.
*/
-void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
- rcu_callback_t func, bool do_norm)
+static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
+ rcu_callback_t func, bool do_norm)
{
unsigned long flags;
int idx;
@@ -1310,3 +1310,68 @@ void __init srcu_init(void)
queue_work(rcu_gp_wq, &ssp->work.work);
}
}
+
+#ifdef CONFIG_MODULES
+
+/* Initialize any global-scope srcu_struct structures used by this module. */
+static int srcu_module_coming(struct module *mod)
+{
+ int i;
+ struct srcu_struct **sspp = mod->srcu_struct_ptrs;
+ int ret;
+
+ for (i = 0; i < mod->num_srcu_structs; i++) {
+ ret = init_srcu_struct(*(sspp++));
+ if (WARN_ON_ONCE(ret))
+ return ret;
+ }
+ return 0;
+}
+
+/* Clean up any global-scope srcu_struct structures used by this module. */
+static void srcu_module_going(struct module *mod)
+{
+ int i;
+ struct srcu_struct **sspp = mod->srcu_struct_ptrs;
+
+ for (i = 0; i < mod->num_srcu_structs; i++)
+ cleanup_srcu_struct(*(sspp++));
+}
+
+/* Handle one module, either coming or going. */
+static int srcu_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ int ret = 0;
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ ret = srcu_module_coming(mod);
+ break;
+ case MODULE_STATE_GOING:
+ srcu_module_going(mod);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static struct notifier_block srcu_module_nb = {
+ .notifier_call = srcu_module_notify,
+ .priority = 0,
+};
+
+static __init int init_srcu_module_notifier(void)
+{
+ int ret;
+
+ ret = register_module_notifier(&srcu_module_nb);
+ if (ret)
+ pr_warn("Failed to register srcu module notifier\n");
+ return ret;
+}
+late_initcall(init_srcu_module_notifier);
+
+#endif /* #ifdef CONFIG_MODULES */
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index a8304d90573f..d4558ab7a07d 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,65 +10,18 @@
#include <linux/rcu_sync.h>
#include <linux/sched.h>
-#ifdef CONFIG_PROVE_RCU
-#define __INIT_HELD(func) .held = func,
-#else
-#define __INIT_HELD(func)
-#endif
-
-static const struct {
- void (*sync)(void);
- void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
- void (*wait)(void);
-#ifdef CONFIG_PROVE_RCU
- int (*held)(void);
-#endif
-} gp_ops[] = {
- [RCU_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_held)
- },
- [RCU_SCHED_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_sched_held)
- },
- [RCU_BH_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_bh_held)
- },
-};
-
-enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
-enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
+enum { GP_IDLE = 0, GP_ENTER, GP_PASSED, GP_EXIT, GP_REPLAY };
#define rss_lock gp_wait.lock
-#ifdef CONFIG_PROVE_RCU
-void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
-{
- RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
- "suspicious rcu_sync_is_idle() usage");
-}
-
-EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
-#endif
-
/**
* rcu_sync_init() - Initialize an rcu_sync structure
* @rsp: Pointer to rcu_sync structure to be initialized
- * @type: Flavor of RCU with which to synchronize rcu_sync structure
*/
-void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
+void rcu_sync_init(struct rcu_sync *rsp)
{
memset(rsp, 0, sizeof(*rsp));
init_waitqueue_head(&rsp->gp_wait);
- rsp->gp_type = type;
}
/**
@@ -86,56 +39,26 @@ void rcu_sync_enter_start(struct rcu_sync *rsp)
rsp->gp_state = GP_PASSED;
}
-/**
- * rcu_sync_enter() - Force readers onto slowpath
- * @rsp: Pointer to rcu_sync structure to use for synchronization
- *
- * This function is used by updaters who need readers to make use of
- * a slowpath during the update. After this function returns, all
- * subsequent calls to rcu_sync_is_idle() will return false, which
- * tells readers to stay off their fastpaths. A later call to
- * rcu_sync_exit() re-enables reader slowpaths.
- *
- * When called in isolation, rcu_sync_enter() must wait for a grace
- * period, however, closely spaced calls to rcu_sync_enter() can
- * optimize away the grace-period wait via a state machine implemented
- * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
- */
-void rcu_sync_enter(struct rcu_sync *rsp)
-{
- bool need_wait, need_sync;
- spin_lock_irq(&rsp->rss_lock);
- need_wait = rsp->gp_count++;
- need_sync = rsp->gp_state == GP_IDLE;
- if (need_sync)
- rsp->gp_state = GP_PENDING;
- spin_unlock_irq(&rsp->rss_lock);
+static void rcu_sync_func(struct rcu_head *rhp);
- WARN_ON_ONCE(need_wait && need_sync);
- if (need_sync) {
- gp_ops[rsp->gp_type].sync();
- rsp->gp_state = GP_PASSED;
- wake_up_all(&rsp->gp_wait);
- } else if (need_wait) {
- wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
- } else {
- /*
- * Possible when there's a pending CB from a rcu_sync_exit().
- * Nobody has yet been allowed the 'fast' path and thus we can
- * avoid doing any sync(). The callback will get 'dropped'.
- */
- WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
- }
+static void rcu_sync_call(struct rcu_sync *rsp)
+{
+ call_rcu(&rsp->cb_head, rcu_sync_func);
}
/**
* rcu_sync_func() - Callback function managing reader access to fastpath
* @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
*
- * This function is passed to one of the call_rcu() functions by
+ * This function is passed to call_rcu() function by rcu_sync_enter() and
* rcu_sync_exit(), so that it is invoked after a grace period following the
- * that invocation of rcu_sync_exit(). It takes action based on events that
+ * that invocation of enter/exit.
+ *
+ * If it is called by rcu_sync_enter() it signals that all the readers were
+ * switched onto slow path.
+ *
+ * If it is called by rcu_sync_exit() it takes action based on events that
* have taken place in the meantime, so that closely spaced rcu_sync_enter()
* and rcu_sync_exit() pairs need not wait for a grace period.
*
@@ -152,35 +75,88 @@ static void rcu_sync_func(struct rcu_head *rhp)
struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
unsigned long flags;
- WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
- WARN_ON_ONCE(rsp->cb_state == CB_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irqsave(&rsp->rss_lock, flags);
if (rsp->gp_count) {
/*
- * A new rcu_sync_begin() has happened; drop the callback.
+ * We're at least a GP after the GP_IDLE->GP_ENTER transition.
*/
- rsp->cb_state = CB_IDLE;
- } else if (rsp->cb_state == CB_REPLAY) {
+ WRITE_ONCE(rsp->gp_state, GP_PASSED);
+ wake_up_locked(&rsp->gp_wait);
+ } else if (rsp->gp_state == GP_REPLAY) {
/*
- * A new rcu_sync_exit() has happened; requeue the callback
- * to catch a later GP.
+ * A new rcu_sync_exit() has happened; requeue the callback to
+ * catch a later GP.
*/
- rsp->cb_state = CB_PENDING;
- gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ rcu_sync_call(rsp);
} else {
/*
- * We're at least a GP after rcu_sync_exit(); eveybody will now
- * have observed the write side critical section. Let 'em rip!.
+ * We're at least a GP after the last rcu_sync_exit(); eveybody
+ * will now have observed the write side critical section.
+ * Let 'em rip!.
*/
- rsp->cb_state = CB_IDLE;
- rsp->gp_state = GP_IDLE;
+ WRITE_ONCE(rsp->gp_state, GP_IDLE);
}
spin_unlock_irqrestore(&rsp->rss_lock, flags);
}
/**
- * rcu_sync_exit() - Allow readers back onto fast patch after grace period
+ * rcu_sync_enter() - Force readers onto slowpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who need readers to make use of
+ * a slowpath during the update. After this function returns, all
+ * subsequent calls to rcu_sync_is_idle() will return false, which
+ * tells readers to stay off their fastpaths. A later call to
+ * rcu_sync_exit() re-enables reader slowpaths.
+ *
+ * When called in isolation, rcu_sync_enter() must wait for a grace
+ * period, however, closely spaced calls to rcu_sync_enter() can
+ * optimize away the grace-period wait via a state machine implemented
+ * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
+ */
+void rcu_sync_enter(struct rcu_sync *rsp)
+{
+ int gp_state;
+
+ spin_lock_irq(&rsp->rss_lock);
+ gp_state = rsp->gp_state;
+ if (gp_state == GP_IDLE) {
+ WRITE_ONCE(rsp->gp_state, GP_ENTER);
+ WARN_ON_ONCE(rsp->gp_count);
+ /*
+ * Note that we could simply do rcu_sync_call(rsp) here and
+ * avoid the "if (gp_state == GP_IDLE)" block below.
+ *
+ * However, synchronize_rcu() can be faster if rcu_expedited
+ * or rcu_blocking_is_gp() is true.
+ *
+ * Another reason is that we can't wait for rcu callback if
+ * we are called at early boot time but this shouldn't happen.
+ */
+ }
+ rsp->gp_count++;
+ spin_unlock_irq(&rsp->rss_lock);
+
+ if (gp_state == GP_IDLE) {
+ /*
+ * See the comment above, this simply does the "synchronous"
+ * call_rcu(rcu_sync_func) which does GP_ENTER -> GP_PASSED.
+ */
+ synchronize_rcu();
+ rcu_sync_func(&rsp->cb_head);
+ /* Not really needed, wait_event() would see GP_PASSED. */
+ return;
+ }
+
+ wait_event(rsp->gp_wait, READ_ONCE(rsp->gp_state) >= GP_PASSED);
+}
+
+/**
+ * rcu_sync_exit() - Allow readers back onto fast path after grace period
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* This function is used by updaters who have completed, and can therefore
@@ -191,13 +167,16 @@ static void rcu_sync_func(struct rcu_head *rhp)
*/
void rcu_sync_exit(struct rcu_sync *rsp)
{
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
+
spin_lock_irq(&rsp->rss_lock);
if (!--rsp->gp_count) {
- if (rsp->cb_state == CB_IDLE) {
- rsp->cb_state = CB_PENDING;
- gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
- } else if (rsp->cb_state == CB_PENDING) {
- rsp->cb_state = CB_REPLAY;
+ if (rsp->gp_state == GP_PASSED) {
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ rcu_sync_call(rsp);
+ } else if (rsp->gp_state == GP_EXIT) {
+ WRITE_ONCE(rsp->gp_state, GP_REPLAY);
}
}
spin_unlock_irq(&rsp->rss_lock);
@@ -209,18 +188,19 @@ void rcu_sync_exit(struct rcu_sync *rsp)
*/
void rcu_sync_dtor(struct rcu_sync *rsp)
{
- int cb_state;
+ int gp_state;
- WARN_ON_ONCE(rsp->gp_count);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irq(&rsp->rss_lock);
- if (rsp->cb_state == CB_REPLAY)
- rsp->cb_state = CB_PENDING;
- cb_state = rsp->cb_state;
+ if (rsp->gp_state == GP_REPLAY)
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ gp_state = rsp->gp_state;
spin_unlock_irq(&rsp->rss_lock);
- if (cb_state != CB_IDLE) {
- gp_ops[rsp->gp_type].wait();
- WARN_ON_ONCE(rsp->cb_state != CB_IDLE);
+ if (gp_state != GP_IDLE) {
+ rcu_barrier();
+ WARN_ON_ONCE(rsp->gp_state != GP_IDLE);
}
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 980ca3ca643f..a14e5fbbea46 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
#include "tree.h"
#include "rcu.h"
@@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
+/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
+static bool use_softirq = 1;
+module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
@@ -138,7 +147,6 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
-static void invoke_rcu_callbacks(struct rcu_data *rdp);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
@@ -368,19 +376,33 @@ static void __maybe_unused rcu_momentary_dyntick_idle(void)
}
/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
*
- * If the current CPU is idle or running at a first-level (not nested)
+ * If the current CPU is idle and running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
- return __this_cpu_read(rcu_data.dynticks_nesting) <= 0 &&
- __this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 1;
+ /* Called only from within the scheduling-clock interrupt */
+ lockdep_assert_in_irq();
+
+ /* Check for counter underflows */
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
+ "RCU dynticks_nesting counter underflow!");
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
+ "RCU dynticks_nmi_nesting counter underflow/zero!");
+
+ /* Are we at first interrupt nesting level? */
+ if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
+ return false;
+
+ /* Does CPU appear to be idle from an RCU standpoint? */
+ return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
-#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */
+#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
+#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
static long qhimark = DEFAULT_RCU_QHIMARK;
@@ -2113,7 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
/* Reinstate batch limit if we have worked down the excess. */
count = rcu_segcblist_n_cbs(&rdp->cblist);
- if (rdp->blimit == LONG_MAX && count <= qlowmark)
+ if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
rdp->blimit = blimit;
/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
@@ -2253,7 +2275,7 @@ void rcu_force_quiescent_state(void)
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/* Perform RCU core processing work for the current CPU. */
-static __latent_entropy void rcu_core(struct softirq_action *unused)
+static __latent_entropy void rcu_core(void)
{
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2287,37 +2309,126 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
/* If there are callbacks ready, invoke them. */
- if (rcu_segcblist_ready_cbs(&rdp->cblist))
- invoke_rcu_callbacks(rdp);
+ if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
+ likely(READ_ONCE(rcu_scheduler_fully_active)))
+ rcu_do_batch(rdp);
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
}
+static void rcu_core_si(struct softirq_action *h)
+{
+ rcu_core();
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+ /*
+ * If the thread is yielding, only wake it when this
+ * is invoked from idle
+ */
+ if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+ wake_up_process(t);
+}
+
+static void invoke_rcu_core_kthread(void)
+{
+ struct task_struct *t;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+ t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+ if (t != NULL && t != current)
+ rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+ local_irq_restore(flags);
+}
+
/*
- * Schedule RCU callback invocation. If the running implementation of RCU
- * does not support RCU priority boosting, just do a direct call, otherwise
- * wake up the per-CPU kernel kthread. Note that because we are running
- * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
*/
-static void invoke_rcu_callbacks(struct rcu_data *rdp)
+static void invoke_rcu_core(void)
{
- if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
- return;
- if (likely(!rcu_state.boost)) {
- rcu_do_batch(rdp);
+ if (!cpu_online(smp_processor_id()))
return;
+ if (use_softirq)
+ raise_softirq(RCU_SOFTIRQ);
+ else
+ invoke_rcu_core_kthread();
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+ per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(rcu_data.rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks. This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+ unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+ char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+ int spincnt;
+
+ for (spincnt = 0; spincnt < 10; spincnt++) {
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+ local_bh_disable();
+ *statusp = RCU_KTHREAD_RUNNING;
+ local_irq_disable();
+ work = *workp;
+ *workp = 0;
+ local_irq_enable();
+ if (work)
+ rcu_core();
+ local_bh_enable();
+ if (*workp == 0) {
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+ *statusp = RCU_KTHREAD_WAITING;
+ return;
+ }
}
- invoke_rcu_callbacks_kthread();
+ *statusp = RCU_KTHREAD_YIELDING;
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+ schedule_timeout_interruptible(2);
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+ *statusp = RCU_KTHREAD_WAITING;
}
-static void invoke_rcu_core(void)
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+ .store = &rcu_data.rcu_cpu_kthread_task,
+ .thread_should_run = rcu_cpu_kthread_should_run,
+ .thread_fn = rcu_cpu_kthread,
+ .thread_comm = "rcuc/%u",
+ .setup = rcu_cpu_kthread_setup,
+ .park = rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
{
- if (cpu_online(smp_processor_id()))
- raise_softirq(RCU_SOFTIRQ);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
+ if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+ return 0;
+ WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
+ "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
+ return 0;
}
+early_initcall(rcu_spawn_core_kthreads);
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
@@ -2354,7 +2465,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
} else {
/* Give the grace period a kick. */
- rdp->blimit = LONG_MAX;
+ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
rcu_force_quiescent_state();
@@ -3355,7 +3466,8 @@ void __init rcu_init(void)
rcu_init_one();
if (dump_tree)
rcu_dump_rcu_node_tree();
- open_softirq(RCU_SOFTIRQ, rcu_core);
+ if (use_softirq)
+ open_softirq(RCU_SOFTIRQ, rcu_core_si);
/*
* We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c4..7acaf3a62d39 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -154,13 +154,15 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
- bool deferred_qs; /* This CPU awaiting a deferred QS? */
+ bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
unsigned long ticks_this_gp; /* The number of scheduling-clock */
/* ticks this CPU has handled */
/* during and after the last grace */
/* period it is aware of. */
+ struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
+ bool defer_qs_iw_pending; /* Scheduler attention pending? */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
@@ -407,8 +409,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 9c990df880d1..af7e7b9c86af 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -250,7 +250,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
*/
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{
- WRITE_ONCE(rdp->deferred_qs, false);
+ WRITE_ONCE(rdp->exp_deferred_qs, false);
rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
}
@@ -259,8 +259,7 @@ static bool sync_exp_work_done(unsigned long s)
{
if (rcu_exp_gp_seq_done(s)) {
trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
- /* Ensure test happens before caller kfree(). */
- smp_mb__before_atomic(); /* ^^^ */
+ smp_mb(); /* Ensure test happens before caller kfree(). */
return true;
}
return false;
@@ -384,7 +383,12 @@ retry_ipi:
mask_ofl_test |= mask;
continue;
}
+ if (get_cpu() == cpu) {
+ put_cpu();
+ continue;
+ }
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
+ put_cpu();
if (!ret) {
mask_ofl_ipi &= ~mask;
continue;
@@ -611,7 +615,7 @@ static void rcu_exp_handler(void *unused)
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
} else {
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
set_tsk_need_resched(t);
set_preempt_need_resched();
}
@@ -633,7 +637,7 @@ static void rcu_exp_handler(void *unused)
if (t->rcu_read_lock_nesting > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
t->rcu_read_unlock_special.b.exp_hint = true;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -656,7 +660,7 @@ static void rcu_exp_handler(void *unused)
*
* Otherwise, force a context switch after the CPU enables everything.
*/
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
rcu_preempt_deferred_qs(t);
@@ -694,6 +698,16 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
#else /* #ifdef CONFIG_PREEMPT_RCU */
+/* Request an expedited quiescent state. */
+static void rcu_exp_need_qs(void)
+{
+ __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
+ /* Store .exp before .rcu_urgent_qs. */
+ smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
+ set_tsk_need_resched(current);
+ set_preempt_need_resched();
+}
+
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
@@ -709,25 +723,38 @@ static void rcu_exp_handler(void *unused)
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
return;
}
- __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
- /* Store .exp before .rcu_urgent_qs. */
- smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
- set_tsk_need_resched(current);
- set_preempt_need_resched();
+ rcu_exp_need_qs();
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
+ unsigned long flags;
+ int my_cpu;
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
rdp = per_cpu_ptr(&rcu_data, cpu);
rnp = rdp->mynode;
- if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+ my_cpu = get_cpu();
+ /* Quiescent state either not needed or already requested, leave. */
+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+ __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
+ put_cpu();
+ return;
+ }
+ /* Quiescent state needed on current CPU, so set it up locally. */
+ if (my_cpu == cpu) {
+ local_irq_save(flags);
+ rcu_exp_need_qs();
+ local_irq_restore(flags);
+ put_cpu();
return;
+ }
+ /* Quiescent state needed on some other CPU, send IPI. */
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
+ put_cpu();
WARN_ON_ONCE(ret);
}
@@ -765,7 +792,6 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
- struct rcu_data *rdp;
struct rcu_exp_work rew;
struct rcu_node *rnp;
unsigned long s;
@@ -802,7 +828,6 @@ void synchronize_rcu_expedited(void)
}
/* Wait for expedited grace period to complete. */
- rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
rnp = rcu_get_root();
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(s));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 1102765f91fd..acb225023ed1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
* Paul E. McKenney <paulmck@linux.ibm.com>
*/
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
#include "../locking/rtmutex_common.h"
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code. Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+ if (!use_softirq)
+ pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
pr_info("\tRCU debug extended QS entry/exit.\n");
rcupdate_announce_bootup_oddness();
@@ -257,10 +237,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
*/
- if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
+ if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
else
- WARN_ON_ONCE(rdp->deferred_qs);
+ WARN_ON_ONCE(rdp->exp_deferred_qs);
}
/*
@@ -357,7 +337,7 @@ void rcu_note_context_switch(bool preempt)
* means that we continue to block the current grace period.
*/
rcu_qs();
- if (rdp->deferred_qs)
+ if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -471,14 +451,15 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
special = t->rcu_read_unlock_special;
rdp = this_cpu_ptr(&rcu_data);
- if (!special.s && !rdp->deferred_qs) {
+ if (!special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
+ t->rcu_read_unlock_special.b.deferred_qs = false;
if (special.b.need_qs) {
rcu_qs();
t->rcu_read_unlock_special.b.need_qs = false;
- if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
+ if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
@@ -490,7 +471,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
- if (rdp->deferred_qs) {
+ if (rdp->exp_deferred_qs) {
rcu_report_exp_rdp(rdp);
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
@@ -579,7 +560,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
- return (__this_cpu_read(rcu_data.deferred_qs) ||
+ return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
t->rcu_read_lock_nesting <= 0;
}
@@ -607,6 +588,17 @@ static void rcu_preempt_deferred_qs(struct task_struct *t)
}
/*
+ * Minimal handler to give the scheduler a chance to re-evaluate.
+ */
+static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
+{
+ struct rcu_data *rdp;
+
+ rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
+ rdp->defer_qs_iw_pending = false;
+}
+
+/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
@@ -625,16 +617,41 @@ static void rcu_read_unlock_special(struct task_struct *t)
local_irq_save(flags);
irqs_were_disabled = irqs_disabled_flags(flags);
if (preempt_bh_were_disabled || irqs_were_disabled) {
- WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
- /* Need to defer quiescent state until everything is enabled. */
- if (irqs_were_disabled) {
- /* Enabling irqs does not reschedule, so... */
+ bool exp;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ struct rcu_node *rnp = rdp->mynode;
+
+ t->rcu_read_unlock_special.b.exp_hint = false;
+ exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
+ (rdp->grpmask & rnp->expmask) ||
+ tick_nohz_full_cpu(rdp->cpu);
+ // Need to defer quiescent state until everything is enabled.
+ if ((exp || in_irq()) && irqs_were_disabled && use_softirq &&
+ (in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) {
+ // Using softirq, safe to awaken, and we get
+ // no help from enabling irqs, unlike bh/preempt.
raise_softirq_irqoff(RCU_SOFTIRQ);
+ } else if (exp && irqs_were_disabled && !use_softirq &&
+ !t->rcu_read_unlock_special.b.deferred_qs) {
+ // Safe to awaken and we get no help from enabling
+ // irqs, unlike bh/preempt.
+ invoke_rcu_core();
} else {
- /* Enabling BH or preempt does reschedule, so... */
+ // Enabling BH or preempt does reschedule, so...
+ // Also if no expediting or NO_HZ_FULL, slow is OK.
set_tsk_need_resched(current);
set_preempt_need_resched();
+ if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+ !rdp->defer_qs_iw_pending && exp) {
+ // Get scheduler to re-evaluate and call hooks.
+ // If !IRQ_WORK, FQS scan will eventually IPI.
+ init_irq_work(&rdp->defer_qs_iw,
+ rcu_preempt_deferred_qs_handler);
+ rdp->defer_qs_iw_pending = true;
+ irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
+ }
}
+ t->rcu_read_unlock_special.b.deferred_qs = true;
local_irq_restore(flags);
return;
}
@@ -760,7 +777,7 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
pr_cont(" %p", lhp);
- if (++i >= 10)
+ if (++i >= ncheck)
break;
}
pr_cont("\n");
@@ -944,18 +961,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
#ifdef CONFIG_RCU_BOOST
+ struct sched_param sp;
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
- /*
- * If the thread is yielding, only wake it when this
- * is invoked from idle
- */
- if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
- wake_up_process(t);
+ sp.sched_priority = kthread_prio;
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
}
+#ifdef CONFIG_RCU_BOOST
+
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@@ -1091,23 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
}
/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
- if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
- current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
- rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
- __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
- }
- local_irq_restore(flags);
-}
-
-/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
@@ -1160,59 +1163,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
return 0;
}
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
- struct sched_param sp;
-
- sp.sched_priority = kthread_prio;
- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
- per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
- return __this_cpu_read(rcu_data.rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks. This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
- unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
- char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
- int spincnt;
-
- for (spincnt = 0; spincnt < 10; spincnt++) {
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
- local_bh_disable();
- *statusp = RCU_KTHREAD_RUNNING;
- local_irq_disable();
- work = *workp;
- *workp = 0;
- local_irq_enable();
- if (work)
- rcu_do_batch(this_cpu_ptr(&rcu_data));
- local_bh_enable();
- if (*workp == 0) {
- trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
- *statusp = RCU_KTHREAD_WAITING;
- return;
- }
- }
- *statusp = RCU_KTHREAD_YIELDING;
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
- schedule_timeout_interruptible(2);
- trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
- *statusp = RCU_KTHREAD_WAITING;
-}
-
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
@@ -1243,27 +1193,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
free_cpumask_var(cm);
}
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
- .store = &rcu_data.rcu_cpu_kthread_task,
- .thread_should_run = rcu_cpu_kthread_should_run,
- .thread_fn = rcu_cpu_kthread,
- .thread_comm = "rcuc/%u",
- .setup = rcu_cpu_kthread_setup,
- .park = rcu_cpu_kthread_park,
-};
-
/*
* Spawn boost kthreads -- called as soon as the scheduler is running.
*/
static void __init rcu_spawn_boost_kthreads(void)
{
struct rcu_node *rnp;
- int cpu;
- for_each_possible_cpu(cpu)
- per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
- if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
- return;
rcu_for_each_leaf_node(rnp)
(void)rcu_spawn_one_boost_kthread(rnp);
}
@@ -1286,11 +1222,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
-static void invoke_rcu_callbacks_kthread(void)
-{
- WARN_ON_ONCE(1);
-}
-
static bool rcu_is_callbacks_kthread(void)
{
return false;
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index f65a73a97323..065183391f75 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -630,7 +630,9 @@ static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
time_before(j, rcu_state.gp_activity + gpssdelay) ||
atomic_xchg(&warned, 1)) {
- raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
+ if (rnp_root != rnp)
+ /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp_root);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c3bf44ba42e5..61df2bf08563 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -423,6 +423,19 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
do { } while (0)
#endif
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
+/* Get rcutorture access to sched_setaffinity(). */
+long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(pid, in_mask);
+ WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
+#endif
+
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
diff --git a/kernel/torture.c b/kernel/torture.c
index 17b2be9bde12..a8d9bdfba7c3 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -570,6 +570,7 @@ static void torture_shutdown_cleanup(void)
static struct task_struct *stutter_task;
static int stutter_pause_test;
static int stutter;
+static int stutter_gap;
/*
* Block until the stutter interval ends. This must be called periodically
@@ -578,10 +579,12 @@ static int stutter;
bool stutter_wait(const char *title)
{
int spt;
+ bool ret = false;
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
+ ret = true;
if (spt == 1) {
schedule_timeout_interruptible(1);
} else if (spt == 2) {
@@ -592,7 +595,7 @@ bool stutter_wait(const char *title)
}
torture_shutdown_absorb(title);
}
- return !!spt;
+ return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
@@ -602,17 +605,24 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
+ int wtime;
+
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
- WRITE_ONCE(stutter_pause_test, 1);
- schedule_timeout_interruptible(stutter - 1);
+ wtime = stutter;
+ if (stutter > HZ + 1) {
+ WRITE_ONCE(stutter_pause_test, 1);
+ wtime = stutter - HZ - 1;
+ schedule_timeout_interruptible(wtime);
+ wtime = HZ + 1;
+ }
WRITE_ONCE(stutter_pause_test, 2);
- schedule_timeout_interruptible(1);
+ schedule_timeout_interruptible(wtime);
}
WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
- schedule_timeout_interruptible(stutter);
+ schedule_timeout_interruptible(stutter_gap);
torture_shutdown_absorb("torture_stutter");
} while (!torture_must_stop());
torture_kthread_stopping("torture_stutter");
@@ -622,9 +632,10 @@ static int torture_stutter(void *arg)
/*
* Initialize and kick off the torture_stutter kthread.
*/
-int torture_stutter_init(const int s)
+int torture_stutter_init(const int s, const int sgap)
{
stutter = s;
+ stutter_gap = sgap;
return torture_create_kthread(torture_stutter, NULL, stutter_task);
}
EXPORT_SYMBOL_GPL(torture_stutter_init);
diff --git a/tools/include/linux/rcu.h b/tools/include/linux/rcu.h
index 7d02527e5bce..9554d3fa54f3 100644
--- a/tools/include/linux/rcu.h
+++ b/tools/include/linux/rcu.h
@@ -19,7 +19,7 @@ static inline bool rcu_is_watching(void)
return false;
}
-#define rcu_assign_pointer(p, v) ((p) = (v))
-#define RCU_INIT_POINTER(p, v) p=(v)
+#define rcu_assign_pointer(p, v) do { (p) = (v); } while (0)
+#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index def9131d3d8e..5be86b1025e8 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -24,6 +24,7 @@ instructions RMW[{'once,'acquire,'release}]
enum Barriers = 'wmb (*smp_wmb*) ||
'rmb (*smp_rmb*) ||
'mb (*smp_mb*) ||
+ 'barrier (*barrier*) ||
'rcu-lock (*rcu_read_lock*) ||
'rcu-unlock (*rcu_read_unlock*) ||
'sync-rcu (*synchronize_rcu*) ||
@@ -76,3 +77,8 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
(* Validate SRCU dynamic match *)
flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
+
+(* Compute marked and plain memory accesses *)
+let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
+ LKR | LKW | UL | LF | RL | RU
+let Plain = M \ Marked
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 8dcb37835b61..ea2ff4b94074 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -24,8 +24,14 @@ include "lock.cat"
(* Basic relations *)
(*******************)
+(* Release Acquire *)
+let acq-po = [Acquire] ; po ; [M]
+let po-rel = [M] ; po ; [Release]
+let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+
(* Fences *)
-let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
+let R4rmb = R \ Noreturn (* Reads for which rmb works *)
+let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
@@ -34,13 +40,14 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; po ; [UL] ; (co | po) ; [LKW] ;
fencerel(After-unlock-lock) ; [M])
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
-
let strong-fence = mb | gp
-(* Release Acquire *)
-let acq-po = [Acquire] ; po ; [M]
-let po-rel = [M] ; po ; [Release]
-let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+let nonrw-fence = strong-fence | po-rel | acq-po
+let fence = nonrw-fence | wmb | rmb
+let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
+ Before-atomic | After-atomic | Acquire | Release |
+ Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
+ (po ; [Release]) | ([Acquire] ; po)
(**********************************)
(* Fundamental coherence ordering *)
@@ -61,21 +68,22 @@ empty rmw & (fre ; coe) as atomic
let dep = addr | data
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
-let to-w = rwdep | (overwrite & int)
-let to-r = addr | (dep ; rfi)
-let fence = strong-fence | wmb | po-rel | rmb | acq-po
+let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
+let to-r = addr | (dep ; [Marked] ; rfi)
let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
-let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po
-let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
+let A-cumul(r) = (rfe ; [Marked])? ; r
+let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
+ po-unlock-rf-lock-po) ; [Marked]
+let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
+ [Marked] ; rfe? ; [Marked]
(*
* Happens Before: Ordering from the passage of time.
* No fences needed here for prop because relation confined to one process.
*)
-let hb = ppo | rfe | ((prop \ id) & int)
+let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
acyclic hb as happens-before
(****************************************)
@@ -83,7 +91,7 @@ acyclic hb as happens-before
(****************************************)
(* Propagation: Each non-rf link needs a strong fence. *)
-let pb = prop ; strong-fence ; hb*
+let pb = prop ; strong-fence ; hb* ; [Marked]
acyclic pb as propagation
(*******)
@@ -114,24 +122,28 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po
(*
* Any sequence containing at least as many grace periods as RCU read-side
- * critical sections (joined by rcu-link) acts as a generalized strong fence.
+ * critical sections (joined by rcu-link) induces order like a generalized
+ * inter-CPU strong fence.
* Likewise for SRCU grace periods and read-side critical sections, provided
* the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
* struct srcu_struct location.
*)
-let rec rcu-fence = rcu-gp | srcu-gp |
+let rec rcu-order = rcu-gp | srcu-gp |
(rcu-gp ; rcu-link ; rcu-rscsi) |
((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
(rcu-rscsi ; rcu-link ; rcu-gp) |
((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
- (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) |
- ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) |
- (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) |
- ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) |
- (rcu-fence ; rcu-link ; rcu-fence)
+ (rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
+ ((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
+ (rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
+ ((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
+ (rcu-order ; rcu-link ; rcu-order)
+let rcu-fence = po ; rcu-order ; po?
+let fence = fence | rcu-fence
+let strong-fence = strong-fence | rcu-fence
(* rb orders instructions just as pb does *)
-let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb*
+let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]
irreflexive rb as rcu
@@ -143,3 +155,49 @@ irreflexive rb as rcu
* let xb = hb | pb | rb
* acyclic xb as executes-before
*)
+
+(*********************************)
+(* Plain accesses and data races *)
+(*********************************)
+
+(* Warn about plain writes and marked accesses in the same region *)
+let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
+ ([Marked] ; (po-loc \ barrier) ; [Plain & W])
+flag ~empty mixed-accesses as mixed-accesses
+
+(* Executes-before and visibility *)
+let xbstar = (hb | pb | rb)*
+let vis = cumul-fence* ; rfe? ; [Marked] ;
+ ((strong-fence ; [Marked] ; xbstar) | (xbstar & int))
+
+(* Boundaries for lifetimes of plain accesses *)
+let w-pre-bounded = [Marked] ; (addr | fence)?
+let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
+ ([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
+let w-post-bounded = fence? ; [Marked]
+let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
+ [Marked]
+
+(* Visibility and executes-before for plain accesses *)
+let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
+ (w-post-bounded ; vis ; w-pre-bounded)
+let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
+ (w-post-bounded ; vis ; r-pre-bounded)
+let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)
+
+(* Potential races *)
+let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))
+
+(* Coherence requirements for plain accesses *)
+let wr-incoh = pre-race & rf & rw-xbstar^-1
+let rw-incoh = pre-race & fr & wr-vis^-1
+let ww-incoh = pre-race & co & ww-vis^-1
+empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence
+
+(* Actual races *)
+let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
+let ww-race = (pre-race & co) \ ww-nonrace
+let wr-race = (pre-race & (co? ; rf)) \ wr-vis
+let rw-race = (pre-race & fr) \ rw-xbstar
+
+flag ~empty (ww-race | wr-race | rw-race) as data-race
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 551eeaa389d4..ef0f3c1850de 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -24,6 +24,7 @@ smp_mb__before_atomic() { __fence{before-atomic}; }
smp_mb__after_atomic() { __fence{after-atomic}; }
smp_mb__after_spinlock() { __fence{after-spinlock}; }
smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; }
+barrier() { __fence{barrier}; }
// Exchange
xchg(X,V) __xchg{mb}(X,V)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index b2b60b84fb9d..172f0145301c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -1,7 +1,7 @@
C MP+poonceonces
(*
- * Result: Maybe
+ * Result: Sometimes
*
* Can the counter-intuitive message-passing outcome be prevented with
* no ordering at all?
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 5ee08f129094..681f9067fa9e 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -244,7 +244,7 @@ produce the name:
Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
The descriptors that describe connections between consecutive accesses
-within the cycle through a given litmus test can be provided by the herd
+within the cycle through a given litmus test can be provided by the herd7
tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
Release, Acquire, and so on).
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index a059d1a6d8a2..6b52f365d73a 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -11,7 +11,7 @@
include "cross.cat"
(*
- * The lock-related events generated by herd are as follows:
+ * The lock-related events generated by herd7 are as follows:
*
* LKR Lock-Read: the read part of a spin_lock() or successful
* spin_trylock() read-modify-write event pair
diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 29375a1fbbfa..095c7eb36f9f 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -22,7 +22,7 @@ checklitmushist.sh
Run all litmus tests having .litmus.out files from previous
initlitmushist.sh or newlitmushist.sh runs, comparing the
- herd output to that of the original runs.
+ herd7 output to that of the original runs.
checklitmus.sh
@@ -43,7 +43,7 @@ initlitmushist.sh
judgelitmus.sh
- Given a .litmus file and its .litmus.out herd output, check the
+ Given a .litmus file and its .litmus.out herd7 output, check the
.litmus.out file against the .litmus file's "Result:" comment to
judge whether the test ran correctly. Not normally run manually,
provided instead for use by other scripts.
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index b35fcd61ecf6..3c0c7fbbd223 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Run herd tests on all .litmus files in the litmus-tests directory
+# Run herd7 tests on all .litmus files in the litmus-tests directory
# and check each file's result against a "Result:" comment within that
# litmus test. If the verification result does not match that specified
# in the litmus test, this script prints an error message prefixed with
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index dd08801a30b0..11461ed40b5e 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Run a herd test and invokes judgelitmus.sh to check the result against
+# Run a herd7 test and invokes judgelitmus.sh to check the result against
# a "Result:" comment within the litmus test. It also outputs verification
# results to a file whose name is that of the specified litmus test, but
# with ".out" appended.
diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 859e1d581e05..40f52080fdbd 100644
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -91,7 +91,7 @@ do
shift
;;
--herdopts|--herdopt)
- checkarg --destdir "(herd options)" "$#" "$2" '.*' '^--'
+ checkarg --destdir "(herd7 options)" "$#" "$2" '.*' '^--'
LKMM_HERD_OPTIONS="$2"
shift
;;
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index e507f5f933d5..6ed376f495bb 100644
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -79,7 +79,7 @@ then
echo ' ---' Summary: 1>&2
grep '!!!' $T/*.sh.out 1>&2
nfail="`grep '!!!' $T/*.sh.out | wc -l`"
- echo 'Number of failed herd runs (e.g., timeout): ' $nfail 1>&2
+ echo 'Number of failed herd7 runs (e.g., timeout): ' $nfail 1>&2
exit 1
else
echo All runs completed successfully. 1>&2
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index fd280b070fdb..fed468fb0c78 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -7,6 +7,6 @@
#define rcu_dereference_raw(p) rcu_dereference(p)
#define rcu_dereference_protected(p, cond) rcu_dereference(p)
#define rcu_dereference_check(p, cond) rcu_dereference(p)
-#define RCU_INIT_POINTER(p, v) (p) = (v)
+#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif
diff --git a/tools/testing/selftests/rcutorture/Makefile b/tools/testing/selftests/rcutorture/Makefile
new file mode 100644
index 000000000000..5202dc666206
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0+
+all:
+ ( cd ../../../..; tools/testing/selftests/rcutorture/bin/kvm.sh --duration 10 --configs TREE01 )
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 40359486b3a8..93e80a42249a 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Usage: configinit.sh config-spec-file build-output-dir results-dir
+# Usage: configinit.sh config-spec-file results-dir
#
# Create a .config file from the spec file. Run from the kernel source tree.
# Exits with 0 if all went well, with 1 if all went well but the config
@@ -11,10 +11,6 @@
# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
# this should be a full pathname.
#
-# The second argument is a optional path to a build output directory,
-# for example, "O=/tmp/foo". If this argument is omitted, the .config
-# file will be generated directly in the current directory.
-#
# Copyright (C) IBM Corporation, 2013
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -26,34 +22,23 @@ mkdir $T
# Capture config spec file.
c=$1
-buildloc=$2
-resdir=$3
-builddir=
-if echo $buildloc | grep -q '^O='
-then
- builddir=`echo $buildloc | sed -e 's/^O=//'`
- if test ! -d $builddir
- then
- mkdir $builddir
- fi
-else
- echo Bad build directory: \"$buildloc\"
- exit 2
-fi
+resdir=$2
sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
grep '^grep' < $T/u.sh > $T/upd.sh
echo "cat - $c" >> $T/upd.sh
-make mrproper
-make $buildloc distclean > $resdir/Make.distclean 2>&1
-make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
-mv $builddir/.config $builddir/.config.sav
-sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
-cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+if test -z "$TORTURE_TRUST_MAKE"
+then
+ make clean > $resdir/Make.clean 2>&1
+fi
+make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+mv .config .config.sav
+sh $T/upd.sh < .config.sav > .config
+cp .config .config.new
+yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
# verify new config matches specification.
-configcheck.sh $builddir/.config $c
+configcheck.sh .config $c
exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index ff7102212703..4e9485590c10 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -9,6 +9,11 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+if test -n "$TORTURE_ALLOTED_CPUS"
+then
+ echo $TORTURE_ALLOTED_CPUS
+ exit 0
+fi
ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
idlecpus=`mpstat | tail -1 | \
awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 6bcb8b5b2ff2..c3a49fb4d6f6 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -172,7 +172,7 @@ identify_qemu_append () {
local console=ttyS0
case "$1" in
qemu-system-x86_64|qemu-system-i386)
- echo noapic selinux=0 initcall_debug debug
+ echo selinux=0 initcall_debug debug
;;
qemu-system-aarch64)
console=ttyAMA0
@@ -191,8 +191,19 @@ identify_qemu_append () {
# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
# and TORTURE_QEMU_INTERACTIVE environment variables.
identify_qemu_args () {
+ local KVM_CPU=""
+ case "$1" in
+ qemu-system-x86_64)
+ KVM_CPU=kvm64
+ ;;
+ qemu-system-i386)
+ KVM_CPU=kvm32
+ ;;
+ esac
case "$1" in
qemu-system-x86_64|qemu-system-i386)
+ echo -machine q35,accel=kvm
+ echo -cpu ${KVM_CPU}
;;
qemu-system-aarch64)
echo -machine virt,gic-version=host -cpu host
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 435b60933985..dc49a3ba6111 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -34,10 +34,15 @@ do
exit 0;
fi
- # Set affinity to randomly selected CPU
- cpus=`ls /sys/devices/system/cpu/*/online |
- sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
- grep -v '^0*$'`
+ # Set affinity to randomly selected online CPU
+ cpus=`grep 1 /sys/devices/system/cpu/*/online |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
+
+ # Do not leave out poor old cpu0 which may not be hot-pluggable
+ if [ ! -f "/sys/devices/system/cpu/cpu0/online" ]; then
+ cpus="0 $cpus"
+ fi
+
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index c27a0bbb9c02..18d6518504ee 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -3,7 +3,7 @@
#
# Build a kvm-ready Linux kernel from the tree in the current directory.
#
-# Usage: kvm-build.sh config-template build-dir resdir
+# Usage: kvm-build.sh config-template resdir
#
# Copyright (C) IBM Corporation, 2011
#
@@ -15,8 +15,7 @@ then
echo "kvm-build.sh :$config_template: Not a readable file"
exit 1
fi
-builddir=${2}
-resdir=${3}
+resdir=${2}
T=${TMPDIR-/tmp}/test-linux.sh.$$
trap 'rm -rf $T' 0
@@ -29,14 +28,14 @@ CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_CONSOLE=y
___EOF___
-configinit.sh $T/config O=$builddir $resdir
+configinit.sh $T/config $resdir
retval=$?
if test $retval -gt 1
then
exit 2
fi
ncpus=`cpus2use.sh`
-make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 8426fe1f15ee..1871d00bccd7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -11,6 +11,7 @@
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+# Returns error status reflecting the success (or not) of the specified run.
#
# Copyright (C) IBM Corporation, 2018
#
@@ -56,6 +57,8 @@ done
if test -n "$files"
then
$editor $files
+ exit 1
else
echo No errors in console logs.
+ exit 0
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 2adde6aaafdb..e5edd5198725 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -7,6 +7,8 @@
#
# Usage: kvm-recheck.sh resdir ...
#
+# Returns status reflecting the success or not of the last run specified.
+#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -28,8 +30,16 @@ do
TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
rm -f $i/console.log.*.diags
kvm-recheck-${TORTURE_SUITE}.sh $i
- if test -f "$i/console.log"
+ if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
+ then
+ echo QEMU error, output:
+ cat $i/qemu-output
+ elif test -f "$i/console.log"
then
+ if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137
+ then
+ echo QEMU killed
+ fi
configcheck.sh $i/.config $i/ConfigFragment
if test -r $i/Make.oldconfig.err
then
@@ -58,3 +68,4 @@ do
fi
done
done
+EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0eb1ec16d78a..27b7b5693ede 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -36,11 +36,6 @@ config_template=${1}
config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
title=`echo $config_template | sed -e 's/^.*\///'`
builddir=${2}
-if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
-then
- echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
- exit 1
-fi
resdir=${3}
if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
then
@@ -85,18 +80,18 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $builddir $resdir
+elif kvm-build.sh $T/Kc2 $resdir
then
# Had to build a kernel for this test.
- QEMU="`identify_qemu $builddir/vmlinux`"
+ QEMU="`identify_qemu vmlinux`"
BOOT_IMAGE="`identify_boot_image $QEMU`"
- cp $builddir/vmlinux $resdir
- cp $builddir/.config $resdir
- cp $builddir/Module.symvers $resdir > /dev/null || :
- cp $builddir/System.map $resdir > /dev/null || :
+ cp vmlinux $resdir
+ cp .config $resdir
+ cp Module.symvers $resdir > /dev/null || :
+ cp System.map $resdir > /dev/null || :
if test -n "$BOOT_IMAGE"
then
- cp $builddir/$BOOT_IMAGE $resdir
+ cp $BOOT_IMAGE $resdir
KERNEL=$resdir/${BOOT_IMAGE##*/}
# Arch-independent indicator
touch $resdir/builtkernel
@@ -107,7 +102,7 @@ then
parse-build.sh $resdir/Make.out $title
else
# Build failed.
- cp $builddir/.config $resdir || :
+ cp .config $resdir || :
echo Build failed, not running KVM, see $resdir.
if test -f $builddir.wait
then
@@ -165,7 +160,7 @@ then
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu_pid"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 8f1e337b9b54..72518580df23 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -24,6 +24,7 @@ dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
+TORTURE_ALLOTED_CPUS=""
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
@@ -32,6 +33,7 @@ TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
+TORTURE_TRUST_MAKE=""
resdir=""
configs=""
cpus=0
@@ -62,6 +64,7 @@ usage () {
echo " --qemu-cmd qemu-system-..."
echo " --results absolute-pathname"
echo " --torture rcu"
+ echo " --trust-make"
exit 1
}
@@ -89,6 +92,7 @@ do
--cpus)
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
+ TORTURE_ALLOTED_CPUS="$2"
shift
;;
--datestamp)
@@ -173,6 +177,9 @@ do
jitter=0
fi
;;
+ --trust-make)
+ TORTURE_TRUST_MAKE="y"
+ ;;
*)
echo Unknown argument $1
usage
@@ -285,6 +292,7 @@ cat << ___EOF___ > $T/script
CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
KVM="$KVM"; export KVM
PATH="$PATH"; export PATH
+TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
@@ -297,6 +305,7 @@ TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
+TORTURE_TRUST_MAKE="$TORTURE_TRUST_MAKE"; export TORTURE_TRUST_MAKE
if ! test -e $resdir
then
mkdir -p "$resdir" || :
@@ -342,7 +351,7 @@ function dump(first, pastlast, batchnum)
print "needqemurun="
jn=1
for (j = first; j < pastlast; j++) {
- builddir=KVM "/b1"
+ builddir=KVM "/b" j - first + 1
cpusr[jn] = cpus[j];
if (cfrep[cf[j]] == "") {
cfr[jn] = cf[j];
@@ -358,7 +367,6 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "rm -f " builddir ".*";
print "touch " builddir ".wait";
- print "mkdir " builddir " > /dev/null 2>&1 || :";
print "mkdir " rd cfr[jn] " || :";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
@@ -464,3 +472,5 @@ else
fi
# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
+# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
+# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 0701b3bf6ade..09155c15ea65 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -21,7 +21,7 @@ mkdir $T
. functions.sh
-if grep -q CC < $F
+if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE"
then
:
else
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4508373a922f..4bf62d7b1cbc 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -106,6 +106,7 @@ fi | tee -a $file.diags
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
grep -v 'ODEBUG: ' |
+grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' > $T.diags
if test -s $T.diags
then
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index d2d2a86139db..e19a444a0684 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,2 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index ea47da95374b..d6da9a61d44a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcu_nocbs=0
+rcutorture.fwd_progress=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
new file mode 100644
index 000000000000..4d8eb5bfb6f6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
@@ -0,0 +1,14 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
new file mode 100644
index 000000000000..7017f5f5a55f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
@@ -0,0 +1,3 @@
+rcutorture.torture_type=trivial
+rcutorture.onoff_interval=0
+rcutorture.shuffle_interval=0