From 607904c357c61adf20b8fd18af765e501d61a385 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 9 Jan 2017 10:26:52 -0500 Subject: locking/spinlocks: Remove the unused spin_lock_bh_nested() API The spin_lock_bh_nested() API is defined but is not used anywhere in the kernel. So all spin_lock_bh_nested() and related APIs are now removed. Signed-off-by: Waiman Long Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1483975612-16447-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 -------- include/linux/spinlock_api_smp.h | 2 -- include/linux/spinlock_api_up.h | 1 - 3 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 47dd0cebd204..59248dcc6ef3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) -# define raw_spin_lock_bh_nested(lock, subclass) \ - _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) -# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -317,11 +314,6 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) -#define spin_lock_bh_nested(lock, subclass) \ -do { \ - raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ -} while (0) - #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 5344268e6e62..42dfab89e740 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr); void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) - __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d3afef9d8dbe..d0d188861ad6 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,7 +57,6 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) -#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) -- cgit v1.2.3 From 642fa448ae6b3a4e5e8737054a094173405b7643 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 3 Jan 2017 13:43:14 -0800 Subject: sched/core: Remove set_task_state() This is a nasty interface and setting the state of a foreign task must not be done. As of the following commit: be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()") ... everyone in the kernel calls set_task_state() with current, allowing the helper to be removed. However, as the comment indicates, it is still around for those archs where computing current is more expensive than using a pointer, at least in theory. An important arch that is affected is arm64, however this has been addressed now [1] and performance is up to par making no difference with either calls. Of all the callers, if any, it's the locking bits that would care most about this -- ie: we end up passing a tsk pointer to a lot of the lock slowpath, and setting ->state on that. The following numbers are based on two tests: a custom ad-hoc microbenchmark that just measures latencies (for ~65 million calls) between get_task_state() vs get_current_state(). Secondly for a higher overview, an unlink microbenchmark was used, which pounds on a single file with open, close,unlink combos with increasing thread counts (up to 4x ncpus). While the workload is quite unrealistic, it does contend a lot on the inode mutex or now rwsem. [1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com == 1. x86-64 == Avg runtime set_task_state(): 601 msecs Avg runtime set_current_state(): 552 msecs vanilla dirty Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%) Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%) Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%) Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%) Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%) Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%) Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%) Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%) Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%) Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%) Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%) Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%) Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%) == 2. ppc64le == Avg runtime set_task_state(): 938 msecs Avg runtime set_current_state: 940 msecs vanilla dirty Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%) Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%) Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%) Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%) Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%) Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%) Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%) Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%) Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%) Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%) Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%) Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%) Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%) Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%) Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%) Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%) x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching) and ppc64 (with paca) show similar improvements in the unlink microbenches. The small delta for ppc64 (2ms), does not represent the gains on the unlink runs. In the case of x86, there was a decent amount of variation in the latency runs, but always within a 20 to 50ms increase), ppc was more constant. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: mark.rutland@arm.com Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..f4f9d32f12b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -227,7 +227,7 @@ extern void proc_sched_set_task(struct task_struct *p); extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -/* Convenience macros for the sake of set_task_state */ +/* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) @@ -254,17 +254,6 @@ extern char ___assert_task_state[1 - 2*!!( #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -#define __set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - (tsk)->state = (state_value); \ - } while (0) -#define set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ - } while (0) - #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -277,20 +266,6 @@ extern char ___assert_task_state[1 - 2*!!( } while (0) #else - -/* - * @tsk had better be current, or you get to keep the pieces. - * - * The only reason is that computing current can be more expensive than - * using a pointer that's already available. - * - * Therefore, see set_current_state(). - */ -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - smp_store_mb((tsk)->state, (state_value)) - /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to -- cgit v1.2.3 From 8f95c90ceb541a38ac16fec48c05142ef1450c25 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:25 -0800 Subject: sched/wait, RCU: Introduce rcuwait machinery rcuwait provides support for (single) RCU-safe task wait/wake functionality, with the caveat that it must not be called after exit_notify(), such that we avoid racing with rcu delayed_put_task_struct callbacks, task_struct being rcu unaware in this context -- for which we similarly have task_rcu_dereference() magic, but with different return semantics, which can conflict with the wakeup side. The interfaces are quite straightforward: rcuwait_wait_event() rcuwait_wake_up() More details are in the comments, but it's perhaps worth mentioning at least, that users must provide proper serialization when waiting on a condition, and avoid corrupting a concurrent waiter. Also care must be taken between the task and the condition for when calling the wakeup -- we cannot miss wakeups. When porting users, this is for example, a given when using waitqueues in that everything is done under the q->lock. As such, it can remove sources of non preemptable unbounded work for realtime. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 include/linux/rcuwait.h (limited to 'include') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h new file mode 100644 index 000000000000..0e93d56c7ab2 --- /dev/null +++ b/include/linux/rcuwait.h @@ -0,0 +1,63 @@ +#ifndef _LINUX_RCUWAIT_H_ +#define _LINUX_RCUWAIT_H_ + +#include + +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner; where it is forbidden to use + * after exit_notify(). task_struct is not properly rcu protected, + * unless dealing with rcu-aware lists, ie: find_task_by_*(). + * + * Alternatively we have task_rcu_dereference(), but the return + * semantics have different implications which would break the + * wakeup side. The only time @task is non-nil is when a user is + * blocked (or checking if it needs to) on a condition, and reset + * as soon as we know that the condition has succeeded and are + * awoken. + */ +struct rcuwait { + struct task_struct *task; +}; + +#define __RCUWAIT_INITIALIZER(name) \ + { .task = NULL, } + +static inline void rcuwait_init(struct rcuwait *w) +{ + w->task = NULL; +} + +extern void rcuwait_wake_up(struct rcuwait *w); + +/* + * The caller is responsible for locking around rcuwait_wait_event(), + * such that writes to @task are properly serialized. + */ +#define rcuwait_wait_event(w, condition) \ +({ \ + /* \ + * Complain if we are called after do_exit()/exit_notify(), \ + * as we cannot rely on the rcu critical region for the \ + * wakeup side. \ + */ \ + WARN_ON(current->exit_state); \ + \ + rcu_assign_pointer((w)->task, current); \ + for (;;) { \ + /* \ + * Implicit barrier (A) pairs with (B) in \ + * rcuwait_trywake(). \ + */ \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + \ + schedule(); \ + } \ + \ + WRITE_ONCE((w)->task, NULL); \ + __set_current_state(TASK_RUNNING); \ +}) + +#endif /* _LINUX_RCUWAIT_H_ */ -- cgit v1.2.3 From 52b94129f274937e4c25dd17b76697664a3c43c9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:26 -0800 Subject: locking/percpu-rwsem: Replace waitqueue with rcuwait The use of any kind of wait queue is an overkill for pcpu-rwsems. While one option would be to use the less heavy simple (swait) flavor, this is still too much for what pcpu-rwsems needs. For one, we do not care about any sort of queuing in that the only (rare) time writers (and readers, for that matter) are queued is when trying to acquire the regular contended rw_sem. There cannot be any further queuing as writers are serialized by the rw_sem in the first place. Given that percpu_down_write() must not be called after exit_notify(), we can replace the bulky waitqueue with rcuwait such that a writer can wait for its turn to take the lock. As such, we can avoid the queue handling and locking overhead. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-3-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5b2e6159b744..93664f022ecf 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -4,15 +4,15 @@ #include #include #include -#include +#include #include #include struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; - wait_queue_head_t writer; + struct rw_semaphore rw_sem; /* slowpath */ + struct rcuwait writer; /* blocked writer */ int readers_block; }; @@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + .writer = __RCUWAIT_INITIALIZER(name.writer), \ } extern int __percpu_down_read(struct percpu_rw_semaphore *, int); -- cgit v1.2.3 From e274795ea7b7caa0fd74ef651594382a69e2a951 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Jan 2017 14:17:48 +0100 Subject: locking/mutex: Fix mutex handoff While reviewing the ww_mutex patches, I noticed that it was still possible to (incorrectly) succeed for (incorrect) code like: mutex_lock(&a); mutex_lock(&a); This was possible if the second mutex_lock() would block (as expected) but then receive a spurious wakeup. At that point it would find itself at the front of the queue, request a handoff and instantly claim ownership and continue, since owner would point to itself. Avoid this scenario and simplify the code by introducing a third low bit to signal handoff pickup. So once we request handoff, unlock clears the handoff bit and sets the pickup bit along with the new owner. This also removes the need for the .handoff argument to __mutex_trylock(), since that becomes superfluous with PICKUP. In order to guarantee enough low bits, ensure task_struct alignment is at least L1_CACHE_BYTES (which seems a good ideal regardless). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation") Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b97870f2debd..3e1fccb47f11 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -65,7 +65,7 @@ struct mutex { static inline struct task_struct *__mutex_owner(struct mutex *lock) { - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07); } /* -- cgit v1.2.3 From ea9e0fb8fe1bdfca81bd76052a5cce70bb053430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:32 +0100 Subject: locking/ww_mutex: Set use_ww_ctx even when locking without a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add a new field to struct mutex_waiter. This field must be initialized for all waiters if any waiter uses the ww_use_ctx path. So there is a trade-off: Keep ww_mutex locking without a context on the faster non-use_ww_ctx path, at the cost of adding the initialization to all mutex locks (including non-ww_mutexes), or avoid the additional cost for non-ww_mutex locks, at the cost of adding additional checks to the use_ww_ctx path. We take the latter choice. It may be worth eliminating the users of ww_mutex_lock(lock, NULL), but there are a lot of them. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-5-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 7b0066814fa0..5f2e8379baff 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -222,11 +222,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, */ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock(lock, ctx); - - mutex_lock(&lock->base); - return 0; + return __ww_mutex_lock(lock, ctx); } /** @@ -262,10 +258,7 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock_interruptible(lock, ctx); - else - return mutex_lock_interruptible(&lock->base); + return __ww_mutex_lock_interruptible(lock, ctx); } /** -- cgit v1.2.3 From c5470b22d1833241cae996d23a8a346ff8ec4d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:33 +0100 Subject: locking/ww_mutex: Remove the __ww_mutex_lock*() inline wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the documentation in the header file since there is no good place for it in mutex.c: there are two rather different implementations with different EXPORT_SYMBOLs for each function. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-6-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 5f2e8379baff..c07528522bbc 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) #endif } -extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); -extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); - /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired @@ -220,10 +215,8 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock(lock, ctx); -} +extern int __must_check ww_mutex_lock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible @@ -255,11 +248,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock_interruptible(lock, ctx); -} +extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex -- cgit v1.2.3 From 6baa5c60a97d7f1a37a4b5ab5fb3a450e56e8b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:34 +0100 Subject: locking/ww_mutex: Add waiters in stamp order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add regular waiters in stamp order. Keep adding waiters that have no context in FIFO order and take care not to starve them. While adding our task as a waiter, back off if we detect that there is a waiter with a lower stamp in front of us. Make sure to call lock_contended even when we back off early. For w/w mutexes, being first in the wait list is only stable when taking the lock without a context. Therefore, the purpose of the first flag is split into two: 'first' remains to indicate whether we want to spin optimistically, while 'handoff' indicates that we should be prepared to accept a handoff. For w/w locking with a context, we always accept handoffs after the first schedule(), to handle the following sequence of events: 1. Task #0 unlocks and hands off to Task #2 which is first in line 2. Task #1 adds itself in front of Task #2 3. Task #2 wakes up and must accept the handoff even though it is no longer first in line Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-7-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3e1fccb47f11..e17942ffb3fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,8 @@ #include #include +struct ww_acquire_ctx; + /* * Simple, straightforward mutexes with strict semantics: * @@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock) struct mutex_waiter { struct list_head list; struct task_struct *task; + struct ww_acquire_ctx *ww_ctx; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif -- cgit v1.2.3 From 977625a693283dbb35b2a3f674bc0237f7347348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:39 +0100 Subject: locking/mutex: Initialize mutex_waiter::ww_ctx with poison when debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Help catch cases where mutex_lock is used directly on w/w mutexes, which otherwise result in the w/w tasks reading uninitialized data. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-12-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/poison.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 51334edec506..a39540326417 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -80,6 +80,7 @@ /********** kernel/mutexes **********/ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 +#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) /********** lib/flex_array.c **********/ #define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ -- cgit v1.2.3 From af2e859edd477fa1ea3d1d106f41a595cff3d162 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2016 11:47:04 +0000 Subject: locking/ww_mutex: Fix compilation of __WW_MUTEX_INITIALIZER From conflicting macro parameters, passing the wrong name to __MUTEX_INITIALIZER and a stray '\', #define __WW_MUTEX_INITIALIZER was very unhappy. One unnecessary change was to choose to pass &ww_class instead of implicitly taking the address of the class within the macro. Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1b375dc30710 ("mutex: Move ww_mutex definitions to ww_mutex.h") Link: http://lkml.kernel.org/r/20161201114711.28697-2-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index c07528522bbc..083950fd5b0b 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -51,10 +51,10 @@ struct ww_mutex { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ - , .ww_class = &ww_class +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ + , .ww_class = class #else -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif #define __WW_CLASS_INITIALIZER(ww_class) \ @@ -63,7 +63,7 @@ struct ww_mutex { , .mutex_name = #ww_class "_mutex" } #define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = { \__MUTEX_INITIALIZER(lockname) } \ + { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } #define DEFINE_WW_CLASS(classname) \ -- cgit v1.2.3 From 1e24edca0557dba6486d39d3c24c288475432bcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:12:23 +0100 Subject: locking/atomic, kref: Add KREF_INIT() Since we need to change the implementation, stop exposing internals. Provide KREF_INIT() to allow static initialization of struct kref. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..9af255ad1e2f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,6 +24,8 @@ struct kref { atomic_t refcount; }; +#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } + /** * kref_init - initialize object. * @kref: object in question. -- cgit v1.2.3 From 2c935bc57221cc2edc787c72ea0e2d30cdcd3d5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:29:48 +0100 Subject: locking/atomic, kref: Add kref_read() Since we need to change the implementation, stop exposing internals. Provide kref_read() to read the current reference count; typically used for debug messages. Kills two anti-patterns: atomic_read(&kref->refcount) kref->refcount.counter Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/drm/drm_framebuffer.h | 2 +- include/drm/ttm/ttm_bo_driver.h | 4 ++-- include/linux/kref.h | 5 +++++ include/linux/sunrpc/cache.h | 2 +- include/net/bluetooth/hci_core.h | 4 ++-- 5 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 1ddfa2928802..a232e7f0c869 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -247,7 +247,7 @@ static inline void drm_framebuffer_unreference(struct drm_framebuffer *fb) */ static inline uint32_t drm_framebuffer_read_refcount(struct drm_framebuffer *fb) { - return atomic_read(&fb->base.refcount.refcount); + return kref_read(&fb->base.refcount); } /** diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cdbdb40eb5bd..feecf33a1212 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -878,7 +878,7 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo, { int ret; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); ret = __ttm_bo_reserve(bo, interruptible, no_wait, ticket); if (likely(ret == 0)) @@ -903,7 +903,7 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, { int ret = 0; - WARN_ON(!atomic_read(&bo->kref.refcount)); + WARN_ON(!kref_read(&bo->kref)); if (interruptible) ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, diff --git a/include/linux/kref.h b/include/linux/kref.h index 9af255ad1e2f..7c88d865f82f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -35,6 +35,11 @@ static inline void kref_init(struct kref *kref) atomic_set(&kref->refcount, 1); } +static inline int kref_read(const struct kref *kref) +{ + return atomic_read(&kref->refcount); +} + /** * kref_get - increment refcount for object. * @kref: object. diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..8a511c0985aa 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -198,7 +198,7 @@ static inline struct cache_head *cache_get(struct cache_head *h) static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->ref.refcount) <= 2 && + if (kref_read(&h->ref) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; kref_put(&h->ref, cd->cache_put); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 554671c81f4a..90708f68cc02 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -987,7 +987,7 @@ static inline void hci_conn_drop(struct hci_conn *conn) static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } @@ -995,7 +995,7 @@ static inline void hci_dev_put(struct hci_dev *d) static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; -- cgit v1.2.3 From bdfafc4ffdd24e491119d81f85ddc4393fa49803 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:34:19 +0100 Subject: locking/atomic, kref: Kill kref_sub() By general sentiment kref_sub() is a bad interface, make it go away. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/drm/ttm/ttm_bo_api.h | 15 +-------------- include/linux/kref.h | 32 ++++---------------------------- 2 files changed, 5 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 652e45be97c8..9a465314572c 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -332,19 +332,6 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, */ extern void ttm_bo_unref(struct ttm_buffer_object **bo); - -/** - * ttm_bo_list_ref_sub - * - * @bo: The buffer object. - * @count: The number of references with which to decrease @bo::list_kref; - * @never_free: The refcount should not reach zero with this operation. - * - * Release @count lru list references to this buffer object. - */ -extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, - bool never_free); - /** * ttm_bo_add_to_lru * @@ -367,7 +354,7 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); * and is usually called just immediately after the bo has been reserved to * avoid recursive reservation from lru lists. */ -extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); +extern void ttm_bo_del_from_lru(struct ttm_buffer_object *bo); /** * ttm_bo_move_to_lru_tail diff --git a/include/linux/kref.h b/include/linux/kref.h index 7c88d865f82f..31c49a64cdf9 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -54,9 +54,8 @@ static inline void kref_get(struct kref *kref) } /** - * kref_sub - subtract a number of refcounts for object. + * kref_put - decrement refcount for object. * @kref: object. - * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree @@ -65,46 +64,23 @@ static inline void kref_get(struct kref *kref) * maintainer, and anyone else who happens to notice it. You have * been warned. * - * Subtract @count from the refcount, and if 0, call release(). + * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_sub(struct kref *kref, unsigned int count, - void (*release)(struct kref *kref)) +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { WARN_ON(release == NULL); - if (atomic_sub_and_test((int) count, &kref->refcount)) { + if (atomic_dec_and_test(&kref->refcount)) { release(kref); return 1; } return 0; } -/** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. If the caller does pass kfree to this - * function, you will be publicly mocked mercilessly by the kref - * maintainer, and anyone else who happens to notice it. You have - * been warned. - * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. - */ -static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) -{ - return kref_sub(kref, 1, release); -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) -- cgit v1.2.3 From 23b19ec3377924eb8f303880102e056e9214ba72 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 12:11:59 +0100 Subject: locking/ww_mutex: Turn off __must_check for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the ww_mutex inline wrappers gone there's a lot of dormant anti-patterns emerging in an x86 allyesconfig build: kernel/locking/test-ww_mutex.c:80:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:55:3: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:134:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:213:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:177:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:266:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/drm_modeset_lock.c:430:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/vgem/vgem_fence.c:193:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_batch_pool.c:125:4: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_execbuffer.c:1302:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/radeon/radeon_prime.c:69:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/nouveau/nouveau_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] ... but we cannot just litter the kernel build log with such warnings. These need to be fixed separately - turn off the warning for now. Cc: Nicolai Hähnle Cc: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 083950fd5b0b..5dd9a7682227 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -215,8 +215,7 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -extern int __must_check ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); +extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible -- cgit v1.2.3 From 0a13cd1a05e7a549259d4f803d2ec2efda07ed7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:03:11 +0100 Subject: locking/atomic, kref: Implement kref_put_lock() Because home-rolling your own is _awesome_, stop doing it. Provide kref_put_lock(), just like kref_put_mutex() but for a spinlock. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kref.h b/include/linux/kref.h index 31c49a64cdf9..9db9685fed84 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -19,6 +19,7 @@ #include #include #include +#include struct kref { atomic_t refcount; @@ -86,12 +87,21 @@ static inline int kref_put_mutex(struct kref *kref, struct mutex *lock) { WARN_ON(release == NULL); - if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { - mutex_lock(lock); - if (unlikely(!atomic_dec_and_test(&kref->refcount))) { - mutex_unlock(lock); - return 0; - } + + if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) { + release(kref); + return 1; + } + return 0; +} + +static inline int kref_put_lock(struct kref *kref, + void (*release)(struct kref *kref), + spinlock_t *lock) +{ + WARN_ON(release == NULL); + + if (atomic_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } -- cgit v1.2.3 From 85b36c931ff328297572a3e6136fac573795ad79 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 18 Jan 2017 09:38:04 -0800 Subject: jump_labels: Move header guard #endif down where it belongs The ending header guard is misplaced. This has no functional change, this is just an eye-sore. Signed-off-by: Luis R. Rodriguez Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: bp@suse.de Cc: catalin.marinas@arm.com Cc: jbaron@akamai.com Cc: pbonzini@redhat.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/20170118173804.16281-1-mcgrof@kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a0547c571800..b63d6b7b0db0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void); #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) -#endif /* _LINUX_JUMP_LABEL_H */ - #endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_JUMP_LABEL_H */ -- cgit v1.2.3 From 06321dd2d1ae5b5bdc847958ab9e71d22a29a33e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 19 Jan 2017 09:31:52 -0500 Subject: locking/rwsem: Remove unnecessary atomic_long_t casts Since sem->count had been changed to a atomic_long_t type, it is no longer necessary to use the atomic_long_t cast anymore. So remove them. Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Arnd Bergmann Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1484836312-6656-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/asm-generic/rwsem.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 5be122e3d326..6c6a2141f271 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -33,7 +33,7 @@ */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) rwsem_down_read_failed(sem); } @@ -58,7 +58,7 @@ static inline void __down_write(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); } @@ -68,7 +68,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) long tmp; tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count); + &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; @@ -91,7 +91,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count); + tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); } @@ -102,7 +102,7 @@ static inline void __up_read(struct rw_semaphore *sem) static inline void __up_write(struct rw_semaphore *sem) { if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_long_t *)&sem->count) < 0)) + &sem->count) < 0)) rwsem_wake(sem); } @@ -120,8 +120,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ - tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, - (atomic_long_t *)&sem->count); + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); if (tmp < 0) rwsem_downgrade_wake(sem); } -- cgit v1.2.3 From bcc9a76d5ac426bc45c9e863b1830347827ca77a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Jan 2017 21:33:35 -0500 Subject: locking/rwsem: Reinit wake_q after use In __rwsem_down_write_failed_common(), the same wake_q variable name is defined twice, with the inner wake_q hiding the one in outer scope. We can either use different names for the two wake_q's. Even better, we can use the same wake_q twice, if necessary. To enable the latter change, we need to define a new helper function wake_q_init() to enable reinitalization of wake_q after use. Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1485052415-9611-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4f9d32f12b3..4e62b378bd65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,12 @@ struct wake_q_head { #define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -- cgit v1.2.3 From 7e1f9467d1e48c64c27d6a32de1bfd1b9cdb1002 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 29 Jan 2017 07:42:12 -0800 Subject: sched/wait, rcuwait: Fix typo in comment Forgot to update the comment after renaming the call. Signed-off-by: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1485704532-9290-1-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 0e93d56c7ab2..a4ede51b3e7c 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,7 +47,7 @@ extern void rcuwait_wake_up(struct rcuwait *w); for (;;) { \ /* \ * Implicit barrier (A) pairs with (B) in \ - * rcuwait_trywake(). \ + * rcuwait_wake_up(). \ */ \ set_current_state(TASK_UNINTERRUPTIBLE); \ if (condition) \ -- cgit v1.2.3 From 0754445d71c37a7afd4f0790a9be4cf53c1b8cc4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 29 Jan 2017 07:15:31 -0800 Subject: sched/wake_q: Clarify queue reinit comment As of: bcc9a76d5ac ("locking/rwsem: Reinit wake_q after use") the comment regarding the list reinitialization no longer applies, update it with the new wake_q_init() helper. Signed-off-by: Davidlohr Bueso Acked-by: Waiman Long Cc: peterz@infradead.org Cc: longman@redhat.com Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170129151531.GA2444@linux-80c1.suse Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4e62b378bd65..1cc0dede2122 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1000,8 +1000,8 @@ enum cpu_idle_type { * * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function, where the fact that the queue is - * not used again will be easy to see by inspection. + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). * * Note that this can cause spurious wakeups. schedule() callers * must ensure the call is done inside a loop, confirming that the -- cgit v1.2.3 From f405df5de3170c00e5c54f8b7cf4766044a032ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:06:19 +0100 Subject: refcount_t: Introduce a special purpose refcount type Provide refcount_t, an atomic_t like primitive built just for refcounting. It provides saturation semantics such that overflow becomes impossible and thereby 'spurious' use-after-free is avoided. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 294 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 include/linux/refcount.h (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h new file mode 100644 index 000000000000..600aadf9cca4 --- /dev/null +++ b/include/linux/refcount.h @@ -0,0 +1,294 @@ +#ifndef _LINUX_REFCOUNT_H +#define _LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_REFCOUNT +#define REFCOUNT_WARN(cond, str) WARN_ON(cond) +#define __refcount_check __must_check +#else +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +static inline __refcount_check +bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (!val) + return false; + + if (unlikely(val == UINT_MAX)) + return true; + + new = val + i; + if (new < val) + new = UINT_MAX; + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +static inline void refcount_add(unsigned int i, refcount_t *r) +{ + REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + +/* + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ +static inline +void refcount_dec(refcount_t *r) +{ + REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); +} + +/* + * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the + * success thereof. + * + * Like all decrement operations, it provides release memory order and provides + * a control dependency. + * + * It can be used like a try-delete operator; this explicit case is provided + * and not cmpxchg in generic, because that would allow implementing unsafe + * operations. + */ +static inline __refcount_check +bool refcount_dec_if_one(refcount_t *r) +{ + return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; +} + +/* + * No atomic_t counterpart, it decrements unless the value is 1, in which case + * it will return false. + * + * Was often done like: atomic_add_unless(&var, -1, 1) + */ +static inline __refcount_check +bool refcount_dec_not_one(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return true; + + if (val == 1) + return false; + + new = val - 1; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return true; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return true; +} + +/* + * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail + * to decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + mutex_lock(lock); + if (!refcount_dec_and_test(r)) { + mutex_unlock(lock); + return false; + } + + return true; +} + +/* + * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock(lock); + if (!refcount_dec_and_test(r)) { + spin_unlock(lock); + return false; + } + + return true; +} + +#endif /* _LINUX_REFCOUNT_H */ -- cgit v1.2.3 From 10383aea2f445bce9b2a2b308def08134b438c8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:06:19 +0100 Subject: kref: Implement 'struct kref' using refcount_t Use the refcount_t 'atomic' type to implement 'struct kref', this makes kref more robust by bringing saturation semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kref.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/kref.h b/include/linux/kref.h index 9db9685fed84..f4156f88f557 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -15,17 +15,14 @@ #ifndef _KREF_H_ #define _KREF_H_ -#include -#include -#include -#include #include +#include struct kref { - atomic_t refcount; + refcount_t refcount; }; -#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } +#define KREF_INIT(n) { .refcount = REFCOUNT_INIT(n), } /** * kref_init - initialize object. @@ -33,12 +30,12 @@ struct kref { */ static inline void kref_init(struct kref *kref) { - atomic_set(&kref->refcount, 1); + refcount_set(&kref->refcount, 1); } -static inline int kref_read(const struct kref *kref) +static inline unsigned int kref_read(const struct kref *kref) { - return atomic_read(&kref->refcount); + return refcount_read(&kref->refcount); } /** @@ -47,11 +44,7 @@ static inline int kref_read(const struct kref *kref) */ static inline void kref_get(struct kref *kref) { - /* If refcount was 0 before incrementing then we have a race - * condition when this kref is freeing by some other thread right now. - * In this case one should use kref_get_unless_zero() - */ - WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2); + refcount_inc(&kref->refcount); } /** @@ -75,7 +68,7 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) { WARN_ON(release == NULL); - if (atomic_dec_and_test(&kref->refcount)) { + if (refcount_dec_and_test(&kref->refcount)) { release(kref); return 1; } @@ -88,7 +81,7 @@ static inline int kref_put_mutex(struct kref *kref, { WARN_ON(release == NULL); - if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { release(kref); return 1; } @@ -101,7 +94,7 @@ static inline int kref_put_lock(struct kref *kref, { WARN_ON(release == NULL); - if (atomic_dec_and_lock(&kref->refcount, lock)) { + if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } @@ -126,6 +119,6 @@ static inline int kref_put_lock(struct kref *kref, */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { - return atomic_add_unless(&kref->refcount, 1, 0); + return refcount_inc_not_zero(&kref->refcount); } #endif /* _KREF_H_ */ -- cgit v1.2.3