summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-08 16:12:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-08 16:12:03 -0700
commite1928328699a582a540b105e5f4c160832a7fdcb (patch)
treef36bb303b8648189d7b5a7feb27e58fe9fe3b9f0 /lib
parent46f1ec23a46940846f86a91c46f7119d8a8b5de1 (diff)
parent9156e545765e467e6268c4814cfa609ebb16237e (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "The main changes in this cycle are: - rwsem scalability improvements, phase #2, by Waiman Long, which are rather impressive: "On a 2-socket 40-core 80-thread Skylake system with 40 reader and writer locking threads, the min/mean/max locking operations done in a 5-second testing window before the patchset were: 40 readers, Iterations Min/Mean/Max = 1,807/1,808/1,810 40 writers, Iterations Min/Mean/Max = 1,807/50,344/151,255 After the patchset, they became: 40 readers, Iterations Min/Mean/Max = 30,057/31,359/32,741 40 writers, Iterations Min/Mean/Max = 94,466/95,845/97,098" There's a lot of changes to the locking implementation that makes it similar to qrwlock, including owner handoff for more fair locking. Another microbenchmark shows how across the spectrum the improvements are: "With a locking microbenchmark running on 5.1 based kernel, the total locking rates (in kops/s) on a 2-socket Skylake system with equal numbers of readers and writers (mixed) before and after this patchset were: # of Threads Before Patch After Patch ------------ ------------ ----------- 2 2,618 4,193 4 1,202 3,726 8 802 3,622 16 729 3,359 32 319 2,826 64 102 2,744" The changes are extensive and the patch-set has been through several iterations addressing various locking workloads. There might be more regressions, but unless they are pathological I believe we want to use this new implementation as the baseline going forward. - jump-label optimizations by Daniel Bristot de Oliveira: the primary motivation was to remove IPI disturbance of isolated RT-workload CPUs, which resulted in the implementation of batched jump-label updates. Beyond the improvement of the real-time characteristics kernel, in one test this patchset improved static key update overhead from 57 msecs to just 1.4 msecs - which is a nice speedup as well. - atomic64_t cross-arch type cleanups by Mark Rutland: over the last ~10 years of atomic64_t existence the various types used by the APIs only had to be self-consistent within each architecture - which means they became wildly inconsistent across architectures. Mark puts and end to this by reworking all the atomic64 implementations to use 's64' as the base type for atomic64_t, and to ensure that this type is consistently used for parameters and return values in the API, avoiding further problems in this area. - A large set of small improvements to lockdep by Yuyang Du: type cleanups, output cleanups, function return type and othr cleanups all around the place. - A set of percpu ops cleanups and fixes by Peter Zijlstra. - Misc other changes - please see the Git log for more details" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits) locking/lockdep: increase size of counters for lockdep statistics locking/atomics: Use sed(1) instead of non-standard head(1) option locking/lockdep: Move mark_lock() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING x86/jump_label: Make tp_vec_nr static x86/percpu: Optimize raw_cpu_xchg() x86/percpu, sched/fair: Avoid local_clock() x86/percpu, x86/irq: Relax {set,get}_irq_regs() x86/percpu: Relax smp_processor_id() x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}() locking/rwsem: Guard against making count negative locking/rwsem: Adaptive disabling of reader optimistic spinning locking/rwsem: Enable time-based spinning on reader-owned rwsem locking/rwsem: Make rwsem->owner an atomic_long_t locking/rwsem: Enable readers spinning on writer locking/rwsem: Clarify usage of owner's nonspinaable bit locking/rwsem: Wake up almost all readers in wait queue locking/rwsem: More optimal RT task handling of null owner locking/rwsem: Always release wait_lock before waking up tasks locking/rwsem: Implement lock handoff to prevent lock starvation locking/rwsem: Make rwsem_spin_on_owner() return owner state ...
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig.debug8
-rw-r--r--lib/atomic64.c32
2 files changed, 20 insertions, 20 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6629cab453e8..06d9c9d70385 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1095,7 +1095,7 @@ config PROVE_LOCKING
select DEBUG_SPINLOCK
select DEBUG_MUTEXES
select DEBUG_RT_MUTEXES if RT_MUTEXES
- select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
+ select DEBUG_RWSEMS
select DEBUG_WW_MUTEX_SLOWPATH
select DEBUG_LOCK_ALLOC
select TRACE_IRQFLAGS
@@ -1199,10 +1199,10 @@ config DEBUG_WW_MUTEX_SLOWPATH
config DEBUG_RWSEMS
bool "RW Semaphore debugging: basic checks"
- depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
+ depends on DEBUG_KERNEL
help
- This debugging feature allows mismatched rw semaphore locks and unlocks
- to be detected and reported.
+ This debugging feature allows mismatched rw semaphore locks
+ and unlocks to be detected and reported.
config DEBUG_LOCK_ALLOC
bool "Lock debugging: detect incorrect freeing of live locks"
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 7e6905751522..e98c85a99787 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -42,11 +42,11 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
}
-long long atomic64_read(const atomic64_t *v)
+s64 atomic64_read(const atomic64_t *v)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -55,7 +55,7 @@ long long atomic64_read(const atomic64_t *v)
}
EXPORT_SYMBOL(atomic64_read);
-void atomic64_set(atomic64_t *v, long long i)
+void atomic64_set(atomic64_t *v, s64 i)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
@@ -67,7 +67,7 @@ void atomic64_set(atomic64_t *v, long long i)
EXPORT_SYMBOL(atomic64_set);
#define ATOMIC64_OP(op, c_op) \
-void atomic64_##op(long long a, atomic64_t *v) \
+void atomic64_##op(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
@@ -79,11 +79,11 @@ void atomic64_##op(long long a, atomic64_t *v) \
EXPORT_SYMBOL(atomic64_##op);
#define ATOMIC64_OP_RETURN(op, c_op) \
-long long atomic64_##op##_return(long long a, atomic64_t *v) \
+s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
- long long val; \
+ s64 val; \
\
raw_spin_lock_irqsave(lock, flags); \
val = (v->counter c_op a); \
@@ -93,11 +93,11 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
EXPORT_SYMBOL(atomic64_##op##_return);
#define ATOMIC64_FETCH_OP(op, c_op) \
-long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
- long long val; \
+ s64 val; \
\
raw_spin_lock_irqsave(lock, flags); \
val = v->counter; \
@@ -130,11 +130,11 @@ ATOMIC64_OPS(xor, ^=)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-long long atomic64_dec_if_positive(atomic64_t *v)
+s64 atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter - 1;
@@ -145,11 +145,11 @@ long long atomic64_dec_if_positive(atomic64_t *v)
}
EXPORT_SYMBOL(atomic64_dec_if_positive);
-long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -160,11 +160,11 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
}
EXPORT_SYMBOL(atomic64_cmpxchg);
-long long atomic64_xchg(atomic64_t *v, long long new)
+s64 atomic64_xchg(atomic64_t *v, s64 new)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -174,11 +174,11 @@ long long atomic64_xchg(atomic64_t *v, long long new)
}
EXPORT_SYMBOL(atomic64_xchg);
-long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
+s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;