diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2013-08-09 19:51:49 +0530 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2013-08-09 07:53:05 -0700 |
commit | 545ac13892ab391049a92108cf59a0d05de7e28c (patch) | |
tree | e993b90bcbedd44b77c895cf7fcee89ee5fe9d51 /arch/x86/include/asm/spinlock.h | |
parent | c095ba7224d8edc71dcef0d655911399a8bd4a3f (diff) |
x86, spinlock: Replace pv spinlocks with pv ticketlocks
Rather than outright replacing the entire spinlock implementation in
order to paravirtualize it, keep the ticket lock implementation but add
a couple of pvops hooks on the slow patch (long spin on lock, unlocking
a contended lock).
Ticket locks have a number of nice properties, but they also have some
surprising behaviours in virtual environments. They enforce a strict
FIFO ordering on cpus trying to take a lock; however, if the hypervisor
scheduler does not schedule the cpus in the correct order, the system can
waste a huge amount of time spinning until the next cpu can take the lock.
(See Thomas Friebel's talk "Prevent Guests from Spinning Around"
http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)
To address this, we add two hooks:
- __ticket_spin_lock which is called after the cpu has been
spinning on the lock for a significant number of iterations but has
failed to take the lock (presumably because the cpu holding the lock
has been descheduled). The lock_spinning pvop is expected to block
the cpu until it has been kicked by the current lock holder.
- __ticket_spin_unlock, which on releasing a contended lock
(there are more cpus with tail tickets), it looks to see if the next
cpu is blocked and wakes it if so.
When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub
functions causes all the extra code to go away.
Results:
=======
setup: 32 core machine with 32 vcpu KVM guest (HT off) with 8GB RAM
base = 3.11-rc
patched = base + pvspinlock V12
+-----------------+----------------+--------+
dbench (Throughput in MB/sec. Higher is better)
+-----------------+----------------+--------+
| base (stdev %)|patched(stdev%) | %gain |
+-----------------+----------------+--------+
| 15035.3 (0.3) |15150.0 (0.6) | 0.8 |
| 1470.0 (2.2) | 1713.7 (1.9) | 16.6 |
| 848.6 (4.3) | 967.8 (4.3) | 14.0 |
| 652.9 (3.5) | 685.3 (3.7) | 5.0 |
+-----------------+----------------+--------+
pvspinlock shows benefits for overcommit ratio > 1 for PLE enabled cases,
and undercommits results are flat
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Link: http://lkml.kernel.org/r/1376058122-8248-2-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Attilio Rao <attilio.rao@citrix.com>
[ Raghavendra: Changed SPIN_THRESHOLD, fixed redefinition of arch_spinlock_t]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/include/asm/spinlock.h')
-rw-r--r-- | arch/x86/include/asm/spinlock.h | 53 |
1 files changed, 43 insertions, 10 deletions
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 33692eaabab5..4d542444bea3 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -34,6 +34,35 @@ # define UNLOCK_LOCK_PREFIX #endif +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) + +#ifndef CONFIG_PARAVIRT_SPINLOCKS + +static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, + __ticket_t ticket) +{ +} + +static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock, + __ticket_t ticket) +{ +} + +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + + +/* + * If a spinlock has someone waiting on it, then kick the appropriate + * waiting cpu. + */ +static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, + __ticket_t next) +{ + if (unlikely(lock->tickets.tail != next)) + ____ticket_unlock_kick(lock, next); +} + /* * Ticket locks are conceptually two parts, one indicating the current head of * the queue, and the other indicating the current tail. The lock is acquired @@ -47,19 +76,24 @@ * in the high part, because a wide xadd increment of the low part would carry * up and contaminate the high part. */ -static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock) { register struct __raw_tickets inc = { .tail = 1 }; inc = xadd(&lock->tickets, inc); for (;;) { - if (inc.head == inc.tail) - break; - cpu_relax(); - inc.head = ACCESS_ONCE(lock->tickets.head); + unsigned count = SPIN_THRESHOLD; + + do { + if (inc.head == inc.tail) + goto out; + cpu_relax(); + inc.head = ACCESS_ONCE(lock->tickets.head); + } while (--count); + __ticket_lock_spinning(lock, inc.tail); } - barrier(); /* make sure nothing creeps before the lock is taken */ +out: barrier(); /* make sure nothing creeps before the lock is taken */ } static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) @@ -78,7 +112,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { + __ticket_t next = lock->tickets.head + 1; + __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); + __ticket_unlock_kick(lock, next); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) @@ -95,8 +132,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) return (__ticket_t)(tmp.tail - tmp.head) > 1; } -#ifndef CONFIG_PARAVIRT_SPINLOCKS - static inline int arch_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); @@ -129,8 +164,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, arch_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT_SPINLOCKS */ - static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) |