diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/locking/qspinlock.c | 34 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 4 |
2 files changed, 27 insertions, 11 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce6af1ee2cac..8a8c3c208c5e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -74,12 +74,24 @@ */ #include "mcs_spinlock.h" +#define MAX_NODES 4 +/* + * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in + * size and four of them will fit nicely in one 64-byte cacheline. For + * pvqspinlock, however, we need more space for extra data. To accommodate + * that, we insert two more long words to pad it up to 32 bytes. IOW, only + * two of them can fit in a cacheline in this case. That is OK as it is rare + * to have more than 2 levels of slowpath nesting in actual use. We don't + * want to penalize pvqspinlocks to optimize for a rare case in native + * qspinlocks. + */ +struct qnode { + struct mcs_spinlock mcs; #ifdef CONFIG_PARAVIRT_SPINLOCKS -#define MAX_NODES 8 -#else -#define MAX_NODES 4 + long reserved[2]; #endif +}; /* * The pending bit spinning loop count. @@ -101,7 +113,7 @@ * * PV doubles the storage and uses the second cacheline for PV state. */ -static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); +static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]); /* * We must be able to distinguish between no-tail and the tail at 0:0, @@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; - return per_cpu_ptr(&mcs_nodes[idx], cpu); + return per_cpu_ptr(&qnodes[idx].mcs, cpu); +} + +static inline __pure +struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) +{ + return &((struct qnode *)base + idx)->mcs; } #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) @@ -390,11 +408,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) queue: qstat_inc(qstat_lock_slowpath, true); pv_queue: - node = this_cpu_ptr(&mcs_nodes[0]); + node = this_cpu_ptr(&qnodes[0].mcs); idx = node->count++; tail = encode_tail(smp_processor_id(), idx); - node += idx; + node = grab_mcs_node(node, idx); /* * Keep counts of non-zero index values: @@ -534,7 +552,7 @@ release: /* * release the node */ - __this_cpu_dec(mcs_nodes[0].count); + __this_cpu_dec(qnodes[0].mcs.count); } EXPORT_SYMBOL(queued_spin_lock_slowpath); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 5a0cf5f9008c..0130e488ebfe 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -49,8 +49,6 @@ enum vcpu_state { struct pv_node { struct mcs_spinlock mcs; - struct mcs_spinlock __res[3]; - int cpu; u8 state; }; @@ -281,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock)); + BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); pn->cpu = smp_processor_id(); pn->state = vcpu_running; |