From e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 08:43:38 +0100
Subject: x86/irq: Call chip->irq_set_affinity in proper context

setup_ioapic_dest() calls irqchip->irq_set_affinity() completely
unprotected. That's wrong in several aspects:

 - it opens a race window where irq_set_affinity() can be interrupted and the
   irq chip left in unconsistent state.

 - it triggers a lockdep splat when we fix the vector race for 4.3+ because
   vector lock is taken with interrupts enabled.

The proper calling convention is irq descriptor lock held and interrupts
disabled.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f25321894ad2..fdb0fbfb1197 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	const struct cpumask *mask;
+	struct irq_desc *desc;
 	struct irq_data *idata;
 	struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
 		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
 			continue;
 
-		idata = irq_get_irq_data(irq);
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		idata = irq_desc_get_irq_data(desc);
 
 		/*
 		 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
 		/* Might be lapic_chip for irq 0 */
 		if (chip->irq_set_affinity)
 			chip->irq_set_affinity(idata, mask, false);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 }
 #endif
-- 
cgit v1.2.3


From 111abeba67e0dbdc26537429de9155e4f1d807d8 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:44 +0000
Subject: x86/irq: Fix a race in x86_vector_free_irqs()

There's a race condition between

x86_vector_free_irqs()
{
	free_apic_chip_data(irq_data->chip_data);
	xxxxx	//irq_data->chip_data has been freed, but the pointer
		//hasn't been reset yet
	irq_domain_reset_irq_data(irq_data);
}

and

smp_irq_move_cleanup_interrupt()
{
	raw_spin_lock(&vector_lock);
	data = apic_chip_data(irq_desc_get_irq_data(desc));
	access data->xxxx	// may access freed memory
	raw_spin_unlock(&desc->lock);
}

which may cause smp_irq_move_cleanup_interrupt() to access freed memory.

Call irq_domain_reset_irq_data(), which clears the pointer with vector lock
held.

[ tglx: Free memory outside of lock held region. ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..cf1e325b67ee 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -226,10 +226,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
 	struct irq_desc *desc;
-	unsigned long flags;
 	int cpu, vector;
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
 	BUG_ON(!data->cfg.vector);
 
 	vector = data->cfg.vector;
@@ -239,10 +237,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress)) {
-		raw_spin_unlock_irqrestore(&vector_lock, flags);
+	if (likely(!data->move_in_progress))
 		return;
-	}
 
 	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +251,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 		}
 	}
 	data->move_in_progress = 0;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +271,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
+	struct apic_chip_data *apic_data;
 	struct irq_data *irq_data;
+	unsigned long flags;
 	int i;
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 		if (irq_data && irq_data->chip_data) {
+			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(virq + i, irq_data->chip_data);
-			free_apic_chip_data(irq_data->chip_data);
+			apic_data = irq_data->chip_data;
+			irq_domain_reset_irq_data(irq_data);
+			raw_spin_unlock_irqrestore(&vector_lock, flags);
+			free_apic_chip_data(apic_data);
 #ifdef	CONFIG_X86_IO_APIC
 			if (virq + i < nr_legacy_irqs())
 				legacy_irq_data[virq + i] = NULL;
 #endif
-			irq_domain_reset_irq_data(irq_data);
 		}
 	}
 }
-- 
cgit v1.2.3


From 36f34c8c63da3e272fd66f91089228c22d2b6e8b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:45 +0000
Subject: x86/irq: Validate that irq descriptor is still active

In fixup_irqs() we unconditionally dereference the irq chip of an irq
descriptor. The descriptor might still be valid, but already cleaned up,
i.e. the chip removed. Add a check for this condition.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f8062aaf5df9..c0b58dd1ca04 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -470,6 +470,15 @@ void fixup_irqs(void)
 		}
 
 		chip = irq_data_get_irq_chip(data);
+		/*
+		 * The interrupt descriptor might have been cleaned up
+		 * already, but it is not yet removed from the radix tree
+		 */
+		if (!chip) {
+			raw_spin_unlock(&desc->lock);
+			continue;
+		}
+
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 
-- 
cgit v1.2.3


From 8a580f70f6936ec095da217018cdeeb5835c0207 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: x86/irq: Do not use apic_chip_data.old_domain as temporary buffer

Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a
temporary buffer, which is in the way of using apic_chip_data.old_domain for
synchronizing the vector cleanup with the vector assignement code.

Use a proper temporary cpumask for this.

[ tglx: Renamed the mask to searched_cpumask for clarity ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cf1e325b67ee..19082cf56616 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -126,6 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
@@ -159,9 +160,9 @@ next:
 		}
 
 		if (unlikely(current_vector == vector)) {
-			cpumask_or(d->old_domain, d->old_domain,
+			cpumask_or(searched_cpumask, searched_cpumask,
 				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, d->old_domain);
+			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
 			cpu = cpumask_first_and(vector_cpumask,
 						cpu_online_mask);
 			continue;
@@ -406,6 +407,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
 }
-- 
cgit v1.2.3


From 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: x86/irq: Reorganize the return path in assign_irq_vector

Use an explicit goto for the cases where we have success in the search/update
and return -ENOSPC if the search loop ends due to no space.

Preparatory patch for fixes. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 19082cf56616..613b1cd8eecb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,13 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
+	int cpu;
 
 	if (d->move_in_progress)
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -134,9 +133,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
 		if (cpumask_subset(vector_cpumask, d->domain)) {
-			err = 0;
 			if (cpumask_equal(vector_cpumask, d->domain))
-				break;
+				goto success;
 			/*
 			 * New cpumask using the vector is a proper subset of
 			 * the current in use mask. So cleanup the vector
@@ -147,7 +145,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 			cpumask_and(d->domain, d->domain, vector_cpumask);
-			break;
+			goto success;
 		}
 
 		vector = current_vector;
@@ -187,17 +185,13 @@ next:
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
-		err = 0;
-		break;
+		goto success;
 	}
+	return -ENOSPC;
 
-	if (!err) {
-		/* cache destination APIC IDs into cfg->dest_apicid */
-		err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-						   &d->cfg.dest_apicid);
-	}
-
-	return err;
+success:
+	/* cache destination APIC IDs into cfg->dest_apicid */
+	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
-- 
cgit v1.2.3


From 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:47 +0000
Subject: x86/irq: Reorganize the search in assign_irq_vector

Split out the code which advances the target cpu for the search so we can
reuse it for the next patch which adds an early validation check for the
vectormask which we get from the apic.

Add comments while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 613b1cd8eecb..cef31955ab18 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -157,14 +157,9 @@ next:
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(searched_cpumask, searched_cpumask,
-				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
-			cpu = cpumask_first_and(vector_cpumask,
-						cpu_online_mask);
-			continue;
-		}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
 		if (test_bit(vector, used_vectors))
 			goto next;
@@ -186,6 +181,19 @@ next:
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
 		goto success;
+
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+		 */
+		cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+		cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+		cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+		continue;
 	}
 	return -ENOSPC;
 
-- 
cgit v1.2.3


From 3716fd27a604d61a91cda47083504971486b80f1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:48 +0000
Subject: x86/irq: Check vector allocation early

__assign_irq_vector() uses the vector_cpumask which is assigned by
apic->vector_allocation_domain() without doing basic sanity checks. That can
result in a situation where the final assignement of a newly found vector
fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no
reason.

apic->cpu_mask_to_apicid_and() only fails if

  vector_cpumask & requested_cpumask & cpu_online_mask

is empty.

Check for this condition right away and if the result is empty try immediately
the next possible cpu in the requested mask. So in case of a failure the old
setting is unchanged and we can remove the rollback code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cef31955ab18..940e18d4dbcd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask, searched_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -130,8 +130,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
 
+		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
 		if (cpumask_subset(vector_cpumask, d->domain)) {
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
@@ -164,7 +176,7 @@ next:
 		if (test_bit(vector, used_vectors))
 			goto next;
 
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+		for_each_cpu(new_cpu, vector_searchmask) {
 			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
 				goto next;
 		}
@@ -176,7 +188,7 @@ next:
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 		}
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
@@ -198,8 +210,14 @@ next_cpu:
 	return -ENOSPC;
 
 success:
-	/* cache destination APIC IDs into cfg->dest_apicid */
-	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
+	/*
+	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+	 * as we already established, that mask & d->domain & cpu_online_mask
+	 * is not empty.
+	 */
+	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+					    &d->cfg.dest_apicid));
+	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -409,6 +427,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
@@ -498,14 +517,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
 		return -EINVAL;
 
 	err = assign_irq_vector(irq, data, dest);
-	if (err) {
-		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(irq_data)))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	return IRQ_SET_MASK_OK;
+	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
-- 
cgit v1.2.3


From 9ac15b7a8af4cf3337a101498c0ed690d23ade75 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: x86/irq: Copy vectormask instead of an AND operation

In the case that the new vector mask is a subset of the existing mask there is
no point to do a AND operation of currentmask & newmask. The result is
newmask. So we can simply copy the new mask to the current mask and be done
with it. Preparatory patch for further consolidation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 940e18d4dbcd..1bd29c624531 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -156,7 +156,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 				       vector_cpumask);
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_and(d->domain, d->domain, vector_cpumask);
+			cpumask_copy(d->domain, vector_cpumask);
 			goto success;
 		}
 
-- 
cgit v1.2.3


From ab25ac02148b600e645f77cfb8b8ea415ed75bb4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: x86/irq: Get rid of code duplication

Reusing an existing vector and assigning a new vector has duplicated
code. Consolidate it.

This is also a preparatory patch for finally plugging the cleanup race.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bd29c624531..fccfa3f5545c 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,7 +118,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu;
+	int cpu, vector;
 
 	if (d->move_in_progress)
 		return -EBUSY;
@@ -128,7 +128,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
+		int new_cpu, offset;
 
 		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
@@ -148,16 +148,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
 			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
+			 * Mark the cpus which are not longer in the mask for
+			 * cleanup.
 			 */
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_copy(d->domain, vector_cpumask);
-			goto success;
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
 		}
 
 		vector = current_vector;
@@ -183,16 +179,12 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
 		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
-		cpumask_copy(d->domain, vector_cpumask);
-		goto success;
+		goto update;
 
 next_cpu:
 		/*
@@ -209,6 +201,11 @@ next_cpu:
 	}
 	return -ENOSPC;
 
+update:
+	/* Cleanup required ? */
+	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	d->cfg.vector = vector;
+	cpumask_copy(d->domain, vector_cpumask);
 success:
 	/*
 	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
-- 
cgit v1.2.3


From 847667ef10356b824a11c853fc8a8b1b437b6a8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:50 +0000
Subject: x86/irq: Remove offline cpus from vector cleanup

No point of keeping offline cpus in the cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index fccfa3f5545c..68d18b338e3a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -202,8 +202,12 @@ next_cpu:
 	return -ENOSPC;
 
 update:
-	/* Cleanup required ? */
-	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	/*
+	 * Exclude offline cpus from the cleanup mask and set the
+	 * move_in_progress flag when the result is not empty.
+	 */
+	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+	d->move_in_progress = !cpumask_empty(d->old_domain);
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:
-- 
cgit v1.2.3


From c1684f5035b60e9f98566493e869496fb5de1d89 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:51 +0000
Subject: x86/irq: Clear move_in_progress before sending cleanup IPI

send_cleanup_vector() fiddles with the old_domain mask unprotected because it
relies on the protection by the move_in_progress flag. But this is fatal, as
the flag is reset after the IPI has been sent. So a cpu which receives the IPI
can still see the flag set and therefor ignores the cleanup request. If no
other cleanup request happens then the vector stays stale on that cpu and in
case of an irq removal the vector still persists. That can lead to use after
free when the next cleanup IPI happens.

Protect the code with vector_lock and clear move_in_progress before sending
the IPI.

This does not plug the race which Joe reported because:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

The full fix comes with a later patch.

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 68d18b338e3a..ed62f9c3f785 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -532,6 +532,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 {
 	cpumask_var_t cleanup_mask;
 
+	raw_spin_lock(&vector_lock);
+	data->move_in_progress = 0;
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
 
@@ -543,7 +545,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		free_cpumask_var(cleanup_mask);
 	}
-	data->move_in_progress = 0;
+	raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
-- 
cgit v1.2.3


From 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: x86/irq: Remove the cpumask allocation from send_cleanup_vector()

There is no need to allocate a new cpumask for sending the cleanup vector. The
old_domain mask is now protected by the vector_lock, so we can safely remove
the offline cpus from it and send the IPI with the resulting mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ed62f9c3f785..91dc2742cfb1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -530,21 +530,11 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
 	raw_spin_lock(&vector_lock);
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
 	data->move_in_progress = 0;
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
 	raw_spin_unlock(&vector_lock);
 }
 
-- 
cgit v1.2.3


From 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: x86/irq: Remove outgoing CPU from vector cleanup mask

We want to synchronize new vector assignments with a pending cleanup. Remove a
dying cpu from a pending cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 91dc2742cfb1..a7fa11e49582 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -633,9 +633,23 @@ void irq_complete_move(struct irq_cfg *cfg)
 void irq_force_complete_move(int irq)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
+	struct apic_chip_data *data;
+
+	if (!cfg)
+		return;
 
-	if (cfg)
-		__irq_complete_move(cfg, cfg->vector);
+	__irq_complete_move(cfg, cfg->vector);
+
+	/*
+	 * Remove this cpu from the cleanup mask. The IPI might have been sent
+	 * just before the cpu was removed from the offline mask, but has not
+	 * been processed because the CPU has interrupts disabled and is on
+	 * the way out.
+	 */
+	raw_spin_lock(&vector_lock);
+	data = container_of(cfg, struct apic_chip_data, cfg);
+	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	raw_spin_unlock(&vector_lock);
 }
 #endif
 
-- 
cgit v1.2.3


From 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:53 +0000
Subject: x86/irq: Call irq_force_move_complete with irq descriptor

First of all there is no point in looking up the irq descriptor again, but we
also need the descriptor for the final cleanup race fix in the next
patch. Make that change seperate. No functional difference.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 11 +++++++----
 arch/x86/kernel/irq.c         |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a7fa11e49582..5f7883578880 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -630,10 +630,14 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct apic_chip_data *data;
+	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+	struct apic_chip_data *data = apic_chip_data(irqdata);
+	struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
 	if (!cfg)
 		return;
@@ -647,7 +651,6 @@ void irq_force_complete_move(int irq)
 	 * the way out.
 	 */
 	raw_spin_lock(&vector_lock);
-	data = container_of(cfg, struct apic_chip_data, cfg);
 	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
 	raw_spin_unlock(&vector_lock);
 }
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c0b58dd1ca04..61521dc19c10 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -462,7 +462,7 @@ void fixup_irqs(void)
 		 * non intr-remapping case, we can't wait till this interrupt
 		 * arrives at this cpu before completing the irq move.
 		 */
-		irq_force_complete_move(irq);
+		irq_force_complete_move(desc);
 
 		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
-- 
cgit v1.2.3


From 98229aa36caa9c769b13565523de9b813013c703 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:54 +0000
Subject: x86/irq: Plug vector cleanup race

We still can end up with a stale vector due to the following:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

So we need to serialize the vector assignment against a pending cleanup. The
solution is rather simple now. We not only check for the move_in_progress flag
in assign_irq_vector(), we also check whether there is still a cleanup pending
in the old_domain cpumask. If so, we return -EBUSY to the caller and let him
deal with it. Though we have to be careful in the cpu unplug case. If the
cleanout has not yet completed then the following setaffinity() call would
return -EBUSY. Add code which prevents this.

Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 63 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 10 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5f7883578880..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	static int current_offset = VECTOR_OFFSET_START % 16;
 	int cpu, vector;
 
-	if (d->move_in_progress)
+	/*
+	 * If there is still a move in progress or the previous move has not
+	 * been cleaned up completely, tell the caller to come back later.
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress))
+	/*
+	 * If move is in progress or the old_domain mask is not empty,
+	 * i.e. the cleanup IPI has not been processed yet, we need to remove
+	 * the old references to desc from all cpus vector tables.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain))
 		return;
 
 	desc = irq_to_desc(irq);
@@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if irq migration is in progress
+		 * or this cpu is not set in the cleanup mask.
 		 */
-		if (data->move_in_progress)
+		if (data->move_in_progress ||
+		    !cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
+		/*
+		 * We have two cases to handle here:
+		 * 1) vector is unchanged but the target mask got reduced
+		 * 2) vector and the target mask has changed
+		 *
+		 * #1 is obvious, but in #2 we have two vectors with the same
+		 * irq descriptor: the old and the new vector. So we need to
+		 * make sure that we only cleanup the old vector. The new
+		 * vector has the current @vector number in the config and
+		 * this cpu is part of the target mask. We better leave that
+		 * one alone.
+		 */
 		if (vector == data->cfg.vector &&
 		    cpumask_test_cpu(me, data->domain))
 			goto unlock;
@@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc)
 	__irq_complete_move(cfg, cfg->vector);
 
 	/*
-	 * Remove this cpu from the cleanup mask. The IPI might have been sent
-	 * just before the cpu was removed from the offline mask, but has not
-	 * been processed because the CPU has interrupts disabled and is on
-	 * the way out.
+	 * This is tricky. If the cleanup of @data->old_domain has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * The cleanup cannot make progress because we hold @desc->lock. So in
+	 * case @data->old_domain is not yet cleaned up, we need to drop the
+	 * lock and acquire it again. @desc cannot go away, because the
+	 * hotplug code holds the sparse irq lock.
 	 */
 	raw_spin_lock(&vector_lock);
-	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	/* Clean out all offline cpus (including ourself) first. */
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	while (!cpumask_empty(data->old_domain)) {
+		raw_spin_unlock(&vector_lock);
+		raw_spin_unlock(&desc->lock);
+		cpu_relax();
+		raw_spin_lock(&desc->lock);
+		/*
+		 * Reevaluate apic_chip_data. It might have been cleared after
+		 * we dropped @desc->lock.
+		 */
+		data = apic_chip_data(irqdata);
+		if (!data)
+			return;
+		raw_spin_lock(&vector_lock);
+	}
 	raw_spin_unlock(&vector_lock);
 }
 #endif
-- 
cgit v1.2.3


From 3fda5bb420e79b357328b358409e4c547d8f0a18 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:11:07 +0200
Subject: x86/platform/intel-mid: Enable 64-bit build

Intel Tangier SoC is known to have 64-bit dual core CPU. Enable
64-bit build for it.

The kernel has been tested on Intel Edison board:

	Linux buildroot 4.4.0-next-20160115+ #25 SMP Fri Jan 15 22:03:19 EET 2016 x86_64 GNU/Linux

	processor       : 0
	vendor_id       : GenuineIntel
	cpu family      : 6
	model           : 74
	model name      : Genuine Intel(R) CPU   4000  @  500MHz
	stepping        : 8

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452888668-147116-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/head64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f129a9af6357..2c0f3407bd1f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	reserve_ebda_region();
 
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_INTEL_MID:
+		x86_intel_mid_early_setup();
+		break;
+	default:
+		break;
+	}
+
 	start_kernel();
 }
-- 
cgit v1.2.3


From d394f2d9d8e1e7b4959819344baf67b5995da9b0 Mon Sep 17 00:00:00 2001
From: Alex Thorlton <athorlton@sgi.com>
Date: Fri, 11 Dec 2015 14:59:45 -0600
Subject: x86/platform/UV: Remove EFI memmap quirk for UV2+

Commit a5d90c923bcf ("x86/efi: Quirk out SGI UV") added a quirk
to efi_apply_memmap_quirks to force SGI UV systems to fall back
to the old EFI memmap mechanism.  We have a BIOS fix for this
issue on all systems except for UV1.  This commit fixes up the
EFI quirk/MMR mapping code so that we only apply the special
case to UV1 hardware.

Signed-off-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1449867585-189233-2-git-send-email-athorlton@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d760c6bb37b5..624db00583f4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
 		return;
 	}
 	pr_info("UV: Found %s hub\n", hub);
-	map_low_mmrs();
+
+	/* We now only need to map the MMRs on UV1 */
+	if (is_uv1_hub())
+		map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
-- 
cgit v1.2.3


From 0e1eb0a1f5530bd751fe5bd2c62caa470aaa9643 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 7 Jan 2016 08:25:46 +0100
Subject: perf/x86: add Intel SkyLake uncore IMC PMU support

This patch enables the uncore_imc PMU for Intel
SkyLake Desktop processors (Core i7-6700, model 94).

It is possible to compute memory read/write bandwidth
using:

  $ perf stat -a -e uncore_imc/data_reads/,uncore_imc/data_writes/ ....

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: kan.liang@intel.com
Link: http://lkml.kernel.org/r/1452151546-8853-1-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c     |  3 +++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h     |  1 +
 arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 20 ++++++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index f97f8075bf04..3bf41d413775 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
 	case 87: /* Knights Landing */
 		ret = knl_uncore_pci_init();
 		break;
+	case 94: /* SkyLake */
+		ret = skl_uncore_pci_init();
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 07aa2d6bd710..a7086b862156 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 0b934820fafd..2bd030ddd0db 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC	0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
 	{ /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
 	.name		= "snb_uncore",
 	.id_table	= snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
 	.id_table	= bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+	.name		= "skl_uncore",
+	.id_table	= skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
 	__u32 pci_id;
 	struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+	IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
 	{  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
 	return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
-- 
cgit v1.2.3


From e01d8718de4170373cd7fbf5cf6f9cb61cebb1e9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Jan 2016 23:24:29 +0100
Subject: perf/x86: Fix uninitialized value usage

When calling intel_alt_er() with .idx != EXTRA_REG_RSP_* we will not
initialize alt_idx and then use this uninitialized value to index an
array.

When that is not fatal, it can result in an infinite loop in its
caller __intel_shared_reg_get_constraints(), with IRQs disabled.

Alternative error modes are random memory corruption due to the
cpuc->shared_regs->regs[] array overrun, which manifest in either
get_constraints or put_constraints doing weird stuff.

Only took 6 hours of painful debugging to find this. Neither GCC nor
Smatch warnings flagged this bug.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: ae3f011fc251 ("perf/x86/intel: Fix SLM MSR_OFFCORE_RSP1 valid_mask")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a667078a5180..4264a084e59b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-	int alt_idx;
+	int alt_idx = idx;
+
 	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 
-- 
cgit v1.2.3


From 8f04b8536f0c94f8999b65cd1c6c7523116a00ae Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Jan 2016 23:31:09 +0100
Subject: perf/x86: De-obfuscate code

Get rid of the 'onln' obfuscation.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4264a084e59b..fed2ab1f1065 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2898,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
 		return;
 
 	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
 		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
-				*onln = cpuc->shared_regs;
+				cpuc->kfree_on_online[0] = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
 			}
-- 
cgit v1.2.3


From 8bc9162cd2bf37036c486b384ac445b59b334b04 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 16 Feb 2016 23:04:41 +0100
Subject: perf/x86/amd/uncore: Plug reference leak

In the error path of amd_uncore_cpu_up_prepare() the newly allocated uncore
struct is freed, but the percpu pointer still references it. Set it to NULL.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1602162302170.19512@nanos
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_uncore.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 49742746a6c9..8836fc9fa84b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -323,6 +323,8 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 	return 0;
 
 fail:
+	if (amd_uncore_nb)
+		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
 	kfree(uncore_nb);
 	return -ENOMEM;
 }
-- 
cgit v1.2.3