From e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 08:43:38 +0100 Subject: x86/irq: Call chip->irq_set_affinity in proper context setup_ioapic_dest() calls irqchip->irq_set_affinity() completely unprotected. That's wrong in several aspects: - it opens a race window where irq_set_affinity() can be interrupted and the irq chip left in unconsistent state. - it triggers a lockdep splat when we fix the vector race for 4.3+ because vector lock is taken with interrupts enabled. The proper calling convention is irq descriptor lock held and interrupts disabled. Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: Joe Lawrence Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f25321894ad2..fdb0fbfb1197 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; const struct cpumask *mask; + struct irq_desc *desc; struct irq_data *idata; struct irq_chip *chip; @@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void) if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) continue; - idata = irq_get_irq_data(irq); + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + idata = irq_desc_get_irq_data(desc); /* * Honour affinities which have been set in early boot @@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void) /* Might be lapic_chip for irq 0 */ if (chip->irq_set_affinity) chip->irq_set_affinity(idata, mask, false); + raw_spin_unlock_irq(&desc->lock); } } #endif -- cgit v1.2.3 From 111abeba67e0dbdc26537429de9155e4f1d807d8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:44 +0000 Subject: x86/irq: Fix a race in x86_vector_free_irqs() There's a race condition between x86_vector_free_irqs() { free_apic_chip_data(irq_data->chip_data); xxxxx //irq_data->chip_data has been freed, but the pointer //hasn't been reset yet irq_domain_reset_irq_data(irq_data); } and smp_irq_move_cleanup_interrupt() { raw_spin_lock(&vector_lock); data = apic_chip_data(irq_desc_get_irq_data(desc)); access data->xxxx // may access freed memory raw_spin_unlock(&desc->lock); } which may cause smp_irq_move_cleanup_interrupt() to access freed memory. Call irq_domain_reset_irq_data(), which clears the pointer with vector lock held. [ tglx: Free memory outside of lock held region. ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 908cb37da171..cf1e325b67ee 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -226,10 +226,8 @@ static int assign_irq_vector_policy(int irq, int node, static void clear_irq_vector(int irq, struct apic_chip_data *data) { struct irq_desc *desc; - unsigned long flags; int cpu, vector; - raw_spin_lock_irqsave(&vector_lock, flags); BUG_ON(!data->cfg.vector); vector = data->cfg.vector; @@ -239,10 +237,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) { - raw_spin_unlock_irqrestore(&vector_lock, flags); + if (likely(!data->move_in_progress)) return; - } desc = irq_to_desc(irq); for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { @@ -255,7 +251,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) } } data->move_in_progress = 0; - raw_spin_unlock_irqrestore(&vector_lock, flags); } void init_irq_alloc_info(struct irq_alloc_info *info, @@ -276,19 +271,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { + struct apic_chip_data *apic_data; struct irq_data *irq_data; + unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irq_data && irq_data->chip_data) { + raw_spin_lock_irqsave(&vector_lock, flags); clear_irq_vector(virq + i, irq_data->chip_data); - free_apic_chip_data(irq_data->chip_data); + apic_data = irq_data->chip_data; + irq_domain_reset_irq_data(irq_data); + raw_spin_unlock_irqrestore(&vector_lock, flags); + free_apic_chip_data(apic_data); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs()) legacy_irq_data[virq + i] = NULL; #endif - irq_domain_reset_irq_data(irq_data); } } } -- cgit v1.2.3 From 36f34c8c63da3e272fd66f91089228c22d2b6e8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:45 +0000 Subject: x86/irq: Validate that irq descriptor is still active In fixup_irqs() we unconditionally dereference the irq chip of an irq descriptor. The descriptor might still be valid, but already cleaned up, i.e. the chip removed. Add a check for this condition. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Joe Lawrence Cc: Jeremiah Mahler Cc: Borislav Petkov Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f8062aaf5df9..c0b58dd1ca04 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -470,6 +470,15 @@ void fixup_irqs(void) } chip = irq_data_get_irq_chip(data); + /* + * The interrupt descriptor might have been cleaned up + * already, but it is not yet removed from the radix tree + */ + if (!chip) { + raw_spin_unlock(&desc->lock); + continue; + } + if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); -- cgit v1.2.3 From 8a580f70f6936ec095da217018cdeeb5835c0207 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: x86/irq: Do not use apic_chip_data.old_domain as temporary buffer Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a temporary buffer, which is in the way of using apic_chip_data.old_domain for synchronizing the vector cleanup with the vector assignement code. Use a proper temporary cpumask for this. [ tglx: Renamed the mask to searched_cpumask for clarity ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cf1e325b67ee..19082cf56616 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -31,7 +31,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask; +static cpumask_var_t vector_cpumask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -126,6 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; cpumask_clear(d->old_domain); + cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; @@ -159,9 +160,9 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(d->old_domain, d->old_domain, + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, d->old_domain); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); continue; @@ -406,6 +407,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); } -- cgit v1.2.3 From 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: x86/irq: Reorganize the return path in assign_irq_vector Use an explicit goto for the cases where we have success in the search/update and return -ENOSPC if the search loop ends due to no space. Preparatory patch for fixes. No functional change. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 19082cf56616..613b1cd8eecb 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -118,13 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, err; + int cpu; if (d->move_in_progress) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; cpumask_clear(d->old_domain); cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); @@ -134,9 +133,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, apic->vector_allocation_domain(cpu, vector_cpumask, mask); if (cpumask_subset(vector_cpumask, d->domain)) { - err = 0; if (cpumask_equal(vector_cpumask, d->domain)) - break; + goto success; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector @@ -147,7 +145,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); cpumask_and(d->domain, d->domain, vector_cpumask); - break; + goto success; } vector = current_vector; @@ -187,17 +185,13 @@ next: per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); - err = 0; - break; + goto success; } + return -ENOSPC; - if (!err) { - /* cache destination APIC IDs into cfg->dest_apicid */ - err = apic->cpu_mask_to_apicid_and(mask, d->domain, - &d->cfg.dest_apicid); - } - - return err; +success: + /* cache destination APIC IDs into cfg->dest_apicid */ + return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); } static int assign_irq_vector(int irq, struct apic_chip_data *data, -- cgit v1.2.3 From 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:47 +0000 Subject: x86/irq: Reorganize the search in assign_irq_vector Split out the code which advances the target cpu for the search so we can reuse it for the next patch which adds an early validation check for the vectormask which we get from the apic. Add comments while at it. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 613b1cd8eecb..cef31955ab18 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -157,14 +157,9 @@ next: vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) { - cpumask_or(searched_cpumask, searched_cpumask, - vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, - cpu_online_mask); - continue; - } + /* If the search wrapped around, try the next cpu */ + if (unlikely(current_vector == vector)) + goto next_cpu; if (test_bit(vector, used_vectors)) goto next; @@ -186,6 +181,19 @@ next: d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); goto success; + +next_cpu: + /* + * We exclude the current @vector_cpumask from the requested + * @mask and try again with the next online cpu in the + * result. We cannot modify @mask, so we use @vector_cpumask + * as a temporary buffer here as it will be reassigned when + * calling apic->vector_allocation_domain() above. + */ + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); + cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); + continue; } return -ENOSPC; -- cgit v1.2.3 From 3716fd27a604d61a91cda47083504971486b80f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:48 +0000 Subject: x86/irq: Check vector allocation early __assign_irq_vector() uses the vector_cpumask which is assigned by apic->vector_allocation_domain() without doing basic sanity checks. That can result in a situation where the final assignement of a newly found vector fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no reason. apic->cpu_mask_to_apicid_and() only fails if vector_cpumask & requested_cpumask & cpu_online_mask is empty. Check for this condition right away and if the result is empty try immediately the next possible cpu in the requested mask. So in case of a failure the old setting is unchanged and we can remove the rollback code. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cef31955ab18..940e18d4dbcd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -31,7 +31,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, searched_cpumask; +static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -130,8 +130,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; + /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); + /* + * Clear the offline cpus from @vector_cpumask for searching + * and verify whether the result overlaps with @mask. If true, + * then the call to apic->cpu_mask_to_apicid_and() will + * succeed as well. If not, no point in trying to find a + * vector in this mask. + */ + cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); + if (!cpumask_intersects(vector_searchmask, mask)) + goto next_cpu; + if (cpumask_subset(vector_cpumask, d->domain)) { if (cpumask_equal(vector_cpumask, d->domain)) goto success; @@ -164,7 +176,7 @@ next: if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { + for_each_cpu(new_cpu, vector_searchmask) { if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) goto next; } @@ -176,7 +188,7 @@ next: d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); } - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) + for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); @@ -198,8 +210,14 @@ next_cpu: return -ENOSPC; success: - /* cache destination APIC IDs into cfg->dest_apicid */ - return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); + /* + * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail + * as we already established, that mask & d->domain & cpu_online_mask + * is not empty. + */ + BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid)); + return 0; } static int assign_irq_vector(int irq, struct apic_chip_data *data, @@ -409,6 +427,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); @@ -498,14 +517,7 @@ static int apic_set_affinity(struct irq_data *irq_data, return -EINVAL; err = assign_irq_vector(irq, data, dest); - if (err) { - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(irq_data))) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - return IRQ_SET_MASK_OK; + return err ? err : IRQ_SET_MASK_OK; } static struct irq_chip lapic_controller = { -- cgit v1.2.3 From 9ac15b7a8af4cf3337a101498c0ed690d23ade75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: x86/irq: Copy vectormask instead of an AND operation In the case that the new vector mask is a subset of the existing mask there is no point to do a AND operation of currentmask & newmask. The result is newmask. So we can simply copy the new mask to the current mask and be done with it. Preparatory patch for further consolidation. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 940e18d4dbcd..1bd29c624531 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -156,7 +156,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, vector_cpumask); d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_and(d->domain, d->domain, vector_cpumask); + cpumask_copy(d->domain, vector_cpumask); goto success; } -- cgit v1.2.3 From ab25ac02148b600e645f77cfb8b8ea415ed75bb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: x86/irq: Get rid of code duplication Reusing an existing vector and assigning a new vector has duplicated code. Consolidate it. This is also a preparatory patch for finally plugging the cleanup race. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1bd29c624531..fccfa3f5545c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -118,7 +118,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu; + int cpu, vector; if (d->move_in_progress) return -EBUSY; @@ -128,7 +128,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { - int new_cpu, vector, offset; + int new_cpu, offset; /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); @@ -148,16 +148,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, if (cpumask_equal(vector_cpumask, d->domain)) goto success; /* - * New cpumask using the vector is a proper subset of - * the current in use mask. So cleanup the vector - * allocation for the members that are not used anymore. + * Mark the cpus which are not longer in the mask for + * cleanup. */ - cpumask_andnot(d->old_domain, d->domain, - vector_cpumask); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_copy(d->domain, vector_cpumask); - goto success; + cpumask_andnot(d->old_domain, d->domain, vector_cpumask); + vector = d->cfg.vector; + goto update; } vector = current_vector; @@ -183,16 +179,12 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (d->cfg.vector) { + /* Schedule the old vector for cleanup on all cpus */ + if (d->cfg.vector) cpumask_copy(d->old_domain, d->domain); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - } for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); - goto success; + goto update; next_cpu: /* @@ -209,6 +201,11 @@ next_cpu: } return -ENOSPC; +update: + /* Cleanup required ? */ + d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); success: /* * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail -- cgit v1.2.3 From 847667ef10356b824a11c853fc8a8b1b437b6a8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:50 +0000 Subject: x86/irq: Remove offline cpus from vector cleanup No point of keeping offline cpus in the cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index fccfa3f5545c..68d18b338e3a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -202,8 +202,12 @@ next_cpu: return -ENOSPC; update: - /* Cleanup required ? */ - d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + /* + * Exclude offline cpus from the cleanup mask and set the + * move_in_progress flag when the result is not empty. + */ + cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); + d->move_in_progress = !cpumask_empty(d->old_domain); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: -- cgit v1.2.3 From c1684f5035b60e9f98566493e869496fb5de1d89 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:51 +0000 Subject: x86/irq: Clear move_in_progress before sending cleanup IPI send_cleanup_vector() fiddles with the old_domain mask unprotected because it relies on the protection by the move_in_progress flag. But this is fatal, as the flag is reset after the IPI has been sent. So a cpu which receives the IPI can still see the flag set and therefor ignores the cleanup request. If no other cleanup request happens then the vector stays stale on that cpu and in case of an irq removal the vector still persists. That can lead to use after free when the next cleanup IPI happens. Protect the code with vector_lock and clear move_in_progress before sending the IPI. This does not plug the race which Joe reported because: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 The full fix comes with a later patch. Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 68d18b338e3a..ed62f9c3f785 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -532,6 +532,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; + raw_spin_lock(&vector_lock); + data->move_in_progress = 0; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; @@ -543,7 +545,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data) apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - data->move_in_progress = 0; + raw_spin_unlock(&vector_lock); } void send_cleanup_vector(struct irq_cfg *cfg) -- cgit v1.2.3 From 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: x86/irq: Remove the cpumask allocation from send_cleanup_vector() There is no need to allocate a new cpumask for sending the cleanup vector. The old_domain mask is now protected by the vector_lock, so we can safely remove the offline cpus from it and send the IPI with the resulting mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ed62f9c3f785..91dc2742cfb1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -530,21 +530,11 @@ static struct irq_chip lapic_controller = { #ifdef CONFIG_SMP static void __send_cleanup_vector(struct apic_chip_data *data) { - cpumask_var_t cleanup_mask; - raw_spin_lock(&vector_lock); + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); data->move_in_progress = 0; - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - - for_each_cpu_and(i, data->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), - IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } + if (!cpumask_empty(data->old_domain)) + apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); raw_spin_unlock(&vector_lock); } -- cgit v1.2.3 From 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: x86/irq: Remove outgoing CPU from vector cleanup mask We want to synchronize new vector assignments with a pending cleanup. Remove a dying cpu from a pending cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 91dc2742cfb1..a7fa11e49582 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -633,9 +633,23 @@ void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); + struct apic_chip_data *data; + + if (!cfg) + return; - if (cfg) - __irq_complete_move(cfg, cfg->vector); + __irq_complete_move(cfg, cfg->vector); + + /* + * Remove this cpu from the cleanup mask. The IPI might have been sent + * just before the cpu was removed from the offline mask, but has not + * been processed because the CPU has interrupts disabled and is on + * the way out. + */ + raw_spin_lock(&vector_lock); + data = container_of(cfg, struct apic_chip_data, cfg); + cpumask_clear_cpu(smp_processor_id(), data->old_domain); + raw_spin_unlock(&vector_lock); } #endif -- cgit v1.2.3 From 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:53 +0000 Subject: x86/irq: Call irq_force_move_complete with irq descriptor First of all there is no point in looking up the irq descriptor again, but we also need the descriptor for the final cleanup race fix in the next patch. Make that change seperate. No functional difference. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 11 +++++++---- arch/x86/kernel/irq.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a7fa11e49582..5f7883578880 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -630,10 +630,14 @@ void irq_complete_move(struct irq_cfg *cfg) __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); } -void irq_force_complete_move(int irq) +/* + * Called with @desc->lock held and interrupts disabled. + */ +void irq_force_complete_move(struct irq_desc *desc) { - struct irq_cfg *cfg = irq_cfg(irq); - struct apic_chip_data *data; + struct irq_data *irqdata = irq_desc_get_irq_data(desc); + struct apic_chip_data *data = apic_chip_data(irqdata); + struct irq_cfg *cfg = data ? &data->cfg : NULL; if (!cfg) return; @@ -647,7 +651,6 @@ void irq_force_complete_move(int irq) * the way out. */ raw_spin_lock(&vector_lock); - data = container_of(cfg, struct apic_chip_data, cfg); cpumask_clear_cpu(smp_processor_id(), data->old_domain); raw_spin_unlock(&vector_lock); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c0b58dd1ca04..61521dc19c10 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -462,7 +462,7 @@ void fixup_irqs(void) * non intr-remapping case, we can't wait till this interrupt * arrives at this cpu before completing the irq move. */ - irq_force_complete_move(irq); + irq_force_complete_move(desc); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; -- cgit v1.2.3 From 98229aa36caa9c769b13565523de9b813013c703 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:54 +0000 Subject: x86/irq: Plug vector cleanup race We still can end up with a stale vector due to the following: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 So we need to serialize the vector assignment against a pending cleanup. The solution is rather simple now. We not only check for the move_in_progress flag in assign_irq_vector(), we also check whether there is still a cleanup pending in the old_domain cpumask. If so, we return -EBUSY to the caller and let him deal with it. Though we have to be careful in the cpu unplug case. If the cleanout has not yet completed then the following setaffinity() call would return -EBUSY. Add code which prevents this. Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 63 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5f7883578880..3b670df4ba7b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, static int current_offset = VECTOR_OFFSET_START % 16; int cpu, vector; - if (d->move_in_progress) + /* + * If there is still a move in progress or the previous move has not + * been cleaned up completely, tell the caller to come back later. + */ + if (d->move_in_progress || + cpumask_intersects(d->old_domain, cpu_online_mask)) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ @@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) + /* + * If move is in progress or the old_domain mask is not empty, + * i.e. the cleanup IPI has not been processed yet, we need to remove + * the old references to desc from all cpus vector tables. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) return; desc = irq_to_desc(irq); @@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. + * Nothing to cleanup if irq migration is in progress + * or this cpu is not set in the cleanup mask. */ - if (data->move_in_progress) + if (data->move_in_progress || + !cpumask_test_cpu(me, data->old_domain)) goto unlock; + /* + * We have two cases to handle here: + * 1) vector is unchanged but the target mask got reduced + * 2) vector and the target mask has changed + * + * #1 is obvious, but in #2 we have two vectors with the same + * irq descriptor: the old and the new vector. So we need to + * make sure that we only cleanup the old vector. The new + * vector has the current @vector number in the config and + * this cpu is part of the target mask. We better leave that + * one alone. + */ if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) goto unlock; @@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; } __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + cpumask_clear_cpu(me, data->old_domain); unlock: raw_spin_unlock(&desc->lock); } @@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc) __irq_complete_move(cfg, cfg->vector); /* - * Remove this cpu from the cleanup mask. The IPI might have been sent - * just before the cpu was removed from the offline mask, but has not - * been processed because the CPU has interrupts disabled and is on - * the way out. + * This is tricky. If the cleanup of @data->old_domain has not been + * done yet, then the following setaffinity call will fail with + * -EBUSY. This can leave the interrupt in a stale state. + * + * The cleanup cannot make progress because we hold @desc->lock. So in + * case @data->old_domain is not yet cleaned up, we need to drop the + * lock and acquire it again. @desc cannot go away, because the + * hotplug code holds the sparse irq lock. */ raw_spin_lock(&vector_lock); - cpumask_clear_cpu(smp_processor_id(), data->old_domain); + /* Clean out all offline cpus (including ourself) first. */ + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + while (!cpumask_empty(data->old_domain)) { + raw_spin_unlock(&vector_lock); + raw_spin_unlock(&desc->lock); + cpu_relax(); + raw_spin_lock(&desc->lock); + /* + * Reevaluate apic_chip_data. It might have been cleared after + * we dropped @desc->lock. + */ + data = apic_chip_data(irqdata); + if (!data) + return; + raw_spin_lock(&vector_lock); + } raw_spin_unlock(&vector_lock); } #endif -- cgit v1.2.3 From 3fda5bb420e79b357328b358409e4c547d8f0a18 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Jan 2016 22:11:07 +0200 Subject: x86/platform/intel-mid: Enable 64-bit build Intel Tangier SoC is known to have 64-bit dual core CPU. Enable 64-bit build for it. The kernel has been tested on Intel Edison board: Linux buildroot 4.4.0-next-20160115+ #25 SMP Fri Jan 15 22:03:19 EET 2016 x86_64 GNU/Linux processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 74 model name : Genuine Intel(R) CPU 4000 @ 500MHz stepping : 8 Signed-off-by: Andy Shevchenko Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1452888668-147116-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f129a9af6357..2c0f3407bd1f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data) reserve_ebda_region(); + switch (boot_params.hdr.hardware_subarch) { + case X86_SUBARCH_INTEL_MID: + x86_intel_mid_early_setup(); + break; + default: + break; + } + start_kernel(); } -- cgit v1.2.3 From d394f2d9d8e1e7b4959819344baf67b5995da9b0 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Fri, 11 Dec 2015 14:59:45 -0600 Subject: x86/platform/UV: Remove EFI memmap quirk for UV2+ Commit a5d90c923bcf ("x86/efi: Quirk out SGI UV") added a quirk to efi_apply_memmap_quirks to force SGI UV systems to fall back to the old EFI memmap mechanism. We have a BIOS fix for this issue on all systems except for UV1. This commit fixes up the EFI quirk/MMR mapping code so that we only apply the special case to UV1 hardware. Signed-off-by: Alex Thorlton Reviewed-by: Matt Fleming Cc: Dimitri Sivanich Cc: H. Peter Anvin Cc: Hedi Berriche Cc: Len Brown Cc: Linus Torvalds Cc: Mike Travis Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1449867585-189233-2-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d760c6bb37b5..624db00583f4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -889,7 +889,10 @@ void __init uv_system_init(void) return; } pr_info("UV: Found %s hub\n", hub); - map_low_mmrs(); + + /* We now only need to map the MMRs on UV1 */ + if (is_uv1_hub()) + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; -- cgit v1.2.3 From 0e1eb0a1f5530bd751fe5bd2c62caa470aaa9643 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Jan 2016 08:25:46 +0100 Subject: perf/x86: add Intel SkyLake uncore IMC PMU support This patch enables the uncore_imc PMU for Intel SkyLake Desktop processors (Core i7-6700, model 94). It is possible to compute memory read/write bandwidth using: $ perf stat -a -e uncore_imc/data_reads/,uncore_imc/data_writes/ .... Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1452151546-8853-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 3 +++ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 + arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index f97f8075bf04..3bf41d413775 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -995,6 +995,9 @@ static int __init uncore_pci_init(void) case 87: /* Knights Landing */ ret = knl_uncore_pci_init(); break; + case 94: /* SkyLake */ + ret = skl_uncore_pci_init(); + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 07aa2d6bd710..a7086b862156 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -336,6 +336,7 @@ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); int bdw_uncore_pci_init(void); +int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 0b934820fafd..2bd030ddd0db 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -8,6 +8,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 +#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { { /* end: all zeroes */ }, }; +static const struct pci_device_id skl_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + static struct pci_driver snb_uncore_pci_driver = { .name = "snb_uncore", .id_table = snb_uncore_pci_ids, @@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = { .id_table = bdw_uncore_pci_ids, }; +static struct pci_driver skl_uncore_pci_driver = { + .name = "skl_uncore", + .id_table = skl_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ + IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ { /* end marker */ } }; @@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void) return imc_uncore_pci_init(); } +int skl_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + /* end of Sandy Bridge uncore support */ /* Nehalem uncore support */ -- cgit v1.2.3 From e01d8718de4170373cd7fbf5cf6f9cb61cebb1e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2016 23:24:29 +0100 Subject: perf/x86: Fix uninitialized value usage When calling intel_alt_er() with .idx != EXTRA_REG_RSP_* we will not initialize alt_idx and then use this uninitialized value to index an array. When that is not fatal, it can result in an infinite loop in its caller __intel_shared_reg_get_constraints(), with IRQs disabled. Alternative error modes are random memory corruption due to the cpuc->shared_regs->regs[] array overrun, which manifest in either get_constraints or put_constraints doing weird stuff. Only took 6 hours of painful debugging to find this. Neither GCC nor Smatch warnings flagged this bug. Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: ae3f011fc251 ("perf/x86/intel: Fix SLM MSR_OFFCORE_RSP1 valid_mask") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a667078a5180..4264a084e59b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event) static int intel_alt_er(int idx, u64 config) { - int alt_idx; + int alt_idx = idx; + if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) return idx; -- cgit v1.2.3 From 8f04b8536f0c94f8999b65cd1c6c7523116a00ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2016 23:31:09 +0100 Subject: perf/x86: De-obfuscate code Get rid of the 'onln' obfuscation. Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 4264a084e59b..fed2ab1f1065 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2898,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu) return; if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { - void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; - for_each_cpu(i, topology_sibling_cpumask(cpu)) { struct intel_shared_regs *pc; pc = per_cpu(cpu_hw_events, i).shared_regs; if (pc && pc->core_id == core_id) { - *onln = cpuc->shared_regs; + cpuc->kfree_on_online[0] = cpuc->shared_regs; cpuc->shared_regs = pc; break; } -- cgit v1.2.3 From 8bc9162cd2bf37036c486b384ac445b59b334b04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 Feb 2016 23:04:41 +0100 Subject: perf/x86/amd/uncore: Plug reference leak In the error path of amd_uncore_cpu_up_prepare() the newly allocated uncore struct is freed, but the percpu pointer still references it. Set it to NULL. Signed-off-by: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1602162302170.19512@nanos Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 49742746a6c9..8836fc9fa84b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -323,6 +323,8 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) return 0; fail: + if (amd_uncore_nb) + *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; kfree(uncore_nb); return -ENOMEM; } -- cgit v1.2.3