summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2019-08-19 13:19:43 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2019-08-19 13:19:43 +1000
commit1a47908e0f81803115d679271a64d84351c91870 (patch)
tree26f23189be6a11ea245f5ce033511e6801d67739 /arch
parent4215fa2d7d3a872c4f6f383dc52961215b0ce2d6 (diff)
parentda15c03b047dca891d37b9f4ef9ca14d84a6484f (diff)
Merge branch 'topic/ppc-kvm' into next
Merge our ppc-kvm topic branch. This contains several fixes for the XIVE interrupt controller that we are sharing with the KVM tree.
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/xive.h8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S38
-rw-r--r--arch/powerpc/kvm/book3s_xive.c60
-rw-r--r--arch/powerpc/kvm/book3s_xive.h2
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c18
-rw-r--r--arch/powerpc/sysdev/xive/common.c87
6 files changed, 163 insertions, 50 deletions
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index e4016985764e..efb0e597b272 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -46,7 +46,15 @@ struct xive_irq_data {
/* Setup/used by frontend */
int target;
+ /*
+ * saved_p means that there is a queue entry for this interrupt
+ * in some CPU's queue (not including guest vcpu queues), even
+ * if P is not set in the source ESB.
+ * stale_p means that there is no queue entry for this interrupt
+ * in some CPU's queue, even if P is set in the source ESB.
+ */
bool saved_p;
+ bool stale_p;
};
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
#define XIVE_IRQ_FLAG_LSI 0x02
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 337e64468d78..07181d0dfcb7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ cmpwi r8, 0
+ beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
@@ -2831,29 +2833,39 @@ kvm_cede_prodded:
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
- /* Abort if we still have a pending escalation */
+ /* are we using XIVE with single escalation? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ li r6, XIVE_ESB_SET_PQ_00
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
- beq 1f
+ beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
-1: /* Enable XIVE escalation */
- li r5, XIVE_ESB_SET_PQ_00
+ li r6, XIVE_ESB_SET_PQ_10
+ b 5f
+4: li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+ /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
+ sync
+5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldx r0, r10, r5
+ ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldcix r0, r10, r5
+ ldcix r0, r10, r6
2: sync
- li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e3ba67095895..591bfb4bfd0f 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
- if (!tima)
+ /*
+ * Nothing to do if the platform doesn't have a XIVE
+ * or this vCPU doesn't have its own XIVE context
+ * (e.g. because it's not using an in-kernel interrupt controller).
+ */
+ if (!tima || !vcpu->arch.xive_cam_word)
return;
+
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
@@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
*/
vcpu->arch.xive_esc_on = false;
+ /* This orders xive_esc_on = false vs. subsequent stale_p = true */
+ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
+
return IRQ_HANDLED;
}
@@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.xive_esc_raddr = 0;
}
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ /*
+ * This slightly odd sequence gives the right result
+ * (i.e. stale_p set if xive_esc_on is false) even if
+ * we race with xive_esc_irq() and xive_irq_eoi().
+ */
+ xd->stale_p = false;
+ smp_mb(); /* paired with smb_wmb in xive_esc_irq */
+ if (!vcpu->arch.xive_esc_on)
+ xd->stale_p = true;
+}
+
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
-
- /* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
- /* Free the queue */
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 50494d0ee375..955b820ffd6d 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index a998823f68a3..f0cab43e6f4b 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
+ }
- /* Free the queue */
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 083f657091d7..6b973b7cdd8a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
u32 irq = 0;
- u8 prio;
+ u8 prio = 0;
/* Find highest pending priority */
while (xc->pending_prio != 0) {
@@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
irq = xive_read_eq(&xc->queue[prio], just_peek);
/* Found something ? That's it */
- if (irq)
- break;
+ if (irq) {
+ if (just_peek || irq_to_desc(irq))
+ break;
+ /*
+ * We should never get here; if we do then we must
+ * have failed to synchronize the interrupt properly
+ * when shutting it down.
+ */
+ pr_crit("xive: got interrupt %d without descriptor, dropping\n",
+ irq);
+ WARN_ON(1);
+ continue;
+ }
/* Clear pending bits */
xc->pending_prio &= ~(1 << prio);
@@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
*/
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
+ xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
@@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
}
}
-/* irq_chip eoi callback */
+/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d)
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
+ else
+ xd->stale_p = true;
/*
* Clear saved_p to indicate that it's no longer occupying
@@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
*/
if (mask) {
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
- xd->saved_p = !!(val & XIVE_ESB_VAL_P);
- } else if (xd->saved_p)
+ if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+ xd->saved_p = true;
+ xd->stale_p = false;
+ } else if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
- else
+ xd->saved_p = false;
+ } else {
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+ xd->stale_p = false;
+ }
}
/*
@@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int target, rc;
+ xd->saved_p = false;
+ xd->stale_p = false;
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
d->irq, hw_irq, d);
@@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
return 0;
}
+/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -602,16 +624,6 @@ static void xive_irq_shutdown(struct irq_data *d)
xive_do_source_set_mask(xd, true);
/*
- * The above may have set saved_p. We clear it otherwise it
- * will prevent re-enabling later on. It is ok to forget the
- * fact that the interrupt might be in a queue because we are
- * accounting that already in xive_dec_target_count() and will
- * be re-routing it to a new queue with proper accounting when
- * it's started up again
- */
- xd->saved_p = false;
-
- /*
* Mask the interrupt in HW in the IVT/EAS and set the number
* to be the "bad" IRQ number
*/
@@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq_data *d)
return 1;
}
+/*
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
/* Set it to PQ=10 state to prevent further sends */
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+ if (!xd->stale_p) {
+ xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+ xd->stale_p = !xd->saved_p;
+ }
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
@@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* An untargetted interrupt should have been
* also masked at the source
*/
- WARN_ON(pq & 2);
+ WARN_ON(xd->saved_p);
return 0;
}
@@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
- if (pq & 2) {
- pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
- xd->saved_p = true;
+ if (xd->saved_p) {
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Sync the XIVE source HW to ensure the interrupt
@@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
- } else
- xd->saved_p = false;
+ }
} else {
irqd_clr_forwarded_to_vcpu(d);
@@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
return 0;
}
+/* Called with irq descriptor lock held. */
+static int xive_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_ACTIVE:
+ *state = !xd->stale_p &&
+ (xd->saved_p ||
+ !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
@@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = {
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+ .irq_get_irqchip_state = xive_get_irqchip_state,
};
bool is_xive_irq(struct irq_chip *chip)
@@ -1338,6 +1374,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
xd = irq_desc_get_handler_data(desc);
/*
+ * Clear saved_p to indicate that it's no longer pending
+ */
+ xd->saved_p = false;
+
+ /*
* For LSIs, we EOI, this will cause a resend if it's
* still asserted. Otherwise do an MSI retrigger.
*/