From c424c108233dc422a9a29ee833154006a5bdf9fc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:37:11 +1100 Subject: KVM: PPC: Book3S HV: Add more info about XIVE queues in debugfs Add details about enabled queues and escalation interrupts. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch/powerpc/kvm/book3s_xive.c') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..6cff5bdfd6b7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1794,6 +1794,7 @@ static int xive_debug_show(struct seq_file *m, void *private) kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int i; if (!xc) continue; @@ -1803,6 +1804,33 @@ static int xive_debug_show(struct seq_file *m, void *private) xc->server_num, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + u32 i0, i1, idx; + + if (!q->qpage && !xc->esc_virq[i]) + continue; + + seq_printf(m, " [q%d]: ", i); + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); + } + if (xc->esc_virq[i]) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', + xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, "\n"); + } + } t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; -- cgit v1.2.3 From bf4159da4751ab8eea43ca6e7c49193dbce8398c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:37:12 +1100 Subject: KVM: PPC: Book3S HV: Enable use of the new XIVE "single escalation" feature That feature, provided by Power9 DD2.0 and later, when supported by newer OPAL versions, allows us to sacrifice a queue (priority 7) in favor of merging all the escalation interrupts of the queues of a single VP into a single interrupt. This reduces the number of host interrupts used up by KVM guests especially when those guests use multiple priorities. It will also enable a future change to control the masking of the escalation interrupts more precisely to avoid spurious ones. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 48 ++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kvm/book3s_xive.c') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6cff5bdfd6b7..a102efeabf05 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) return -EIO; } - /* - * Future improvement: start with them disabled - * and handle DD2 and later scheme of merged escalation - * interrupts - */ - name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", - vcpu->kvm->arch.lpid, xc->server_num, prio); + if (xc->xive->single_escalation) + name = kasprintf(GFP_KERNEL, "kvm-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num); + else + name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num, prio); if (!name) { pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", prio, xc->server_num); rc = -ENOMEM; goto error; } + + pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio); + rc = request_irq(xc->esc_virq[prio], xive_esc_irq, IRQF_NO_THREAD, name, vcpu); if (rc) { @@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) pr_devel("Provisioning prio... %d\n", prio); - /* Provision each VCPU and enable escalations */ + /* Provision each VCPU and enable escalations if needed */ kvm_for_each_vcpu(i, vcpu, kvm) { if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0) + if (rc == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, prio); if (rc) return rc; @@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, /* Allocate IPI */ xc->vp_ipi = xive_native_alloc_irq(); if (!xc->vp_ipi) { + pr_err("Failed to allocate xive irq for VCPU IPI\n"); r = -EIO; goto bail; } @@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; + /* + * Enable the VP first as the single escalation mode will + * affect escalation interrupts numbering + */ + r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + if (r) { + pr_err("Failed to enable VP in OPAL, err %d\n", r); + goto bail; + } + /* * Initialize queues. Initially we set them all for no queueing * and we enable escalation for queue 0 only which we'll use for * our mfrr change notifications. If the VCPU is hot-plugged, we - * do handle provisioning however. + * do handle provisioning however based on the existing "map" + * of enabled queues. */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { struct xive_q *q = &xc->queues[i]; + /* Single escalation, no queue 7 */ + if (i == 7 && xive->single_escalation) + break; + /* Is queue already enabled ? Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0) + if (r == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, i); if (r) goto bail; @@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; - /* Enable the VP */ - r = xive_native_enable_vp(xc->vp_id); - if (r) - goto bail; - /* Route the IPI */ r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); if (!r) @@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", val, server, guest_prio); + /* * If the source doesn't already have an IPI, allocate * one and get the corresponding data @@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (xive->vp_base == XIVE_INVALID_VP) ret = -ENOMEM; + xive->single_escalation = xive_native_has_single_escalation(); + if (ret) { kfree(xive); return ret; -- cgit v1.2.3 From 2267ea7661798a42f0da648a2970e2a03f4bc370 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:37:13 +1100 Subject: KVM: PPC: Book3S HV: Don't use existing "prodded" flag for XIVE escalations The prodded flag is only cleared at the beginning of H_CEDE, so every time we have an escalation, we will cause the *next* H_CEDE to return immediately. Instead use a dedicated "irq_pending" flag to indicate that a guest interrupt is pending for the VCPU. We don't reuse the existing exception bitmap so as to avoid expensive atomic ops. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/kvm/book3s_xive.c') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a102efeabf05..eef9ccafdc09 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -84,8 +84,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data) { struct kvm_vcpu *vcpu = data; - /* We use the existing H_PROD mechanism to wake up the target */ - vcpu->arch.prodded = 1; + vcpu->arch.irq_pending = 1; smp_mb(); if (vcpu->arch.ceded) kvmppc_fast_vcpu_kick(vcpu); -- cgit v1.2.3 From 9b9b13a6d1537ddc4caccd6f1c41b78edbc08437 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:37:16 +1100 Subject: KVM: PPC: Book3S HV: Keep XIVE escalation interrupt masked unless ceded This works on top of the single escalation support. When in single escalation, with this change, we will keep the escalation interrupt disabled unless the VCPU is in H_CEDE (idle). In any other case, we know the VCPU will be rescheduled and thus there is no need to take escalation interrupts in the host whenever a guest interrupt fires. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/powerpc/kvm/book3s_xive.c') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index eef9ccafdc09..7a047bc88f11 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -89,6 +89,17 @@ static irqreturn_t xive_esc_irq(int irq, void *data) if (vcpu->arch.ceded) kvmppc_fast_vcpu_kick(vcpu); + /* Since we have the no-EOI flag, the interrupt is effectively + * disabled now. Clearing xive_esc_on means we won't bother + * doing so on the next entry. + * + * This also allows the entry code to know that if a PQ combination + * of 10 is observed while xive_esc_on is true, it means the queue + * contains an unprocessed escalation interrupt. We don't make use of + * that knowledge today but might (see comment in book3s_hv_rmhandler.S) + */ + vcpu->arch.xive_esc_on = false; + return IRQ_HANDLED; } @@ -134,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) goto error; } xc->esc_virq_names[prio] = name; + + /* In single escalation mode, we grab the ESB MMIO of the + * interrupt and mask it. Also populate the VCPU v/raddr + * of the ESB page for use by asm entry/exit code. Finally + * set the XIVE_IRQ_NO_EOI flag which will prevent the + * core code from performing an EOI on the escalation + * interrupt, thus leaving it effectively masked after + * it fires once. + */ + if (xc->xive->single_escalation) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + + xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); + vcpu->arch.xive_esc_raddr = xd->eoi_page; + vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; + xd->flags |= XIVE_IRQ_NO_EOI; + } + return 0; error: irq_dispose_mapping(xc->esc_virq[prio]); -- cgit v1.2.3