summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h16
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h16
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h4
-rw-r--r--arch/powerpc/include/asm/exception-64s.h2
-rw-r--r--arch/powerpc/include/asm/hw_irq.h12
-rw-r--r--arch/powerpc/include/asm/kexec.h6
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h6
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h14
-rw-r--r--arch/powerpc/include/asm/kvm_host.h8
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h4
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/opal-api.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h6
-rw-r--r--arch/powerpc/include/asm/topology.h13
-rw-r--r--arch/powerpc/include/asm/xive.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S6
-rw-r--r--arch/powerpc/kernel/pci-common.c5
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/sysfs.c6
-rw-r--r--arch/powerpc/kvm/Kconfig3
-rw-r--r--arch/powerpc/kvm/book3s.c24
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c38
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c70
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S231
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S4
-rw-r--r--arch/powerpc/kvm/book3s_pr.c20
-rw-r--r--arch/powerpc/kvm/book3s_xive.c109
-rw-r--r--arch/powerpc/kvm/book3s_xive.h15
-rw-r--r--arch/powerpc/kvm/booke.c51
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c36
-rw-r--r--arch/powerpc/kvm/powerpc.c200
-rw-r--r--arch/powerpc/kvm/timing.c3
-rw-r--r--arch/powerpc/mm/drmem.c8
-rw-r--r--arch/powerpc/mm/hash64_4k.c4
-rw-r--r--arch/powerpc/mm/hash64_64k.c8
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c10
-rw-r--r--arch/powerpc/mm/init-common.c4
-rw-r--r--arch/powerpc/mm/numa.c5
-rw-r--r--arch/powerpc/mm/pgtable-radix.c117
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/mm/tlb_hash64.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/backing_ops.c8
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c10
-rw-r--r--arch/powerpc/platforms/cell/spufs/hw_ops.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c2
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c16
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c4
-rw-r--r--arch/powerpc/platforms/pseries/ras.c31
-rw-r--r--arch/powerpc/sysdev/xive/native.c18
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c16
60 files changed, 939 insertions, 313 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index, unsigned long hidx,
+ int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
* generic accessors and iterators here
*/
#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
*/
smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index,
+ unsigned long hidx, int offset)
{
- unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..935adcd92a81 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+ defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k we need to use the second half of pmd page table
* to store pointer to deposited pgtable_t
@@ -33,6 +34,16 @@
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
+#endif
+/*
* Define the address range of the kernel non-linear virtual area
*/
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..4746bc68d446 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
+ pgd_t *pgd;
+
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
+
+ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ memset(pgd, 0, PGD_TABLE_SIZE);
+
+ return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+ kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+ pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 51017726d495..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of use of pte fragments and THP, size of page table
* are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
*/
#ifndef __real_pte
-#define __real_pte(e,p) ((real_pte_t){(e)})
+#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 176dfb73d42c..471b2274fbeb 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 88e5e8f17e98..855e17d158b1 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,6 +30,16 @@
#define PACA_IRQ_PMI 0x40
/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
+#endif
+
+/*
* flags for paca->irq_soft_mask
*/
#define IRQS_ENABLED 0
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+ if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 9dcbfa6bbb91..d8b1e8e7e035 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
return false;
}
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 9a667007bff8..376ae803b69c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -249,10 +249,8 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
extern int kvmppc_hcall_impl_pr(unsigned long cmd);
extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
-extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu);
-extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 735cfa35298a..998f7b7aaa9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
lphi = (l >> 16) & 0xf;
switch ((l >> 12) & 0xf) {
case 0:
- return !lphi ? 24 : -1; /* 16MB */
+ return !lphi ? 24 : 0; /* 16MB */
break;
case 1:
return 16; /* 64kB */
break;
case 3:
- return !lphi ? 34 : -1; /* 16GB */
+ return !lphi ? 34 : 0; /* 16GB */
break;
case 7:
return (16 << 8) + 12; /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
return (24 << 8) + 12; /* 16MB in 4kB */
break;
}
- return -1;
+ return 0;
}
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
- return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+ int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+ if (shift)
+ return 1ul << shift;
+ return 0;
}
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;
- if (b_pgshift == 12) {
+ if (b_pgshift <= 12) {
if (a_pgshift > 12) {
sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..1f53b562726f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -690,6 +690,7 @@ struct kvm_vcpu_arch {
u8 mmio_vsx_offset;
u8 mmio_vsx_copy_type;
u8 mmio_vsx_tx_sx_enabled;
+ u8 mmio_vmx_copy_nums;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
@@ -709,6 +710,7 @@ struct kvm_vcpu_arch {
u8 ceded;
u8 prodded;
u8 doorbell_request;
+ u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
struct swait_queue_head *wqp;
@@ -738,8 +740,11 @@ struct kvm_vcpu_arch {
struct kvmppc_icp *icp; /* XICS presentation controller */
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
- u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_esc_on; /* Is the escalation irq enabled ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+ u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
+ u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -800,6 +805,7 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
#define KVM_MMIO_REG_VSX 0x0080
+#define KVM_MMIO_REG_VMX 0x00c0
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9db18287b5f4..7765a800ddae 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
+extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes,
int is_default_endian);
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 504a3c36ce5c..03bbd1149530 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index abddf5830ad5..5c5f75d005ad 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -27,6 +27,7 @@
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 24c73f5575ee..94bd1bf2c873 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1076,6 +1076,7 @@ enum {
/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
enum {
OPAL_XIVE_VP_ENABLED = 0x00000001,
+ OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
};
/* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ab5c1588b487..f1083bcf449c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -156,6 +156,12 @@
#define OP_31_XOP_LFDX 599
#define OP_31_XOP_LFDUX 631
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX 103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX 231
+
#define OP_LWZ 32
#define OP_STFS 52
#define OP_STFSU 53
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 88187c285c70..9f421641a35c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
static inline int early_cpu_to_node(int cpu)
{
int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
{
return 0;
}
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
{
return 0;
}
+static inline int find_and_online_cpu_nid(int cpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 7624e22f5045..8d1a2792484f 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 88b84ac76b53..ea5eb91b836e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -520,6 +520,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -739,6 +740,9 @@ int main(void)
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
arch.xive_cam_word));
DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+ DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+ DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+ DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ee832d344a5a..9b6e653e501a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -943,6 +943,8 @@ kernel_dbg_exc:
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
+ * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in synch.
*/
.macro masked_interrupt_book3e paca_irq full_mask
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 243d072a225a..3ac87e53b3da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
- bne 2f; \
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
+ beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index ae2ede4de6be..446c79611d56 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -362,7 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
*/
static int pci_read_irq_line(struct pci_dev *pci_dev)
{
- unsigned int virq = 0;
+ int virq;
pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
@@ -370,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
memset(&oirq, 0xff, sizeof(oirq));
#endif
/* Try to get a mapping from the device-tree */
- if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) {
+ virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+ if (virq <= 0) {
u8 line, pin;
/* If that fails, lets fallback to what is in the config
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index fc600a8b1e77..f915db93cd42 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -392,7 +392,7 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
{
poll_wait(file, &rtas_log_wait, wait);
if (rtas_log_size)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5a8bfee6e187..04d0bbd7a1dd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b12b8eb39c29..68a0e9d5b440 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_EVENTFD
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select SRCU
select KVM_VFIO
select IRQ_BYPASS_MANAGER
@@ -68,7 +69,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
- select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 72d977e30952..234531d1bee1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -484,19 +484,33 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = kvmppc_get_pc(vcpu);
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = kvmppc_get_ctr(vcpu);
@@ -518,6 +532,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -525,6 +540,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
kvmppc_set_pc(vcpu, regs->pc);
kvmppc_set_cr(vcpu, regs->cr);
kvmppc_set_ctr(vcpu, regs->ctr);
@@ -545,6 +562,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -737,7 +755,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ vcpu_load(vcpu);
vcpu->guest_debug = dbg->control;
+ vcpu_put(vcpu);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b73dbc9e797d..ef243fed2f2b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1269,6 +1269,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Nothing to do */
goto out;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = be64_to_cpu(hptep[1]);
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ }
+
/* Unmap */
rev = &old->rev[idx];
guest_rpte = rev->guest_rpte;
@@ -1298,7 +1303,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Reload PTE after unmap */
vpte = be64_to_cpu(hptep[0]);
-
BUG_ON(vpte & HPTE_V_VALID);
BUG_ON(!(vpte & HPTE_V_ABSENT));
@@ -1307,6 +1311,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ rpte = hpte_new_to_old_r(rpte);
+ }
+
pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
@@ -1337,17 +1347,17 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
}
new_pteg = hash & new_hash_mask;
- if (vpte & HPTE_V_SECONDARY) {
- BUG_ON(~pteg != (hash & old_hash_mask));
- new_pteg = ~new_pteg;
- } else {
- BUG_ON(pteg != (hash & old_hash_mask));
- }
+ if (vpte & HPTE_V_SECONDARY)
+ new_pteg = ~hash & new_hash_mask;
new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
new_hptep = (__be64 *)(new->virt + (new_idx << 4));
replace_vpte = be64_to_cpu(new_hptep[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+ }
if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
BUG_ON(new->order >= old->order);
@@ -1363,6 +1373,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Discard the previous HPTE */
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = hpte_old_to_new_r(vpte, rpte);
+ vpte = hpte_old_to_new_v(vpte);
+ }
+
new_hptep[1] = cpu_to_be64(rpte);
new->rev[new_idx].guest_rpte = guest_rpte;
/* No need for a barrier, since new HPT isn't active */
@@ -1380,12 +1395,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
unsigned long i;
int rc;
- /*
- * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
- * that POWER9 uses, and could well hit a BUG_ON on POWER9.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- return -EIO;
for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
rc = resize_hpt_rehash_hpte(resize, i);
if (rc != 0)
@@ -1416,6 +1425,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
synchronize_srcu_expedited(&kvm->srcu);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 58618f644c56..0c854816e653 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
j = i + 1;
if (npages) {
set_dirty_bits(map, i, npages);
- i = j + npages;
+ j = i + npages;
}
}
return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e4f70c33fbc7..89707354c2ef 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -116,6 +116,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -1003,8 +1006,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tvcpu;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return EMULATE_FAIL;
if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
return RESUME_GUEST;
if (get_op(inst) != 31)
@@ -1054,6 +1055,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+/* Called with vcpu->arch.vcore->lock held */
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -1174,7 +1176,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_debug_inst(run, vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1189,8 +1194,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
r = EMULATE_FAIL;
- if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
+ cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_doorbell_instr(vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
+ }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1495,6 +1505,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ *val = get_reg_val(id, vcpu->arch.dec_expires +
+ vcpu->arch.vcore->tb_offset);
+ break;
default:
r = -EINVAL;
break;
@@ -1722,6 +1736,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ vcpu->arch.dec_expires = set_reg_val(id, *val) -
+ vcpu->arch.vcore->tb_offset;
+ break;
default:
r = -EINVAL;
break;
@@ -2376,8 +2394,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
static bool subcore_config_ok(int n_subcores, int n_threads)
{
/*
- * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
- * mode, with one thread per subcore.
+ * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+ * split-core mode, with one thread per subcore.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
return n_subcores <= 4 && n_threads == 1;
@@ -2413,8 +2431,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return false;
- /* POWER9 currently requires all threads to be in the same MMU mode */
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ /* Some POWER9 chips require all threads to be in the same MMU mode */
+ if (no_mixing_hpt_and_radix &&
kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
return false;
@@ -2677,9 +2695,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
* On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host.
+ * this is a HPT guest on a radix host machine where the
+ * CPU threads may not be in different MMU modes.
*/
- hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+ hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+ !kvm_is_radix(vc->kvm);
if (((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
(hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2829,7 +2849,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if (!thr0_done)
kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
}
/*
@@ -2932,13 +2951,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
+ preempt_enable();
+
for (sub = 0; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
post_guest_process(pvc, pvc == vc);
}
spin_lock(&vc->lock);
- preempt_enable();
out:
vc->vcore_state = VCORE_INACTIVE;
@@ -2985,7 +3005,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
{
if (!xive_enabled())
return false;
- return vcpu->arch.xive_saved_state.pipr <
+ return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
vcpu->arch.xive_saved_state.cppr;
}
#else
@@ -3174,17 +3194,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* this thread straight away and have it join in.
*/
if (!signal_pending(current)) {
- if (vc->vcore_state == VCORE_PIGGYBACK) {
- if (spin_trylock(&vc->lock)) {
- if (vc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(vc)) {
- kvmppc_create_dtl_entry(vcpu, vc);
- kvmppc_start_thread(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
- }
- spin_unlock(&vc->lock);
- }
- } else if (vc->vcore_state == VCORE_RUNNING &&
+ if ((vc->vcore_state == VCORE_PIGGYBACK ||
+ vc->vcore_state == VCORE_RUNNING) &&
!VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
@@ -4446,6 +4457,19 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
+
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ no_mixing_hpt_and_radix = true;
+ }
+
return r;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7886b313d135..f31f357b8c5a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -413,10 +413,11 @@ FTR_SECTION_ELSE
/* On P9 we use the split_info for coordinating LPCR changes */
lwz r4, KVM_SPLIT_DO_SET(r6)
cmpwi r4, 0
- beq 63f
+ beq 1f
mr r3, r6
bl kvmhv_p9_set_lpcr
nop
+1:
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
@@ -617,13 +618,6 @@ kvmppc_hv_entry:
lbz r0, KVM_RADIX(r9)
cmpwi cr7, r0, 0
- /* Clear out SLB if hash */
- bne cr7, 2f
- li r6,0
- slbmte r6,r6
- slbia
- ptesync
-2:
/*
* POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies,
@@ -738,19 +732,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
10: cmpdi r4, 0
beq kvmppc_primary_no_guest
kvmppc_got_guest:
-
- /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
- lwz r5,VCPU_SLB_MAX(r4)
- cmpwi r5,0
- beq 9f
- mtctr r5
- addi r6,r4,VCPU_SLB
-1: ld r8,VCPU_SLB_E(r6)
- ld r9,VCPU_SLB_V(r6)
- slbmte r9,r8
- addi r6,r6,VCPU_SLB_SIZE
- bdnz 1b
-9:
/* Increment yield count if they have a VPA */
ld r3, VCPU_VPA(r4)
cmpdi r3, 0
@@ -957,7 +938,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -1018,6 +998,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
+ /* For hash guest, clear out and reload the SLB */
+ ld r6, VCPU_KVM(r4)
+ lbz r0, KVM_RADIX(r6)
+ cmpwi r0, 0
+ bne 9f
+ li r6, 0
+ slbmte r6, r6
+ slbia
+ ptesync
+
+ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+ lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1034,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li r9, TM_QW1_OS + TM_WORD2
stwcix r11,r9,r10
li r9, 1
- stw r9, VCPU_XIVE_PUSHED(r4)
+ stb r9, VCPU_XIVE_PUSHED(r4)
eieio
+
+ /*
+ * We clear the irq_pending flag. There is a small chance of a
+ * race vs. the escalation interrupt happening on another
+ * processor setting it again, but the only consequence is to
+ * cause a spurrious wakeup on the next H_CEDE which is not an
+ * issue.
+ */
+ li r0,0
+ stb r0, VCPU_IRQ_PENDING(r4)
+
+ /*
+ * In single escalation mode, if the escalation interrupt is
+ * on, we mask it.
+ */
+ lbz r0, VCPU_XIVE_ESC_ON(r4)
+ cmpwi r0,0
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_RADDR(r4)
+ li r9, XIVE_ESB_SET_PQ_01
+ ldcix r0, r10, r9
+ sync
+
+ /* We have a possible subtle race here: The escalation interrupt might
+ * have fired and be on its way to the host queue while we mask it,
+ * and if we unmask it early enough (re-cede right away), there is
+ * a theorical possibility that it fires again, thus landing in the
+ * target queue more than once which is a big no-no.
+ *
+ * Fortunately, solving this is rather easy. If the above load setting
+ * PQ to 01 returns a previous value where P is set, then we know the
+ * escalation interrupt is somewhere on its way to the host. In that
+ * case we simply don't clear the xive_esc_on flag below. It will be
+ * eventually cleared by the handler for the escalation interrupt.
+ *
+ * Then, when doing a cede, we check that flag again before re-enabling
+ * the escalation interrupt, and if set, we abort the cede.
+ */
+ andi. r0, r0, XIVE_ESB_VAL_P
+ bne- 1f
+
+ /* Now P is 0, we can clear the flag */
+ li r0, 0
+ stb r0, VCPU_XIVE_ESC_ON(r4)
+1:
no_xive:
#endif /* CONFIG_KVM_XICS */
@@ -1193,7 +1241,7 @@ hdec_soon:
addi r3, r4, VCPU_TB_RMEXIT
bl kvmhv_accumulate_time
#endif
- b guest_exit_cont
+ b guest_bypass
/******************************************************************************
* *
@@ -1423,15 +1471,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
blt deliver_guest_interrupt
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+ /* Save more register state */
+ mfdar r6
+ mfdsisr r7
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq mc_cont
+ std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r9, VCPU_TB_RMEXIT
+ mr r4, r9
+ bl kvmhv_accumulate_time
+#endif
#ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
- lwz r0, VCPU_XIVE_PUSHED(r9)
+ lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
beq 1f
li r7, TM_SPC_PULL_OS_CTX
li r6, TM_QW1_OS
mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
+ andi. r0, r0, MSR_DR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr0, r10, 0
@@ -1454,33 +1522,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Fixup some of the state for the next load */
li r10, 0
li r0, 0xff
- stw r10, VCPU_XIVE_PUSHED(r9)
+ stb r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
eieio
1:
#endif /* CONFIG_KVM_XICS */
- /* Save more register state */
- mfdar r6
- mfdsisr r7
- std r6, VCPU_DAR(r9)
- stw r7, VCPU_DSISR(r9)
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
- beq mc_cont
- std r6, VCPU_FAULT_DAR(r9)
- stw r7, VCPU_FAULT_DSISR(r9)
- /* See if it is a machine check */
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- beq machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
- addi r3, r9, VCPU_TB_RMEXIT
- mr r4, r9
- bl kvmhv_accumulate_time
-#endif
+ /* For hash guest, read the guest SLB and save it away */
+ ld r5, VCPU_KVM(r9)
+ lbz r0, KVM_RADIX(r5)
+ li r5, 0
+ cmpwi r0, 0
+ bne 3f /* for radix, save 0 entries */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ /* Finally clear out the SLB */
+ li r0,0
+ slbmte r0,r0
+ slbia
+ ptesync
+3: stw r5,VCPU_SLB_MAX(r9)
+guest_bypass:
mr r3, r12
/* Increment exit count, poke other threads to exit */
bl kvmhv_commence_exit
@@ -1501,31 +1578,6 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
- /* Check if we are running hash or radix and store it in cr2 */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2,r0,0
-
- /* Read the guest SLB and save it away */
- li r5, 0
- bne cr2, 3f /* for radix, save 0 entries */
- lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
- mtctr r0
- li r6,0
- addi r7,r9,VCPU_SLB
-1: slbmfee r8,r6
- andis. r0,r8,SLB_ESID_V@h
- beq 2f
- add r8,r8,r6 /* put index in */
- slbmfev r3,r6
- std r8,VCPU_SLB_E(r7)
- std r3,VCPU_SLB_V(r7)
- addi r7,r7,VCPU_SLB_SIZE
- addi r5,r5,1
-2: addi r6,r6,1
- bdnz 1b
-3: stw r5,VCPU_SLB_MAX(r9)
-
/*
* Save the guest PURR/SPURR
*/
@@ -1803,7 +1855,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi cr2, r0, 0
- beq cr2, 3f
+ beq cr2, 4f
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1891,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
- b 4f
+4:
#endif /* CONFIG_PPC_RADIX_MMU */
- /* Hash: clear out SLB */
-3: li r5,0
- slbmte r5,r5
- slbia
- ptesync
-4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -2745,7 +2791,32 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
- b guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+ /* Abort if we still have a pending escalation */
+ lbz r5, VCPU_XIVE_ESC_ON(r9)
+ cmpwi r5, 0
+ beq 1f
+ li r0, 0
+ stb r0, VCPU_CEDED(r9)
+1: /* Enable XIVE escalation */
+ li r5, XIVE_ESB_SET_PQ_00
+ mfmsr r0
+ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldx r0, r10, r5
+ b 2f
+1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldcix r0, r10, r5
+2: sync
+ li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3: b guest_exit_cont
/* Try to handle a machine check in real mode */
machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 901e6fe00c39..c18e845019ec 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -96,7 +96,7 @@ kvm_start_entry:
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- GET_SHADOW_VCPU(r3)
+ mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
REST_GPR(4, r1)
@@ -165,9 +165,7 @@ after_sprg3_load:
stw r12, VCPU_TRAP(r3)
/* Transfer reg values from shadow vcpu back to vcpu struct */
- /* On 64-bit, interrupts are still off at this point */
- GET_SHADOW_VCPU(r4)
bl FUNC(kvmppc_copy_from_svcpu)
nop
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7deaeeb14b93..3ae752314b34 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -121,7 +121,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
if (svcpu->in_use) {
- kvmppc_copy_from_svcpu(vcpu, svcpu);
+ kvmppc_copy_from_svcpu(vcpu);
}
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
@@ -143,9 +143,10 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
}
/* Copy data needed by real-mode code from vcpu to shadow vcpu */
-void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu)
+void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
svcpu->gpr[0] = vcpu->arch.gpr[0];
svcpu->gpr[1] = vcpu->arch.gpr[1];
svcpu->gpr[2] = vcpu->arch.gpr[2];
@@ -177,17 +178,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.entry_ic = mfspr(SPRN_IC);
svcpu->in_use = true;
+
+ svcpu_put(svcpu);
}
/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
-void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu)
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
{
- /*
- * vcpu_put would just call us again because in_use hasn't
- * been updated yet.
- */
- preempt_disable();
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
/*
* Maybe we were already preempted and synced the svcpu from
@@ -233,7 +231,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
svcpu->in_use = false;
out:
- preempt_enable();
+ svcpu_put(svcpu);
}
static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6882bc94eba8..f0f5cd4d2fe7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
{
struct kvm_vcpu *vcpu = data;
- /* We use the existing H_PROD mechanism to wake up the target */
- vcpu->arch.prodded = 1;
+ vcpu->arch.irq_pending = 1;
smp_mb();
if (vcpu->arch.ceded)
kvmppc_fast_vcpu_kick(vcpu);
+ /* Since we have the no-EOI flag, the interrupt is effectively
+ * disabled now. Clearing xive_esc_on means we won't bother
+ * doing so on the next entry.
+ *
+ * This also allows the entry code to know that if a PQ combination
+ * of 10 is observed while xive_esc_on is true, it means the queue
+ * contains an unprocessed escalation interrupt. We don't make use of
+ * that knowledge today but might (see comment in book3s_hv_rmhandler.S)
+ */
+ vcpu->arch.xive_esc_on = false;
+
return IRQ_HANDLED;
}
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
- /*
- * Future improvement: start with them disabled
- * and handle DD2 and later scheme of merged escalation
- * interrupts
- */
- name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
- vcpu->kvm->arch.lpid, xc->server_num, prio);
+ if (xc->xive->single_escalation)
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num);
+ else
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
+
+ pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu);
if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
goto error;
}
xc->esc_virq_names[prio] = name;
+
+ /* In single escalation mode, we grab the ESB MMIO of the
+ * interrupt and mask it. Also populate the VCPU v/raddr
+ * of the ESB page for use by asm entry/exit code. Finally
+ * set the XIVE_IRQ_NO_EOI flag which will prevent the
+ * core code from performing an EOI on the escalation
+ * interrupt, thus leaving it effectively masked after
+ * it fires once.
+ */
+ if (xc->xive->single_escalation) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+ vcpu->arch.xive_esc_raddr = xd->eoi_page;
+ vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+ xd->flags |= XIVE_IRQ_NO_EOI;
+ }
+
return 0;
error:
irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
pr_devel("Provisioning prio... %d\n", prio);
- /* Provision each VCPU and enable escalations */
+ /* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
- if (rc == 0)
+ if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio);
if (rc)
return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
+ pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
@@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
goto bail;
/*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (r) {
+ pr_err("Failed to enable VP in OPAL, err %d\n", r);
+ goto bail;
+ }
+
+ /*
* Initialize queues. Initially we set them all for no queueing
* and we enable escalation for queue 0 only which we'll use for
* our mfrr change notifications. If the VCPU is hot-plugged, we
- * do handle provisioning however.
+ * do handle provisioning however based on the existing "map"
+ * of enabled queues.
*/
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
+ /* Single escalation, no queue 7 */
+ if (i == 7 && xive->single_escalation)
+ break;
+
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
- if (r == 0)
+ if (r == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, i);
if (r)
goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
- /* Enable the VP */
- r = xive_native_enable_vp(xc->vp_id);
- if (r)
- goto bail;
-
/* Route the IPI */
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio);
+
/*
* If the source doesn't already have an IPI, allocate
* one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
if (xive->vp_base == XIVE_INVALID_VP)
ret = -ENOMEM;
+ xive->single_escalation = xive_native_has_single_escalation();
+
if (ret) {
kfree(xive);
return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
if (!xc)
continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_printf(m, "\n");
+ }
+ }
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
u32 q_order;
u32 q_page_order;
+ /* Flags */
+ u8 single_escalation;
};
#define KVMPPC_XIVE_Q_COUNT 8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
* is as follow.
*
* Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
*
* Similar mapping is done for CPPR values
*/
static inline u8 xive_prio_from_guest(u8 prio)
{
- if (prio == 0xff || prio < 8)
+ if (prio == 0xff || prio < 6)
return prio;
- return 7;
+ return 6;
}
static inline u8 xive_prio_to_guest(u8 prio)
{
- if (prio == 0xff || prio < 7)
- return prio;
- return 0xb;
+ return prio;
}
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 83b485810aea..6038e2e7aee0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1431,6 +1431,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = vcpu->arch.pc;
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
@@ -1452,6 +1454,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -1459,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
vcpu->arch.pc = regs->pc;
kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
@@ -1480,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -1607,30 +1613,42 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- int ret;
+ int ret = -EINVAL;
+ vcpu_load(vcpu);
if (vcpu->arch.pvr != sregs->pvr)
- return -EINVAL;
+ goto out;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
@@ -1773,7 +1791,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
int r;
+ vcpu_load(vcpu);
r = kvmppc_core_vcpu_translate(vcpu, tr);
+ vcpu_put(vcpu);
return r;
}
@@ -1996,12 +2016,15 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
struct debug_reg *dbg_reg;
int n, b = 0, w = 0;
+ int ret = 0;
+
+ vcpu_load(vcpu);
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
- return 0;
+ goto out;
}
kvm_guest_protect_msr(vcpu, MSR_DE, true);
@@ -2033,8 +2056,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
#endif
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- return 0;
+ goto out;
+ ret = -EINVAL;
for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
uint64_t addr = dbg->arch.bp[n].addr;
uint32_t type = dbg->arch.bp[n].type;
@@ -2045,21 +2069,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (type & ~(KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE |
KVMPPC_DEBUG_BREAKPOINT))
- return -EINVAL;
+ goto out;
if (type & KVMPPC_DEBUG_BREAKPOINT) {
/* Setting H/W breakpoint */
if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
- return -EINVAL;
+ goto out;
} else {
/* Setting H/W watchpoint */
if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
type, w++))
- return -EINVAL;
+ goto out;
}
}
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index af833531af31..a382e15135e6 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,6 +58,18 @@ static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) {
+ kvmppc_core_queue_vec_unavail(vcpu);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_ALTIVEC */
+
/*
* XXX to do:
* lfiwax, lfiwzx
@@ -98,6 +110,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
vcpu->arch.mmio_sp64_extend = 0;
vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_vmx_copy_nums = 0;
switch (get_op(inst)) {
case 31:
@@ -459,6 +472,29 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs, 4, 1);
break;
#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+ case OP_31_XOP_LVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ KVM_MMIO_REG_VMX|rt, 1);
+ break;
+
+ case OP_31_XOP_STVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ rs, 1);
+ break;
+#endif /* CONFIG_ALTIVEC */
+
default:
emulated = EMULATE_FAIL;
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0a7c88786ec0..403e642c78f5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -638,8 +638,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_SPAPR_RESIZE_HPT:
- /* Disable this on POWER9 until code handles new HPTE format */
- r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
+ r = !!hv_enabled;
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -763,7 +762,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = ~(u64)0;
+ vcpu->arch.dec_expires = get_tb();
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
@@ -930,6 +929,34 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+ u32 hi, lo;
+ u32 di;
+
+#ifdef __BIG_ENDIAN
+ hi = gpr >> 32;
+ lo = gpr & 0xffffffff;
+#else
+ lo = gpr >> 32;
+ hi = gpr & 0xffffffff;
+#endif
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ VCPU_VSX_VR(vcpu, index).u[di * 2] = hi;
+ VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo;
+}
+#endif /* CONFIG_ALTIVEC */
+
#ifdef CONFIG_PPC_FPU
static inline u64 sp_to_dp(u32 fprs)
{
@@ -1033,6 +1060,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_vsr_dword_dump(vcpu, gpr);
break;
#endif
+#ifdef CONFIG_ALTIVEC
+ case KVM_MMIO_REG_VMX:
+ kvmppc_set_vmx_dword(vcpu, gpr);
+ break;
+#endif
default:
BUG();
}
@@ -1106,11 +1138,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
enum emulation_result emulated = EMULATE_DONE;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1282,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.io_gpr = rs;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
@@ -1312,6 +1340,111 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+/* handle quadword load access in two halves */
+int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, int is_default_endian)
+{
+ enum emulation_result emulated;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
+ is_default_endian, 0);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+ vector128 vrs = VCPU_VSX_VR(vcpu, rs);
+ u32 di;
+ u64 w0, w1;
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return -1;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ w0 = vrs.u[di * 2];
+ w1 = vrs.u[di * 2 + 1];
+
+#ifdef __BIG_ENDIAN
+ *val = (w0 << 32) | w1;
+#else
+ *val = (w1 << 32) | w0;
+#endif
+ return 0;
+}
+
+/* handle quadword store in two halves */
+int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rs, int is_default_endian)
+{
+ u64 val = 0;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ vcpu->arch.io_gpr = rs;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1)
+ return EMULATE_FAIL;
+
+ emulated = kvmppc_handle_store(run, vcpu, val, 8,
+ is_default_endian);
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ enum emulation_result emulated = EMULATE_FAIL;
+ int r;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ } else {
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_ALTIVEC */
+
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = 0;
@@ -1413,6 +1546,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
+ vcpu_load(vcpu);
+
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
if (!vcpu->mmio_is_write)
@@ -1427,7 +1562,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
if (r == RESUME_HOST) {
vcpu->mmio_needed = 1;
- return r;
+ goto out;
+ }
+ }
+#endif
+#ifdef CONFIG_ALTIVEC
+ if (vcpu->arch.mmio_vmx_copy_nums > 0)
+ vcpu->arch.mmio_vmx_copy_nums--;
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ goto out;
}
}
#endif
@@ -1461,6 +1608,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+out:
+ vcpu_put(vcpu);
return r;
}
@@ -1608,23 +1757,31 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
- switch (ioctl) {
- case KVM_INTERRUPT: {
+ if (ioctl == KVM_INTERRUPT) {
struct kvm_interrupt irq;
- r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
- goto out;
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- goto out;
+ return -EFAULT;
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ vcpu_load(vcpu);
+ switch (ioctl) {
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
@@ -1664,6 +1821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
+ vcpu_put(vcpu);
return r;
}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index e44d2b2ea97e..1c03c978eb18 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
int i;
u64 min, max, sum, sum_quad;
- seq_printf(m, "%s", "type count min max sum sum_squared\n");
-
+ seq_puts(m, "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..916844f99c64 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, n_lmbs;
n_lmbs = of_read_number(prop++, 1);
+ if (n_lmbs == 0)
+ return;
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, j, lmb_sets;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
struct drmem_lmb *lmb;
drmem_info->n_lmbs = of_read_number(prop++, 1);
+ if (drmem_info->n_lmbs == 0)
+ return;
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
int lmb_index;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
/* first pass, calculate the number of LMBs */
p = prop;
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
*None of the sub 4k page is hashed
*/
@@ -214,7 +214,7 @@ repeat:
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d07c7e17db6..cf290d415dcd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
+ __pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 12511f5a015f..b320f5097a06 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
- long slot;
+ long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
/*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index eb8c6c8c4851..2b656e67f2ea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+ if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+ pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 314d19ab9385..edd8d0bc9364 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 573a9a2ee455..2e10a964e290 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -17,9 +17,11 @@
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+ /*
+ * The init_mm context is given the first available (non-zero) PID,
+ * which is the "guard PID" and contains no page table. PIDR should
+ * never be set to zero because that duplicates the kernel address
+ * space at the 0x0... offset (quadrant 0)!
+ *
+ * An arbitrary PID that may later be allocated by the PID allocator
+ * for userspace processes must not be used either, because that
+ * would cause stale user mappings for that PID on CPUs outside of
+ * the TLB invalidation scheme (because it won't be in mm_cpumask).
+ *
+ * So permanently carve out one PID for the purpose of a guard PID.
+ */
+ init_mm.context.id = mmu_base_pid;
+ mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
-
+ /* Switch to the guard PID before turning on MMU */
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
}
radix_init_iamr();
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
+struct change_mapping_params {
+ pte_t *pte;
+ unsigned long start;
+ unsigned long end;
+ unsigned long aligned_start;
+ unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+ struct change_mapping_params *params =
+ (struct change_mapping_params *)data;
+
+ if (!data)
+ return -1;
+
+ spin_unlock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, params->aligned_start, params->pte);
+ create_physical_mapping(params->aligned_start, params->start);
+ create_physical_mapping(params->end, params->aligned_end);
+ spin_lock(&init_mm.page_table_lock);
+ return 0;
+}
+
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
+/*
+ * clear the pte and potentially split the mapping helper
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+ unsigned long size, pte_t *pte)
+{
+ unsigned long mask = ~(size - 1);
+ unsigned long aligned_start = addr & mask;
+ unsigned long aligned_end = addr + size;
+ struct change_mapping_params params;
+ bool split_region = false;
+
+ if ((end - addr) < size) {
+ /*
+ * We're going to clear the PTE, but not flushed
+ * the mapping, time to remap and flush. The
+ * effects if visible outside the processor or
+ * if we are running in code close to the
+ * mapping we cleared, we are in trouble.
+ */
+ if (overlaps_kernel_text(aligned_start, addr) ||
+ overlaps_kernel_text(end, aligned_end)) {
+ /*
+ * Hack, just return, don't pte_clear
+ */
+ WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+ "text, not splitting\n", addr, end);
+ return;
+ }
+ split_region = true;
+ }
+
+ if (split_region) {
+ params.pte = pte;
+ params.start = addr;
+ params.end = end;
+ params.aligned_start = addr & ~(size - 1);
+ params.aligned_end = min_t(unsigned long, aligned_end,
+ (unsigned long)__va(memblock_end_of_DRAM()));
+ stop_machine(stop_machine_change_mapping, &params, NULL);
+ return;
+ }
+
+ pte_clear(&init_mm, addr, pte);
+}
+
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_huge(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pmd);
+ split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_huge(*pud)) {
- if (!IS_ALIGNED(addr, PUD_SIZE) ||
- !IS_ALIGNED(next, PUD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pud);
+ split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
continue;
if (pgd_huge(*pgd)) {
- if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
- !IS_ALIGNED(next, PGDIR_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pgd);
+ split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c9a623c2d8a2..28c980eb4422 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 881ebd53ffc2..9b23f12e863c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
- int i;
+ int i, offset;
i = batch->index;
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+ if (unlikely(psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
+ offset = PTRS_PER_PTE;
}
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
- rpte = __real_pte(__pte(pte), ptep);
+ rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 1a9a756b0b2f..857580a78bbd 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -101,9 +101,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_INTR;
@@ -111,9 +111,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
CLASS2_ENABLE_MAILBOX_INTR;
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_THRESHOLD_INTR;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c1be486da899..469bdd0b748f 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -774,7 +774,7 @@ static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
spu_release(ctx);
return mask;
@@ -910,7 +910,7 @@ static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
spu_release(ctx);
return mask;
@@ -1710,9 +1710,9 @@ static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait)
mask = 0;
if (free_elements & 0xffff)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tagstatus & ctx->tagwait)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
free_elements, tagstatus, ctx->tagwait);
@@ -2469,7 +2469,7 @@ static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
return rc;
if (spufs_switch_log_used(ctx) > 0)
- mask |= POLLIN;
+ mask |= EPOLLIN;
spu_release(ctx);
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index fff58198b5b6..ae9d24d31eed 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -70,17 +70,17 @@ static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
spu_int_stat_clear(spu, 2,
CLASS2_MAILBOX_THRESHOLD_INTR);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index dd4c9b8b8a81..f6f55ab4980e 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
const struct cpumask *l_cpumask;
get_online_cpus();
- for_each_online_node(nid) {
+ for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
- cpu = cpumask_first(l_cpumask);
+ cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ continue;
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index c18de0a9b1bd..4070bb4e9da4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -153,7 +153,7 @@ static __poll_t opal_prd_poll(struct file *file,
poll_wait(file, &opal_prd_msg_wait, wait);
if (!opal_msg_queue_empty())
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 2b3eb01ab110..b7c53a51c31b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
+ } else {
+ /*
+ * A user mapping must ensure that context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = set_thread_uses_vas();
+ if (rc)
+ goto free_window;
}
- /*
- * Now that we have a send window, ensure context switch issues
- * CP_ABORT for this thread.
- */
- rc = -EINVAL;
- if (set_thread_uses_vas() < 0)
- goto free_window;
-
set_vinst_win(vinst, txwin);
return txwin;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..652d3e96b812 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+ update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
-extern int find_and_online_cpu_nid(int cpu);
-
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..5e1ef9150182 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
/*
+ * Enable the hotplug interrupt late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+ struct device_node *np;
+
+ /* Hotplug Events */
+ np = of_find_node_by_path("/event-sources/hot-plug-events");
+ if (np != NULL) {
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
+ "RAS_HOTPLUG");
+ of_node_put(np);
+ }
+
+ return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
+/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
*/
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
- /* Hotplug Events */
- np = of_find_node_by_path("/event-sources/hot-plug-events");
- if (np != NULL) {
- if (dlpar_workqueue_init() == 0)
- request_event_sources_irqs(np, ras_hotplug_interrupt,
- "RAS_HOTPLUG");
- of_node_put(np);
- }
-
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
break;
}
+ /* Do we support single escalation */
+ if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+ xive_has_single_esc = true;
+
/* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
{
s64 rc;
+ u64 flags = OPAL_XIVE_VP_ENABLED;
+ if (single_escalation)
+ flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
for (;;) {
- rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+ rc = opal_xive_set_vp_info(vp_id, flags, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+ return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index d9c4c9366049..091f1d0d0af1 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
- pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
goto fail;
}
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
- pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
if (IS_ERR(qpage))
return PTR_ERR(qpage);
- return xive_spapr_configure_queue(cpu, q, prio, qpage,
- xive_queue_shift);
+ return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
+ int hw_cpu = get_hard_smp_processor_id(cpu);
- rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+ rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
- pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+ hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);