Diffstat (limited to 'arch/powerpc')
60 files changed, 939 insertions, 313 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 30a155c0a6b0..c615abdce119 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -16,6 +16,7 @@ #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 949d691094a4..67c5475311ee 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd) * keeping the prototype consistent across the two formats. */ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, unsigned long hidx, + int offset) { return (hidx << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 338b7da468ce..3bcf269f8f55 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -45,7 +45,7 @@ * generic accessors and iterators here */ #define __real_pte __real_pte -static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) { real_pte_t rpte; unsigned long *hidxp; @@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) */ smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + hidxp = (unsigned long *)(ptep + offset); rpte.hidx = *hidxp; return rpte; } @@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) * expected to modify the PTE bits accordingly and commit the PTE to memory. 
*/ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, + unsigned long hidx, int offset) { - unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + unsigned long *hidxp = (unsigned long *)(ptep + offset); rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); @@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a } #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE) #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ (sizeof(unsigned long) << PMD_INDEX_SIZE)) #else #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#ifdef CONFIG_HUGETLB_PAGE +#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \ + (sizeof(unsigned long) << PUD_INDEX_SIZE)) +#else #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#endif #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0920eff731b3..935adcd92a81 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -23,7 +23,8 @@ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) +#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \ + defined(CONFIG_PPC_64K_PAGES) /* * only with hash 64k we need to use the second half of pmd page table * to store pointer to deposited pgtable_t @@ -33,6 +34,16 @@ #define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE #endif /* + * We store the slot details in the second half of page table. + * Increase the pud level table so that hugetlb ptes can be stored + * at pud level. 
+ */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1) +#else +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) +#endif +/* * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 1fcfa425cefa..4746bc68d446 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + pgd_t *pgd; + if (radix_enabled()) return radix__pgd_alloc(mm); - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); + + pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); + memset(pgd, 0, PGD_TABLE_SIZE); + + return pgd; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), pgtable_gfp_flags(mm, GFP_KERNEL)); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, * ahead and flush the page walk cache */ flush_tlb_pgtable(tlb, address); - pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); + pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 51017726d495..a6b9f1d74600 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size; extern unsigned long __pud_index_size; extern unsigned long __pgd_index_size; extern unsigned long __pmd_cache_index; +extern unsigned long __pud_cache_index; #define PTE_INDEX_SIZE __pte_index_size #define PMD_INDEX_SIZE __pmd_index_size #define PUD_INDEX_SIZE __pud_index_size #define PGD_INDEX_SIZE __pgd_index_size #define PMD_CACHE_INDEX __pmd_cache_index +#define PUD_CACHE_INDEX __pud_cache_index /* * Because of use of pte fragments and THP, size of page table * are not always derived out of index size above. 
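The hash-64k hunks above all rely on one layout trick: with a 64K base page size, each PTE must record which hardware hash slot (hidx) its 4K subpages occupy, and that bookkeeping lives in the second half of a double-sized page-table page. This series extends the trick from the PMD level to the PUD level for hugetlb (hence H_PUD_CACHE_INDEX = H_PUD_INDEX_SIZE + 1, which doubles the kmem cache object), and threads an explicit "offset" argument through __real_pte()/pte_set_hidx() because each level has a different entry count. Below is a minimal userspace sketch of the mirrored layout — not kernel code; alloc_table_with_hidx() and hidx_of() are hypothetical names:

#include <stdint.h>
#include <stdlib.h>

typedef uint64_t pte_t;

/* Allocate 2^index entries at twice their natural size: the first half
 * holds PTEs, the second half one hash-slot word per PTE. Bumping the
 * cache index by one is what doubles the allocation in the kernel. */
static pte_t *alloc_table_with_hidx(size_t nr_entries)
{
        return calloc(2 * nr_entries, sizeof(pte_t));
}

/* The slot word mirroring entry i sits nr_entries slots away, which is
 * exactly what "ptep + offset" computes in __real_pte()/pte_set_hidx(). */
static uint64_t *hidx_of(pte_t *table, size_t i, size_t nr_entries)
{
        return table + nr_entries + i;
}

Passing the offset explicitly is what lets the same helpers serve PTE, PMD and PUD level tables, whose entry counts differ.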
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base; */ #ifndef __real_pte -#define __real_pte(e,p) ((real_pte_t){(e)}) +#define __real_pte(e, p, o) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 176dfb73d42c..471b2274fbeb 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 88e5e8f17e98..855e17d158b1 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -30,6 +30,16 @@ #define PACA_IRQ_PMI 0x40 /* + * Some soft-masked interrupts must be hard masked until they are replayed + * (e.g., because the soft-masked handler does not clear the exception). + */ +#ifdef CONFIG_PPC_BOOK3S +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) +#else +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) +#endif + +/* * flags for paca->irq_soft_mask */ #define IRQS_ENABLED 0 @@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void) static inline void may_hard_irq_enable(void) { get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - if (!(get_paca()->irq_happened & PACA_IRQ_EE)) + if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) __hard_irq_enable(); } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 9dcbfa6bbb91..d8b1e8e7e035 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void) return false; } +static inline void crash_ipi_callback(struct pt_regs *regs) { } + +static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ +} + #endif /* CONFIG_KEXEC_CORE */ #endif /* ! 
__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 9a667007bff8..376ae803b69c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -249,10 +249,8 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); extern int kvmppc_hcall_impl_pr(unsigned long cmd); extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); -extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, - struct kvm_vcpu *vcpu); -extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, - struct kvmppc_book3s_shadow_vcpu *svcpu); +extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); +extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 735cfa35298a..998f7b7aaa9e 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l) lphi = (l >> 16) & 0xf; switch ((l >> 12) & 0xf) { case 0: - return !lphi ? 24 : -1; /* 16MB */ + return !lphi ? 24 : 0; /* 16MB */ break; case 1: return 16; /* 64kB */ break; case 3: - return !lphi ? 34 : -1; /* 16GB */ + return !lphi ? 34 : 0; /* 16GB */ break; case 7: return (16 << 8) + 12; /* 64kB in 4kB */ @@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l) return (24 << 8) + 12; /* 16MB in 4kB */ break; } - return -1; + return 0; } static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l) @@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r) { - return 1ul << kvmppc_hpte_actual_page_shift(v, r); + int shift = kvmppc_hpte_actual_page_shift(v, r); + + if (shift) + return 1ul << shift; + return 0; } static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift) @@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, va_low ^= v >> (SID_SHIFT_1T - 16); va_low &= 0x7ff; - if (b_pgshift == 12) { + if (b_pgshift <= 12) { if (a_pgshift > 12) { sllp = (a_pgshift == 16) ? 5 : 4; rb |= sllp << 5; /* AP field */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3aa5b577cd60..1f53b562726f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -690,6 +690,7 @@ struct kvm_vcpu_arch { u8 mmio_vsx_offset; u8 mmio_vsx_copy_type; u8 mmio_vsx_tx_sx_enabled; + u8 mmio_vmx_copy_nums; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; @@ -709,6 +710,7 @@ struct kvm_vcpu_arch { u8 ceded; u8 prodded; u8 doorbell_request; + u8 irq_pending; /* Used by XIVE to signal pending guest irqs */ u32 last_inst; struct swait_queue_head *wqp; @@ -738,8 +740,11 @@ struct kvm_vcpu_arch { struct kvmppc_icp *icp; /* XICS presentation controller */ struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */ __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */ - u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */ + u8 xive_pushed; /* Is the VP pushed on the physical CPU ? 
*/ + u8 xive_esc_on; /* Is the escalation irq enabled ? */ union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */ + u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */ + u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */ #endif #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -800,6 +805,7 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_QPR 0x0040 #define KVM_MMIO_REG_FQPR 0x0060 #define KVM_MMIO_REG_VSX 0x0080 +#define KVM_MMIO_REG_VMX 0x00c0 #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9db18287b5f4..7765a800ddae 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend); +extern int kvmppc_handle_load128_by2x64(struct kvm_run *run, + struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian); +extern int kvmppc_handle_store128_by2x64(struct kvm_run *run, + struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 504a3c36ce5c..03bbd1149530 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -24,6 +24,7 @@ extern int icache_44x_need_flush; #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index abddf5830ad5..5c5f75d005ad 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -27,6 +27,7 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif +#define PUD_CACHE_INDEX PUD_INDEX_SIZE /* * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 24c73f5575ee..94bd1bf2c873 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1076,6 +1076,7 @@ enum { /* Flags for OPAL_XIVE_GET/SET_VP_INFO */ enum { OPAL_XIVE_VP_ENABLED = 0x00000001, + OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002, }; /* "Any chip" replacement for chip ID for allocation functions */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ab5c1588b487..f1083bcf449c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -156,6 +156,12 @@ #define OP_31_XOP_LFDX 599 #define OP_31_XOP_LFDUX 631 +/* VMX Vector Load Instructions */ +#define OP_31_XOP_LVX 103 + +/* VMX Vector Store Instructions */ +#define OP_31_XOP_STVX 231 + #define OP_LWZ 32 #define OP_STFS 52 #define OP_STFSU 53 diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 88187c285c70..9f421641a35c 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void 
sysfs_remove_device_from_node(struct device *dev, int nid); extern int numa_update_cpu_topology(bool cpus_locked); +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; +} + static inline int early_cpu_to_node(int cpu) { int nid; @@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked) { return 0; } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int start_topology_update(void); extern int stop_topology_update(void); extern int prrn_is_enabled(void); +extern int find_and_online_cpu_nid(int cpu); #else static inline int start_topology_update(void) { @@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void) { return 0; } +static inline int find_and_online_cpu_nid(int cpu) +{ + return 0; +} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 7624e22f5045..8d1a2792484f 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); extern void xive_native_sync_source(u32 hw_irq); extern bool is_xive_irq(struct irq_chip *chip); -extern int xive_native_enable_vp(u32 vp_id); +extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); extern int xive_native_disable_vp(u32 vp_id); extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); +extern bool xive_native_has_single_escalation(void); #else diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 637b7263cb86..833ed9a16adf 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) +#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 88b84ac76b53..ea5eb91b836e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -520,6 +520,7 @@ int main(void) OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); + OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); @@ -739,6 +740,9 @@ int main(void) DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, arch.xive_cam_word)); DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); + DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on)); + DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr)); + DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr)); #endif #ifdef CONFIG_KVM_EXIT_TIMING diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ee832d344a5a..9b6e653e501a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -943,6 +943,8 @@ 
kernel_dbg_exc: /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable + * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so + * keep these in synch. */ .macro masked_interrupt_book3e paca_irq full_mask diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 243d072a225a..3ac87e53b3da 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode * and it won't refire. - * - else we hard disable and return. + * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ #define MASKED_INTERRUPT(_H) \ @@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ b MASKED_DEC_HANDLER_LABEL; \ -1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ - bne 2f; \ +1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \ + beq 2f; \ mfspr r10,SPRN_##_H##SRR1; \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index ae2ede4de6be..446c79611d56 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -362,7 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) */ static int pci_read_irq_line(struct pci_dev *pci_dev) { - unsigned int virq = 0; + int virq; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -370,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) memset(&oirq, 0xff, sizeof(oirq)); #endif /* Try to get a mapping from the device-tree */ - if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) { + virq = of_irq_parse_and_map_pci(pci_dev, 0, 0); + if (virq <= 0) { u8 line, pin; /* If that fails, lets fallback to what is in the config diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index fc600a8b1e77..f915db93cd42 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -392,7 +392,7 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait) { poll_wait(file, &rtas_log_wait, wait); if (rtas_log_size) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5a8bfee6e187..04d0bbd7a1dd 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_create_file(s, &dev_attr_pir); - if (cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ @@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_remove_file(s, &dev_attr_pir); - if (cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index b12b8eb39c29..68a0e9d5b440 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -22,6 +22,7 @@ config 
KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_EVENTFD + select HAVE_KVM_VCPU_ASYNC_IOCTL select SRCU select KVM_VFIO select IRQ_BYPASS_MANAGER @@ -68,7 +69,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE - select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV) + select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 72d977e30952..234531d1bee1 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -484,19 +484,33 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + int ret; + + vcpu_load(vcpu); + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + vcpu_put(vcpu); + + return ret; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + int ret; + + vcpu_load(vcpu); + ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + vcpu_put(vcpu); + + return ret; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + regs->pc = kvmppc_get_pc(vcpu); regs->cr = kvmppc_get_cr(vcpu); regs->ctr = kvmppc_get_ctr(vcpu); @@ -518,6 +532,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); return 0; } @@ -525,6 +540,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + kvmppc_set_pc(vcpu, regs->pc); kvmppc_set_cr(vcpu, regs->cr); kvmppc_set_ctr(vcpu, regs->ctr); @@ -545,6 +562,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); return 0; } @@ -737,7 +755,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { + vcpu_load(vcpu); vcpu->guest_debug = dbg->control; + vcpu_put(vcpu); return 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b73dbc9e797d..ef243fed2f2b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1269,6 +1269,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, /* Nothing to do */ goto out; + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + rpte = be64_to_cpu(hptep[1]); + vpte = hpte_new_to_old_v(vpte, rpte); + } + /* Unmap */ rev = &old->rev[idx]; guest_rpte = rev->guest_rpte; @@ -1298,7 +1303,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, /* Reload PTE after unmap */ vpte = be64_to_cpu(hptep[0]); - BUG_ON(vpte & HPTE_V_VALID); BUG_ON(!(vpte & HPTE_V_ABSENT)); @@ -1307,6 +1311,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, goto out; rpte = be64_to_cpu(hptep[1]); + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + vpte = hpte_new_to_old_v(vpte, rpte); + rpte = hpte_new_to_old_r(rpte); + } + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); avpn = HPTE_V_AVPN_VAL(vpte) & 
~(((1ul << pshift) - 1) >> 23); pteg = idx / HPTES_PER_GROUP; @@ -1337,17 +1347,17 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, } new_pteg = hash & new_hash_mask; - if (vpte & HPTE_V_SECONDARY) { - BUG_ON(~pteg != (hash & old_hash_mask)); - new_pteg = ~new_pteg; - } else { - BUG_ON(pteg != (hash & old_hash_mask)); - } + if (vpte & HPTE_V_SECONDARY) + new_pteg = ~hash & new_hash_mask; new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP); new_hptep = (__be64 *)(new->virt + (new_idx << 4)); replace_vpte = be64_to_cpu(new_hptep[0]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + unsigned long replace_rpte = be64_to_cpu(new_hptep[1]); + replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte); + } if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { BUG_ON(new->order >= old->order); @@ -1363,6 +1373,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, /* Discard the previous HPTE */ } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + rpte = hpte_old_to_new_r(vpte, rpte); + vpte = hpte_old_to_new_v(vpte); + } + new_hptep[1] = cpu_to_be64(rpte); new->rev[new_idx].guest_rpte = guest_rpte; /* No need for a barrier, since new HPT isn't active */ @@ -1380,12 +1395,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize) unsigned long i; int rc; - /* - * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs - * that POWER9 uses, and could well hit a BUG_ON on POWER9. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - return -EIO; for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) { rc = resize_hpt_rehash_hpte(resize, i); if (rc != 0) @@ -1416,6 +1425,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) synchronize_srcu_expedited(&kvm->srcu); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + kvmppc_setup_partition_table(kvm); + resize_hpt_debug(resize, "resize_hpt_pivot() done\n"); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 58618f644c56..0c854816e653 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, j = i + 1; if (npages) { set_dirty_bits(map, i, npages); - i = j + npages; + j = i + npages; } } return 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e4f70c33fbc7..89707354c2ef 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -116,6 +116,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* If set, the threads on each CPU core have to be in the same MMU mode */ +static bool no_mixing_hpt_and_radix; + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -1003,8 +1006,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tvcpu; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return EMULATE_FAIL; if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE) return RESUME_GUEST; if (get_op(inst) != 31) @@ -1054,6 +1055,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* Called with vcpu->arch.vcore->lock held */ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1174,7 +1176,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu 
*vcpu, swab32(vcpu->arch.emul_inst) : vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + /* Need vcore unlocked to call kvmppc_get_last_inst */ + spin_unlock(&vcpu->arch.vcore->lock); r = kvmppc_emulate_debug_inst(run, vcpu); + spin_lock(&vcpu->arch.vcore->lock); } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -1189,8 +1194,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, */ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: r = EMULATE_FAIL; - if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) + if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && + cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Need vcore unlocked to call kvmppc_get_last_inst */ + spin_unlock(&vcpu->arch.vcore->lock); r = kvmppc_emulate_doorbell_instr(vcpu); + spin_lock(&vcpu->arch.vcore->lock); + } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -1495,6 +1505,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; + case KVM_REG_PPC_DEC_EXPIRY: + *val = get_reg_val(id, vcpu->arch.dec_expires + + vcpu->arch.vcore->tb_offset); + break; default: r = -EINVAL; break; @@ -1722,6 +1736,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; + case KVM_REG_PPC_DEC_EXPIRY: + vcpu->arch.dec_expires = set_reg_val(id, *val) - + vcpu->arch.vcore->tb_offset; + break; default: r = -EINVAL; break; @@ -2376,8 +2394,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) static bool subcore_config_ok(int n_subcores, int n_threads) { /* - * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core - * mode, with one thread per subcore. + * POWER9 "SMT4" cores are permanently in what is effectively a 4-way + * split-core mode, with one thread per subcore. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return n_subcores <= 4 && n_threads == 1; @@ -2413,8 +2431,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (!cpu_has_feature(CPU_FTR_ARCH_207S)) return false; - /* POWER9 currently requires all threads to be in the same MMU mode */ - if (cpu_has_feature(CPU_FTR_ARCH_300) && + /* Some POWER9 chips require all threads to be in the same MMU mode */ + if (no_mixing_hpt_and_radix && kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) return false; @@ -2677,9 +2695,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. * On POWER9, we need to be not in independent-threads mode if - * this is a HPT guest on a radix host. + * this is a HPT guest on a radix host machine where the + * CPU threads may not be in different MMU modes. 
*/ - hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm); + hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() && + !kvm_is_radix(vc->kvm); if (((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || (hpt_on_radix && vc->kvm->arch.threads_indep)) { @@ -2829,7 +2849,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if (!thr0_done) kvmppc_start_thread(NULL, pvc); - thr += pvc->num_threads; } /* @@ -2932,13 +2951,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); + preempt_enable(); + for (sub = 0; sub < core_info.n_subcores; ++sub) { pvc = core_info.vc[sub]; post_guest_process(pvc, pvc == vc); } spin_lock(&vc->lock); - preempt_enable(); out: vc->vcore_state = VCORE_INACTIVE; @@ -2985,7 +3005,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) { if (!xive_enabled()) return false; - return vcpu->arch.xive_saved_state.pipr < + return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr; } #else @@ -3174,17 +3194,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * this thread straight away and have it join in. */ if (!signal_pending(current)) { - if (vc->vcore_state == VCORE_PIGGYBACK) { - if (spin_trylock(&vc->lock)) { - if (vc->vcore_state == VCORE_RUNNING && - !VCORE_IS_EXITING(vc)) { - kvmppc_create_dtl_entry(vcpu, vc); - kvmppc_start_thread(vcpu, vc); - trace_kvm_guest_enter(vcpu); - } - spin_unlock(&vc->lock); - } - } else if (vc->vcore_state == VCORE_RUNNING && + if ((vc->vcore_state == VCORE_PIGGYBACK || + vc->vcore_state == VCORE_RUNNING) && !VCORE_IS_EXITING(vc)) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu, vc); @@ -4446,6 +4457,19 @@ static int kvmppc_book3s_init_hv(void) if (kvmppc_radix_possible()) r = kvmppc_radix_init(); + + /* + * POWER9 chips before version 2.02 can't have some threads in + * HPT mode and some in radix mode on the same core. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + unsigned int pvr = mfspr(SPRN_PVR); + if ((pvr >> 16) == PVR_POWER9 && + (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) || + ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101))) + no_mixing_hpt_and_radix = true; + } + return r; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7886b313d135..f31f357b8c5a 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -413,10 +413,11 @@ FTR_SECTION_ELSE /* On P9 we use the split_info for coordinating LPCR changes */ lwz r4, KVM_SPLIT_DO_SET(r6) cmpwi r4, 0 - beq 63f + beq 1f mr r3, r6 bl kvmhv_p9_set_lpcr nop +1: ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 63: /* Order load of vcpu after load of vcore */ @@ -617,13 +618,6 @@ kvmppc_hv_entry: lbz r0, KVM_RADIX(r9) cmpwi cr7, r0, 0 - /* Clear out SLB if hash */ - bne cr7, 2f - li r6,0 - slbmte r6,r6 - slbia - ptesync -2: /* * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, @@ -738,19 +732,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 10: cmpdi r4, 0 beq kvmppc_primary_no_guest kvmppc_got_guest: - - /* Load up guest SLB entries (N.B. 
slb_max will be 0 for radix) */ - lwz r5,VCPU_SLB_MAX(r4) - cmpwi r5,0 - beq 9f - mtctr r5 - addi r6,r4,VCPU_SLB -1: ld r8,VCPU_SLB_E(r6) - ld r9,VCPU_SLB_V(r6) - slbmte r9,r8 - addi r6,r6,VCPU_SLB_SIZE - bdnz 1b -9: /* Increment yield count if they have a VPA */ ld r3, VCPU_VPA(r4) cmpdi r3, 0 @@ -957,7 +938,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 - std r3,VCPU_DEC(r4) ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) @@ -1018,6 +998,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon + /* For hash guest, clear out and reload the SLB */ + ld r6, VCPU_KVM(r4) + lbz r0, KVM_RADIX(r6) + cmpwi r0, 0 + bne 9f + li r6, 0 + slbmte r6, r6 + slbia + ptesync + + /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ + lwz r5,VCPU_SLB_MAX(r4) + cmpwi r5,0 + beq 9f + mtctr r5 + addi r6,r4,VCPU_SLB +1: ld r8,VCPU_SLB_E(r6) + ld r9,VCPU_SLB_V(r6) + slbmte r9,r8 + addi r6,r6,VCPU_SLB_SIZE + bdnz 1b +9: + #ifdef CONFIG_KVM_XICS /* We are entering the guest on that thread, push VCPU to XIVE */ ld r10, HSTATE_XIVE_TIMA_PHYS(r13) @@ -1031,8 +1034,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) li r9, TM_QW1_OS + TM_WORD2 stwcix r11,r9,r10 li r9, 1 - stw r9, VCPU_XIVE_PUSHED(r4) + stb r9, VCPU_XIVE_PUSHED(r4) eieio + + /* + * We clear the irq_pending flag. There is a small chance of a + * race vs. the escalation interrupt happening on another + * processor setting it again, but the only consequence is to + * cause a spurrious wakeup on the next H_CEDE which is not an + * issue. + */ + li r0,0 + stb r0, VCPU_IRQ_PENDING(r4) + + /* + * In single escalation mode, if the escalation interrupt is + * on, we mask it. + */ + lbz r0, VCPU_XIVE_ESC_ON(r4) + cmpwi r0,0 + beq 1f + ld r10, VCPU_XIVE_ESC_RADDR(r4) + li r9, XIVE_ESB_SET_PQ_01 + ldcix r0, r10, r9 + sync + + /* We have a possible subtle race here: The escalation interrupt might + * have fired and be on its way to the host queue while we mask it, + * and if we unmask it early enough (re-cede right away), there is + * a theorical possibility that it fires again, thus landing in the + * target queue more than once which is a big no-no. + * + * Fortunately, solving this is rather easy. If the above load setting + * PQ to 01 returns a previous value where P is set, then we know the + * escalation interrupt is somewhere on its way to the host. In that + * case we simply don't clear the xive_esc_on flag below. It will be + * eventually cleared by the handler for the escalation interrupt. + * + * Then, when doing a cede, we check that flag again before re-enabling + * the escalation interrupt, and if set, we abort the cede. + */ + andi. 
r0, r0, XIVE_ESB_VAL_P + bne- 1f + + /* Now P is 0, we can clear the flag */ + li r0, 0 + stb r0, VCPU_XIVE_ESC_ON(r4) +1: no_xive: #endif /* CONFIG_KVM_XICS */ @@ -1193,7 +1241,7 @@ hdec_soon: addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif - b guest_exit_cont + b guest_bypass /****************************************************************************** * * @@ -1423,15 +1471,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) blt deliver_guest_interrupt guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ + /* Save more register state */ + mfdar r6 + mfdsisr r7 + std r6, VCPU_DAR(r9) + stw r7, VCPU_DSISR(r9) + /* don't overwrite fault_dar/fault_dsisr if HDSI */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE + beq mc_cont + std r6, VCPU_FAULT_DAR(r9) + stw r7, VCPU_FAULT_DSISR(r9) + + /* See if it is a machine check */ + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + beq machine_check_realmode +mc_cont: +#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING + addi r3, r9, VCPU_TB_RMEXIT + mr r4, r9 + bl kvmhv_accumulate_time +#endif #ifdef CONFIG_KVM_XICS /* We are exiting, pull the VP from the XIVE */ - lwz r0, VCPU_XIVE_PUSHED(r9) + lbz r0, VCPU_XIVE_PUSHED(r9) cmpwi cr0, r0, 0 beq 1f li r7, TM_SPC_PULL_OS_CTX li r6, TM_QW1_OS mfmsr r0 - andi. r0, r0, MSR_IR /* in real mode? */ + andi. r0, r0, MSR_DR /* in real mode? */ beq 2f ld r10, HSTATE_XIVE_TIMA_VIRT(r13) cmpldi cr0, r10, 0 @@ -1454,33 +1522,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Fixup some of the state for the next load */ li r10, 0 li r0, 0xff - stw r10, VCPU_XIVE_PUSHED(r9) + stb r10, VCPU_XIVE_PUSHED(r9) stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) eieio 1: #endif /* CONFIG_KVM_XICS */ - /* Save more register state */ - mfdar r6 - mfdsisr r7 - std r6, VCPU_DAR(r9) - stw r7, VCPU_DSISR(r9) - /* don't overwrite fault_dar/fault_dsisr if HDSI */ - cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE - beq mc_cont - std r6, VCPU_FAULT_DAR(r9) - stw r7, VCPU_FAULT_DSISR(r9) - /* See if it is a machine check */ - cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_realmode -mc_cont: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING - addi r3, r9, VCPU_TB_RMEXIT - mr r4, r9 - bl kvmhv_accumulate_time -#endif + /* For hash guest, read the guest SLB and save it away */ + ld r5, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r5) + li r5, 0 + cmpwi r0, 0 + bne 3f /* for radix, save 0 entries */ + lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ + mtctr r0 + li r6,0 + addi r7,r9,VCPU_SLB +1: slbmfee r8,r6 + andis. r0,r8,SLB_ESID_V@h + beq 2f + add r8,r8,r6 /* put index in */ + slbmfev r3,r6 + std r8,VCPU_SLB_E(r7) + std r3,VCPU_SLB_V(r7) + addi r7,r7,VCPU_SLB_SIZE + addi r5,r5,1 +2: addi r6,r6,1 + bdnz 1b + /* Finally clear out the SLB */ + li r0,0 + slbmte r0,r0 + slbia + ptesync +3: stw r5,VCPU_SLB_MAX(r9) +guest_bypass: mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit @@ -1501,31 +1578,6 @@ mc_cont: ori r6,r6,1 mtspr SPRN_CTRLT,r6 4: - /* Check if we are running hash or radix and store it in cr2 */ - ld r5, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r5) - cmpwi cr2,r0,0 - - /* Read the guest SLB and save it away */ - li r5, 0 - bne cr2, 3f /* for radix, save 0 entries */ - lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ - mtctr r0 - li r6,0 - addi r7,r9,VCPU_SLB -1: slbmfee r8,r6 - andis. 
r0,r8,SLB_ESID_V@h - beq 2f - add r8,r8,r6 /* put index in */ - slbmfev r3,r6 - std r8,VCPU_SLB_E(r7) - std r3,VCPU_SLB_V(r7) - addi r7,r7,VCPU_SLB_SIZE - addi r5,r5,1 -2: addi r6,r6,1 - bdnz 1b -3: stw r5,VCPU_SLB_MAX(r9) - /* * Save the guest PURR/SPURR */ @@ -1803,7 +1855,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) cmpwi cr2, r0, 0 - beq cr2, 3f + beq cr2, 4f /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) @@ -1839,15 +1891,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) - b 4f +4: #endif /* CONFIG_PPC_RADIX_MMU */ - /* Hash: clear out SLB */ -3: li r5,0 - slbmte r5,r5 - slbia - ptesync -4: /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -2745,7 +2791,32 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: ld r9, HSTATE_KVM_VCPU(r13) - b guest_exit_cont +#ifdef CONFIG_KVM_XICS + /* Abort if we still have a pending escalation */ + lbz r5, VCPU_XIVE_ESC_ON(r9) + cmpwi r5, 0 + beq 1f + li r0, 0 + stb r0, VCPU_CEDED(r9) +1: /* Enable XIVE escalation */ + li r5, XIVE_ESB_SET_PQ_00 + mfmsr r0 + andi. r0, r0, MSR_DR /* in real mode? */ + beq 1f + ld r10, VCPU_XIVE_ESC_VADDR(r9) + cmpdi r10, 0 + beq 3f + ldx r0, r10, r5 + b 2f +1: ld r10, VCPU_XIVE_ESC_RADDR(r9) + cmpdi r10, 0 + beq 3f + ldcix r0, r10, r5 +2: sync + li r0, 1 + stb r0, VCPU_XIVE_ESC_ON(r9) +#endif /* CONFIG_KVM_XICS */ +3: b guest_exit_cont /* Try to handle a machine check in real mode */ machine_check_realmode: diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 901e6fe00c39..c18e845019ec 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -96,7 +96,7 @@ kvm_start_entry: kvm_start_lightweight: /* Copy registers into shadow vcpu so we can access them in real mode */ - GET_SHADOW_VCPU(r3) + mr r3, r4 bl FUNC(kvmppc_copy_to_svcpu) nop REST_GPR(4, r1) @@ -165,9 +165,7 @@ after_sprg3_load: stw r12, VCPU_TRAP(r3) /* Transfer reg values from shadow vcpu back to vcpu struct */ - /* On 64-bit, interrupts are still off at this point */ - GET_SHADOW_VCPU(r4) bl FUNC(kvmppc_copy_from_svcpu) nop diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7deaeeb14b93..3ae752314b34 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -121,7 +121,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_BOOK3S_64 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); if (svcpu->in_use) { - kvmppc_copy_from_svcpu(vcpu, svcpu); + kvmppc_copy_from_svcpu(vcpu); } memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; @@ -143,9 +143,10 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) } /* Copy data needed by real-mode code from vcpu to shadow vcpu */ -void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, - struct kvm_vcpu *vcpu) +void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) { + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + svcpu->gpr[0] = vcpu->arch.gpr[0]; svcpu->gpr[1] = vcpu->arch.gpr[1]; svcpu->gpr[2] = vcpu->arch.gpr[2]; @@ -177,17 +178,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; + + 
svcpu_put(svcpu); } /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ -void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, - struct kvmppc_book3s_shadow_vcpu *svcpu) +void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) { - /* - * vcpu_put would just call us again because in_use hasn't - * been updated yet. - */ - preempt_disable(); + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); /* * Maybe we were already preempted and synced the svcpu from @@ -233,7 +231,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, svcpu->in_use = false; out: - preempt_enable(); + svcpu_put(svcpu); } static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6882bc94eba8..f0f5cd4d2fe7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data) { struct kvm_vcpu *vcpu = data; - /* We use the existing H_PROD mechanism to wake up the target */ - vcpu->arch.prodded = 1; + vcpu->arch.irq_pending = 1; smp_mb(); if (vcpu->arch.ceded) kvmppc_fast_vcpu_kick(vcpu); + /* Since we have the no-EOI flag, the interrupt is effectively + * disabled now. Clearing xive_esc_on means we won't bother + * doing so on the next entry. + * + * This also allows the entry code to know that if a PQ combination + * of 10 is observed while xive_esc_on is true, it means the queue + * contains an unprocessed escalation interrupt. We don't make use of + * that knowledge today but might (see comment in book3s_hv_rmhandler.S) + */ + vcpu->arch.xive_esc_on = false; + return IRQ_HANDLED; } @@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) return -EIO; } - /* - * Future improvement: start with them disabled - * and handle DD2 and later scheme of merged escalation - * interrupts - */ - name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", - vcpu->kvm->arch.lpid, xc->server_num, prio); + if (xc->xive->single_escalation) + name = kasprintf(GFP_KERNEL, "kvm-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num); + else + name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num, prio); if (!name) { pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", prio, xc->server_num); rc = -ENOMEM; goto error; } + + pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio); + rc = request_irq(xc->esc_virq[prio], xive_esc_irq, IRQF_NO_THREAD, name, vcpu); if (rc) { @@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) goto error; } xc->esc_virq_names[prio] = name; + + /* In single escalation mode, we grab the ESB MMIO of the + * interrupt and mask it. Also populate the VCPU v/raddr + * of the ESB page for use by asm entry/exit code. Finally + * set the XIVE_IRQ_NO_EOI flag which will prevent the + * core code from performing an EOI on the escalation + * interrupt, thus leaving it effectively masked after + * it fires once. + */ + if (xc->xive->single_escalation) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + + xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); + vcpu->arch.xive_esc_raddr = xd->eoi_page; + vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; + xd->flags |= XIVE_IRQ_NO_EOI; + } + return 0; error: irq_dispose_mapping(xc->esc_virq[prio]); @@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) pr_devel("Provisioning prio... 
%d\n", prio); - /* Provision each VCPU and enable escalations */ + /* Provision each VCPU and enable escalations if needed */ kvm_for_each_vcpu(i, vcpu, kvm) { if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0) + if (rc == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, prio); if (rc) return rc; @@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, /* Allocate IPI */ xc->vp_ipi = xive_native_alloc_irq(); if (!xc->vp_ipi) { + pr_err("Failed to allocate xive irq for VCPU IPI\n"); r = -EIO; goto bail; } @@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, goto bail; /* + * Enable the VP first as the single escalation mode will + * affect escalation interrupts numbering + */ + r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + if (r) { + pr_err("Failed to enable VP in OPAL, err %d\n", r); + goto bail; + } + + /* * Initialize queues. Initially we set them all for no queueing * and we enable escalation for queue 0 only which we'll use for * our mfrr change notifications. If the VCPU is hot-plugged, we - * do handle provisioning however. + * do handle provisioning however based on the existing "map" + * of enabled queues. */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { struct xive_q *q = &xc->queues[i]; + /* Single escalation, no queue 7 */ + if (i == 7 && xive->single_escalation) + break; + /* Is queue already enabled ? Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0) + if (r == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, i); if (r) goto bail; @@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; - /* Enable the VP */ - r = xive_native_enable_vp(xc->vp_id); - if (r) - goto bail; - /* Route the IPI */ r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); if (!r) @@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", val, server, guest_prio); + /* * If the source doesn't already have an IPI, allocate * one and get the corresponding data @@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (xive->vp_base == XIVE_INVALID_VP) ret = -ENOMEM; + xive->single_escalation = xive_native_has_single_escalation(); + if (ret) { kfree(xive); return ret; @@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private) kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int i; if (!xc) continue; @@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private) xc->server_num, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + u32 i0, i1, idx; + + if (!q->qpage && !xc->esc_virq[i]) + continue; + + seq_printf(m, " [q%d]: ", i); + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); + } + if (xc->esc_virq[i]) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', + (pq & XIVE_ESB_VAL_Q) ? 
'Q' : 'q', + xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, "\n"); + } + } t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 6ba63f8e8a61..a08ae6fd4c51 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -120,6 +120,8 @@ struct kvmppc_xive { u32 q_order; u32 q_page_order; + /* Flags */ + u8 single_escalation; }; #define KVMPPC_XIVE_Q_COUNT 8 @@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp * is as follow. * * Guest request for 0...6 are honored. Guest request for anything - * higher results in a priority of 7 being applied. - * - * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb - * in order to match AIX expectations + * higher results in a priority of 6 being applied. * * Similar mapping is done for CPPR values */ static inline u8 xive_prio_from_guest(u8 prio) { - if (prio == 0xff || prio < 8) + if (prio == 0xff || prio < 6) return prio; - return 7; + return 6; } static inline u8 xive_prio_to_guest(u8 prio) { - if (prio == 0xff || prio < 7) - return prio; - return 0xb; + return prio; } static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 83b485810aea..6038e2e7aee0 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1431,6 +1431,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + regs->pc = vcpu->arch.pc; regs->cr = kvmppc_get_cr(vcpu); regs->ctr = vcpu->arch.ctr; @@ -1452,6 +1454,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); return 0; } @@ -1459,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + vcpu->arch.pc = regs->pc; kvmppc_set_cr(vcpu, regs->cr); vcpu->arch.ctr = regs->ctr; @@ -1480,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); return 0; } @@ -1607,30 +1613,42 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + int ret; + + vcpu_load(vcpu); + sregs->pvr = vcpu->arch.pvr; get_sregs_base(vcpu, sregs); get_sregs_arch206(vcpu, sregs); - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); + + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - int ret; + int ret = -EINVAL; + vcpu_load(vcpu); if (vcpu->arch.pvr != sregs->pvr) - return -EINVAL; + goto out; ret = set_sregs_base(vcpu, sregs); if (ret < 0) - return ret; + goto out; ret = set_sregs_arch206(vcpu, sregs); if (ret < 0) - return ret; + goto out; - return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); + +out: + vcpu_put(vcpu); + return ret; } int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, @@ -1773,7 +1791,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { int r; + vcpu_load(vcpu); r = kvmppc_core_vcpu_translate(vcpu, tr); + vcpu_put(vcpu); return r; } @@ -1996,12 +2016,15 @@ int 
kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { struct debug_reg *dbg_reg; int n, b = 0, w = 0; + int ret = 0; + + vcpu_load(vcpu); if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); - return 0; + goto out; } kvm_guest_protect_msr(vcpu, MSR_DE, true); @@ -2033,8 +2056,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, #endif if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - return 0; + goto out; + ret = -EINVAL; for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { uint64_t addr = dbg->arch.bp[n].addr; uint32_t type = dbg->arch.bp[n].type; @@ -2045,21 +2069,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (type & ~(KVMPPC_DEBUG_WATCH_READ | KVMPPC_DEBUG_WATCH_WRITE | KVMPPC_DEBUG_BREAKPOINT)) - return -EINVAL; + goto out; if (type & KVMPPC_DEBUG_BREAKPOINT) { /* Setting H/W breakpoint */ if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) - return -EINVAL; + goto out; } else { /* Setting H/W watchpoint */ if (kvmppc_booke_add_watchpoint(dbg_reg, addr, type, w++)) - return -EINVAL; + goto out; } } - return 0; + ret = 0; +out: + vcpu_put(vcpu); + return ret; } void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index af833531af31..a382e15135e6 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -58,6 +58,18 @@ static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_ALTIVEC +static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu) +{ + if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) { + kvmppc_core_queue_vec_unavail(vcpu); + return true; + } + + return false; +} +#endif /* CONFIG_ALTIVEC */ + /* * XXX to do: * lfiwax, lfiwzx @@ -98,6 +110,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE; vcpu->arch.mmio_sp64_extend = 0; vcpu->arch.mmio_sign_extend = 0; + vcpu->arch.mmio_vmx_copy_nums = 0; switch (get_op(inst)) { case 31: @@ -459,6 +472,29 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) rs, 4, 1); break; #endif /* CONFIG_VSX */ + +#ifdef CONFIG_ALTIVEC + case OP_31_XOP_LVX: + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.vaddr_accessed &= ~0xFULL; + vcpu->arch.paddr_accessed &= ~0xFULL; + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_load128_by2x64(run, vcpu, + KVM_MMIO_REG_VMX|rt, 1); + break; + + case OP_31_XOP_STVX: + if (kvmppc_check_altivec_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.vaddr_accessed &= ~0xFULL; + vcpu->arch.paddr_accessed &= ~0xFULL; + vcpu->arch.mmio_vmx_copy_nums = 2; + emulated = kvmppc_handle_store128_by2x64(run, vcpu, + rs, 1); + break; +#endif /* CONFIG_ALTIVEC */ + default: emulated = EMULATE_FAIL; break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0a7c88786ec0..403e642c78f5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -638,8 +638,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_SPAPR_RESIZE_HPT: - /* Disable this on POWER9 until code handles new HPTE format */ - r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); + r = !!hv_enabled; break; #endif #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -763,7 +762,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.dec_timer, 
CLOCK_REALTIME, HRTIMER_MODE_ABS); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; - vcpu->arch.dec_expires = ~(u64)0; + vcpu->arch.dec_expires = get_tb(); #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); @@ -930,6 +929,34 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, } #endif /* CONFIG_VSX */ +#ifdef CONFIG_ALTIVEC +static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu, + u64 gpr) +{ + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + u32 hi, lo; + u32 di; + +#ifdef __BIG_ENDIAN + hi = gpr >> 32; + lo = gpr & 0xffffffff; +#else + lo = gpr >> 32; + hi = gpr & 0xffffffff; +#endif + + di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ + if (di > 1) + return; + + if (vcpu->arch.mmio_host_swabbed) + di = 1 - di; + + VCPU_VSX_VR(vcpu, index).u[di * 2] = hi; + VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo; +} +#endif /* CONFIG_ALTIVEC */ + #ifdef CONFIG_PPC_FPU static inline u64 sp_to_dp(u32 fprs) { @@ -1033,6 +1060,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_vsr_dword_dump(vcpu, gpr); break; #endif +#ifdef CONFIG_ALTIVEC + case KVM_MMIO_REG_VMX: + kvmppc_set_vmx_dword(vcpu, gpr); + break; +#endif default: BUG(); } @@ -1106,11 +1138,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ - if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || - (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ + if (vcpu->arch.mmio_vsx_copy_nums > 4) return EMULATE_FAIL; - } while (vcpu->arch.mmio_vsx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, @@ -1252,11 +1282,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.io_gpr = rs; - /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ - if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || - (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ + if (vcpu->arch.mmio_vsx_copy_nums > 4) return EMULATE_FAIL; - } while (vcpu->arch.mmio_vsx_copy_nums) { if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) @@ -1312,6 +1340,111 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, } #endif /* CONFIG_VSX */ +#ifdef CONFIG_ALTIVEC +/* handle quadword load access in two halves */ +int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, int is_default_endian) +{ + enum emulation_result emulated; + + while (vcpu->arch.mmio_vmx_copy_nums) { + emulated = __kvmppc_handle_load(run, vcpu, rt, 8, + is_default_endian, 0); + + if (emulated != EMULATE_DONE) + break; + + vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.mmio_vmx_copy_nums--; + } + + return emulated; +} + +static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val) +{ + vector128 vrs = VCPU_VSX_VR(vcpu, rs); + u32 di; + u64 w0, w1; + + di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */ + if (di > 1) + return -1; + + if (vcpu->arch.mmio_host_swabbed) + di = 1 - di; + + w0 = vrs.u[di * 2]; + w1 = vrs.u[di * 2 + 1]; + +#ifdef __BIG_ENDIAN + *val = (w0 << 32) | w1; +#else + *val = (w1 << 32) | w0; +#endif + return 0; +} + +/* handle quadword store in two halves */ +int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rs, int is_default_endian) +{ + u64 val = 0; + enum emulation_result emulated = 
EMULATE_DONE; + + vcpu->arch.io_gpr = rs; + + while (vcpu->arch.mmio_vmx_copy_nums) { + if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1) + return EMULATE_FAIL; + + emulated = kvmppc_handle_store(run, vcpu, val, 8, + is_default_endian); + if (emulated != EMULATE_DONE) + break; + + vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.mmio_vmx_copy_nums--; + } + + return emulated; +} + +static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + enum emulation_result emulated = EMULATE_FAIL; + int r; + + vcpu->arch.paddr_accessed += run->mmio.len; + + if (!vcpu->mmio_is_write) { + emulated = kvmppc_handle_load128_by2x64(run, vcpu, + vcpu->arch.io_gpr, 1); + } else { + emulated = kvmppc_handle_store128_by2x64(run, vcpu, + vcpu->arch.io_gpr, 1); + } + + switch (emulated) { + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST; + break; + case EMULATE_FAIL: + pr_info("KVM: MMIO emulation failed (VMX repeat)\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + r = RESUME_HOST; + break; + default: + r = RESUME_GUEST; + break; + } + return r; +} +#endif /* CONFIG_ALTIVEC */ + int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = 0; @@ -1413,6 +1546,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; + vcpu_load(vcpu); + if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) @@ -1427,7 +1562,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; - return r; + goto out; + } + } +#endif +#ifdef CONFIG_ALTIVEC + if (vcpu->arch.mmio_vmx_copy_nums > 0) + vcpu->arch.mmio_vmx_copy_nums--; + + if (vcpu->arch.mmio_vmx_copy_nums > 0) { + r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run); + if (r == RESUME_HOST) { + vcpu->mmio_needed = 1; + goto out; } } #endif @@ -1461,6 +1608,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +out: + vcpu_put(vcpu); return r; } @@ -1608,23 +1757,31 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; } -long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - long r; - switch (ioctl) { - case KVM_INTERRUPT: { + if (ioctl == KVM_INTERRUPT) { struct kvm_interrupt irq; - r = -EFAULT; if (copy_from_user(&irq, argp, sizeof(irq))) - goto out; - r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); - goto out; + return -EFAULT; + return kvm_vcpu_ioctl_interrupt(vcpu, &irq); } + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + vcpu_load(vcpu); + switch (ioctl) { case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; @@ -1664,6 +1821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: + vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index e44d2b2ea97e..1c03c978eb18 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) int i; u64 min, max, sum, sum_quad; - seq_printf(m, "%s", 
"type count min max sum sum_squared\n"); - + seq_puts(m, "type count min max sum sum_squared\n"); for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 1604110c4238..916844f99c64 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, u32 i, n_lmbs; n_lmbs = of_read_number(prop++, 1); + if (n_lmbs == 0) + return; for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); @@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, u32 i, j, lmb_sets; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) struct drmem_lmb *lmb; drmem_info->n_lmbs = of_read_number(prop++, 1); + if (drmem_info->n_lmbs == 0) + return; drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), GFP_KERNEL); @@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) int lmb_index; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; /* first pass, calculate the number of LMBs */ p = prop; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 5a69b51d08a3..d573d7d07f25 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * need to add in 0x1 if it's a read-only user page */ rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -117,7 +117,7 @@ repeat: return -1; } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 2253bbc6a599..e601d95c3b20 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; vpn = hpt_vpn(ea, vsid, ssize); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); /* *None of the sub 4k page is hashed */ @@ -214,7 +214,7 @@ repeat: return -1; } - new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; *ptep = __pte(new_pte & ~H_PAGE_BUSY); @@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -327,7 +327,7 @@ repeat: } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 
7d07c7e17db6..cf290d415dcd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void) __pmd_index_size = H_PMD_INDEX_SIZE; __pud_index_size = H_PUD_INDEX_SIZE; __pgd_index_size = H_PGD_INDEX_SIZE; + __pud_cache_index = H_PUD_CACHE_INDEX; __pmd_cache_index = H_PMD_CACHE_INDEX; __pte_table_size = H_PTE_TABLE_SIZE; __pmd_table_size = H_PMD_TABLE_SIZE; diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 12511f5a015f..b320f5097a06 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; - long slot; + long slot, offset; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); @@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + if (unlikely(mmu_psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; + rpte = __real_pte(__pte(old_pte), ptep, offset); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset); } /* diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index eb8c6c8c4851..2b656e67f2ea 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -100,6 +100,6 @@ void pgtable_cache_init(void) * same size as either the pgd or pmd index except with THP enabled * on book3s 64 */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) + pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 314d19ab9385..edd8d0bc9364 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void update_numa_cpu_lookup_table(unsigned int cpu, int node) -{ - numa_cpu_lookup_table[cpu] = node; -} - static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 573a9a2ee455..2e10a964e290 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,9 +17,11 @@ #include <linux/of_fdt.h> #include <linux/mm.h> #include <linux/string_helpers.h> +#include <linux/stop_machine.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/mmu_context.h> #include <asm/dma.h> #include <asm/machdep.h> #include <asm/mmu.h> @@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void) "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); + + /* + * The init_mm context is given the first available (non-zero) PID, + * which is the "guard PID" and contains no page table. PIDR should + * never be set to zero because that duplicates the kernel address + * space at the 0x0... offset (quadrant 0)! 
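A note on the hugetlbpage-hash64.c hunk above: 16G pages hang off the PUD level while the other huge page sizes handled here hang off the PMD level, so the slot (hidx) array stored in the second half of the page table sits at a level-dependent offset. A small helper capturing that choice (hypothetical name; the patch open-codes the same test at each call site):

	/* Kernel-context sketch, not part of the patch: pick the offset of
	 * the hidx slot array for a huge page of the given MMU page size. */
	static inline int hash_hugepage_hidx_offset(int mmu_psize)
	{
		/* 16G pages are mapped at PUD level, all others at PMD */
		return (mmu_psize == MMU_PAGE_16G) ? PTRS_PER_PUD
						   : PTRS_PER_PMD;
	}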
+ * + * An arbitrary PID that may later be allocated by the PID allocator + * for userspace processes must not be used either, because that + * would cause stale user mappings for that PID on CPUs outside of + * the TLB invalidation scheme (because it won't be in mm_cpumask). + * + * So permanently carve out one PID for the purpose of a guard PID. + */ + init_mm.context.id = mmu_base_pid; + mmu_base_pid++; } static void __init radix_init_partition_table(void) @@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void) __pmd_index_size = RADIX_PMD_INDEX_SIZE; __pud_index_size = RADIX_PUD_INDEX_SIZE; __pgd_index_size = RADIX_PGD_INDEX_SIZE; + __pud_cache_index = RADIX_PUD_INDEX_SIZE; __pmd_cache_index = RADIX_PMD_INDEX_SIZE; __pte_table_size = RADIX_PTE_TABLE_SIZE; __pmd_table_size = RADIX_PMD_TABLE_SIZE; @@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void) radix_init_iamr(); radix_init_pgtable(); - + /* Switch to the guard PID before turning on MMU */ + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void) } radix_init_iamr(); + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } +struct change_mapping_params { + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long aligned_start; + unsigned long aligned_end; +}; + +static int stop_machine_change_mapping(void *data) +{ + struct change_mapping_params *params = + (struct change_mapping_params *)data; + + if (!data) + return -1; + + spin_unlock(&init_mm.page_table_lock); + pte_clear(&init_mm, params->aligned_start, params->pte); + create_physical_mapping(params->aligned_start, params->start); + create_physical_mapping(params->end, params->aligned_end); + spin_lock(&init_mm.page_table_lock); + return 0; +} + static void remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end) { @@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } +/* + * Helper to clear the PTE and potentially split the mapping + */ +static void split_kernel_mapping(unsigned long addr, unsigned long end, + unsigned long size, pte_t *pte) +{ + unsigned long mask = ~(size - 1); + unsigned long aligned_start = addr & mask; + unsigned long aligned_end = addr + size; + struct change_mapping_params params; + bool split_region = false; + + if ((end - addr) < size) { + /* + * We're going to clear the PTE, but have not yet flushed + * the mapping, so it is time to remap and flush. If the + * effects are visible outside the processor, or if we are + * running in code close to the mapping we cleared, we are + * in trouble. + */
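When the hot-unplugged range only partially covers a huge page, the cleared block leaves a head and a tail that stop_machine_change_mapping() above recreates with smaller pages. A standalone illustration of the interval arithmetic involved (plain C with made-up addresses, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long size  = 0x200000UL;  /* one 2MB (PMD-sized) mapping */
		unsigned long start = 0x1230000UL; /* hypothetical unplug start */
		unsigned long end   = 0x1350000UL; /* hypothetical unplug end */
		unsigned long aligned_start = start & ~(size - 1);
		unsigned long aligned_end   = aligned_start + size;

		/* the huge PTE covering [aligned_start, aligned_end) is cleared,
		 * then the two remainders are remapped with smaller pages */
		printf("remap head: [%#lx, %#lx)\n", aligned_start, start);
		printf("remap tail: [%#lx, %#lx)\n", end, aligned_end);
		return 0;
	}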
+ if (overlaps_kernel_text(aligned_start, addr) || + overlaps_kernel_text(end, aligned_end)) { + /* + * Hack, just return, don't pte_clear + */ + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " + "text, not splitting\n", addr, end); + return; + } + split_region = true; + } + + if (split_region) { + params.pte = pte; + params.start = addr; + params.end = end; + params.aligned_start = addr & ~(size - 1); + params.aligned_end = min_t(unsigned long, aligned_end, + (unsigned long)__va(memblock_end_of_DRAM())); + stop_machine(stop_machine_change_mapping, &params, NULL); + return; + } + + pte_clear(&init_mm, addr, pte); +} + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { @@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_huge(*pmd)) { - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pmd); + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_huge(*pud)) { - if (!IS_ALIGNED(addr, PUD_SIZE) || - !IS_ALIGNED(next, PUD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pud); + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end) continue; if (pgd_huge(*pgd)) { - if (!IS_ALIGNED(addr, PGDIR_SIZE) || - !IS_ALIGNED(next, PGDIR_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pgd); + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c9a623c2d8a2..28c980eb4422 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -82,6 +82,8 @@ unsigned long __pgd_index_size; EXPORT_SYMBOL(__pgd_index_size); unsigned long __pmd_cache_index; EXPORT_SYMBOL(__pmd_cache_index); +unsigned long __pud_cache_index; +EXPORT_SYMBOL(__pud_cache_index); unsigned long __pte_table_size; EXPORT_SYMBOL(__pte_table_size); unsigned long __pmd_table_size; @@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 881ebd53ffc2..9b23f12e863c 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, unsigned int psize; int ssize; real_pte_t rpte; - int i; + int i, offset; i = batch->index; @@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, psize = get_slice_psize(mm, addr); /* Mask the address for the correct page size */ addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); + if (unlikely(psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ @@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm,
unsigned long addr, * support 64k pages, this might be different from the * hardware page size encoded in the slice table. */ addr &= PAGE_MASK; + offset = PTRS_PER_PTE; } @@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, } WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); - rpte = __real_pte(__pte(pte), ptep); + rpte = __real_pte(__pte(pte), ptep, offset); /* * Check if we have an active batch on this CPU. If not, just diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 1a9a756b0b2f..857580a78bbd 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -101,9 +101,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx, but first mark any pending interrupts as done so we don't get woken up unnecessarily */ - if (events & (POLLIN | POLLRDNORM)) { + if (events & (EPOLLIN | EPOLLRDNORM)) { if (stat & 0xff0000) - ret |= POLLIN | POLLRDNORM; + ret |= EPOLLIN | EPOLLRDNORM; else { ctx->csa.priv1.int_stat_class2_RW &= ~CLASS2_MAILBOX_INTR; @@ -111,9 +111,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx, CLASS2_ENABLE_MAILBOX_INTR; } } - if (events & (POLLOUT | POLLWRNORM)) { + if (events & (EPOLLOUT | EPOLLWRNORM)) { if (stat & 0x00ff00) - ret = POLLOUT | POLLWRNORM; + ret = EPOLLOUT | EPOLLWRNORM; else { ctx->csa.priv1.int_stat_class2_RW &= ~CLASS2_MAILBOX_THRESHOLD_INTR; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c1be486da899..469bdd0b748f 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -774,7 +774,7 @@ static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait) * that poll should not sleep. Will be fixed later. */ mutex_lock(&ctx->state_mutex); - mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); + mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM); spu_release(ctx); return mask; @@ -910,7 +910,7 @@ static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait) * that poll should not sleep. Will be fixed later. 
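The POLLIN/POLLOUT to EPOLLIN/EPOLLOUT conversions running through these spufs poll handlers are mechanical: __poll_t is a sparse __bitwise type, and only the EPOLL* constants carry the matching annotation, so mixing in the old int-typed POLL* values now triggers type warnings. The shape of a poll handler in the new style (kernel-context sketch; the wait queue and readiness check are invented for illustration):

	static __poll_t example_poll(struct file *file, poll_table *wait)
	{
		__poll_t mask = 0;

		poll_wait(file, &example_waitqueue, wait); /* hypothetical queue */
		if (example_data_ready())                  /* hypothetical check */
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}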
*/ mutex_lock(&ctx->state_mutex); - mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); + mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM); spu_release(ctx); return mask; @@ -1710,9 +1710,9 @@ static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait) mask = 0; if (free_elements & 0xffff) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; if (tagstatus & ctx->tagwait) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__, free_elements, tagstatus, ctx->tagwait); @@ -2469,7 +2469,7 @@ static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait) return rc; if (spufs_switch_log_used(ctx) > 0) - mask |= POLLIN; + mask |= EPOLLIN; spu_release(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index fff58198b5b6..ae9d24d31eed 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -70,17 +70,17 @@ static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events) but first mark any pending interrupts as done so we don't get woken up unnecessarily */ - if (events & (POLLIN | POLLRDNORM)) { + if (events & (EPOLLIN | EPOLLRDNORM)) { if (stat & 0xff0000) - ret |= POLLIN | POLLRDNORM; + ret |= EPOLLIN | EPOLLRDNORM; else { spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); } } - if (events & (POLLOUT | POLLWRNORM)) { + if (events & (EPOLLOUT | EPOLLWRNORM)) { if (stat & 0x00ff00) - ret = POLLOUT | POLLWRNORM; + ret = EPOLLOUT | EPOLLWRNORM; else { spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_THRESHOLD_INTR); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index dd4c9b8b8a81..f6f55ab4980e 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void) const struct cpumask *l_cpumask; get_online_cpus(); - for_each_online_node(nid) { + for_each_node_with_cpus(nid) { l_cpumask = cpumask_of_node(nid); - cpu = cpumask_first(l_cpumask); + cpu = cpumask_first_and(l_cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + continue; opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(cpu)); } diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index c18de0a9b1bd..4070bb4e9da4 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -153,7 +153,7 @@ static __poll_t opal_prd_poll(struct file *file, poll_wait(file, &opal_prd_msg_wait, wait); if (!opal_msg_queue_empty()) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 2b3eb01ab110..b7c53a51c31b 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = PTR_ERR(txwin->paste_kaddr); goto free_window; } + } else { + /* + * A user mapping must ensure that context switch issues + * CP_ABORT for this thread. + */ + rc = set_thread_uses_vas(); + if (rc) + goto free_window; } - /* - * Now that we have a send window, ensure context switch issues - * CP_ABORT for this thread. 
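On the vas-window.c change above: issuing CP_ABORT at context switch is only required when the window's paste address is mapped into userspace, so set_thread_uses_vas() now runs only in that branch and kernel-owned windows skip it. Schematically (a sketch; the predicate and mapping helper names are paraphrased, not the file's real identifiers):

	if (kernel_window) {
		/* kernel user: map the paste address into kernel space */
		txwin->paste_kaddr = map_paste_region_hypothetical(txwin);
	} else {
		/* user mapping: mark the thread so the context-switch
		 * path issues CP_ABORT on its behalf */
		rc = set_thread_uses_vas();
		if (rc)
			goto free_window;
	}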
- */ - rc = -EINVAL; - if (set_thread_uses_vas() < 0) - goto free_window; - set_vinst_win(vinst, txwin); return txwin; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index dceb51454d8d..652d3e96b812 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -36,6 +36,7 @@ #include <asm/xics.h> #include <asm/xive.h> #include <asm/plpar_wrappers.h> +#include <asm/topology.h> #include "pseries.h" #include "offline_states.h" @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np) BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); set_hard_smp_processor_id(cpu, -1); + update_numa_cpu_lookup_table(cpu, -1); break; } if (cpu >= nr_cpu_ids) @@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } -extern int find_and_online_cpu_nid(int cpu); - static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 81d8614e7379..5e1ef9150182 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id); /* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. + */ +int __init init_ras_hotplug_IRQ(void) +{ + struct device_node *np; + + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + + return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); + +/* * Initialize handlers for the set of interrupts caused by hardware errors * and power system events. 
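The relocation of the hotplug event-source setup out of init_ras_IRQ() leans on initcall ordering: a machine_late_initcall() runs after all subsys and device initcalls, so by the time init_ras_hotplug_IRQ() requests its interrupt, the subsystems the handler may touch (hugepages, for instance) are up. Reduced to its skeleton, the pattern is:

	/* Kernel-context sketch of the late-initcall pattern used above */
	static int __init example_late_setup(void)
	{
		/* subsys- and device-level initialization has completed */
		return 0;
	}
	machine_late_initcall(pseries, example_late_setup);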
*/ @@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void) of_node_put(np); } - /* Hotplug Events */ - np = of_find_node_by_path("/event-sources/hot-plug-events"); - if (np != NULL) { - if (dlpar_workqueue_init() == 0) - request_event_sources_irqs(np, ras_hotplug_interrupt, - "RAS_HOTPLUG"); - of_node_put(np); - } - /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index ebc244b08d67..d22aeb0b69e1 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -42,6 +42,7 @@ static u32 xive_provision_chip_count; static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; +static bool xive_has_single_esc; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -571,6 +572,10 @@ bool __init xive_native_init(void) break; } + /* Do we support single escalation */ + if (of_get_property(np, "single-escalation-support", NULL) != NULL) + xive_has_single_esc = true; + /* Configure Thread Management areas for KVM */ for_each_possible_cpu(cpu) kvmppc_set_xive_tima(cpu, r.start, tima); @@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base) } EXPORT_SYMBOL_GPL(xive_native_free_vp_block); -int xive_native_enable_vp(u32 vp_id) +int xive_native_enable_vp(u32 vp_id, bool single_escalation) { s64 rc; + u64 flags = OPAL_XIVE_VP_ENABLED; + if (single_escalation) + flags |= OPAL_XIVE_VP_SINGLE_ESCALATION; for (;;) { - rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0); + rc = opal_xive_set_vp_info(vp_id, flags, 0); if (rc != OPAL_BUSY) break; msleep(1); @@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) return 0; } EXPORT_SYMBOL_GPL(xive_native_get_vp_info); + +bool xive_native_has_single_escalation(void) +{ + return xive_has_single_esc; +} +EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d9c4c9366049..091f1d0d0af1 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + pr_err("Error %lld getting queue info CPU %d prio %d\n", rc, + target, prio); rc = -EIO; goto fail; } @@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, /* Configure and enable the queue in HW */ rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + pr_err("Error %lld setting queue for CPU %d prio %d\n", rc, + target, prio); rc = -EIO; } else { q->qpage = qpage; @@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc, if (IS_ERR(qpage)) return PTR_ERR(qpage); - return xive_spapr_configure_queue(cpu, q, prio, qpage, - xive_queue_shift); + return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu), + q, prio, qpage, xive_queue_shift); } static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, @@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, struct xive_q *q = &xc->queue[prio]; unsigned int alloc_order; long rc; + int hw_cpu = 
get_hard_smp_processor_id(cpu); - rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); + rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0); if (rc) - pr_err("Error %ld setting queue for prio %d\n", rc, prio); + pr_err("Error %ld setting queue for CPU %d prio %d\n", rc, + hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); free_pages((unsigned long)q->qpage, alloc_order); |
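Two closing observations on this series. The xive/spapr.c fixes just above exist because the H_INT_GET_QUEUE_INFO/H_INT_SET_QUEUE_CONFIG hcalls address their target by hardware thread id rather than Linux CPU number, hence the added get_hard_smp_processor_id() translation at each call site. And for the Altivec MMIO emulation added earlier (emulate_loadstore.c and powerpc.c): a 16-byte lvx/stvx access is replayed as two 8-byte MMIO transfers, with mmio_vmx_copy_nums counting the halves still outstanding, so the doubleword index within the vector register falls out as 2 - copy_nums. A standalone check of that arithmetic (plain C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		/* mmio_vmx_copy_nums counts the 8-byte halves left to move */
		for (int copy_nums = 2; copy_nums >= 1; copy_nums--) {
			int di = 2 - copy_nums; /* doubleword index in the VR */
			printf("remaining=%d -> doubleword %d (%s half)\n",
			       copy_nums, di, di == 0 ? "first" : "second");
		}
		return 0;
	}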