Diffstat (limited to 'arch/powerpc')
38 files changed, 687 insertions, 589 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 940c955d1c9c..076fe3094856 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -13,12 +13,6 @@ config 64BIT
 	bool
 	default y if PPC64
 
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool PPC64 || PHYS_64BIT
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool ARCH_PHYS_ADDR_T_64BIT
-
 config MMU
 	bool
 	default y
@@ -188,7 +182,6 @@ config PPC
 	select HAVE_CONTEXT_TRACKING		if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL
 	select HAVE_EBPF_JIT			if PPC64
@@ -206,6 +199,7 @@ config PPC
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
+	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
 	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
@@ -225,9 +219,11 @@ config PPC
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select IOMMU_HELPER			if PPC64
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
+	select NEED_SG_DMA_LENGTH
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
@@ -481,19 +477,6 @@ config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	def_bool !DISABLE_MPROFILE_KERNEL
 
-config IOMMU_HELPER
-	def_bool PPC64
-
-config SWIOTLB
-	bool "SWIOTLB support"
-	default n
-	select IOMMU_HELPER
-	---help---
-	  Support for IO bounce buffering for systems without an IOMMU.
-	  This allows us to DMA to the full physical address space on
-	  platforms where the size of a physical address is larger
-	  than the bus address. Not all platforms support this.
-
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
 	depends on SMP && (PPC_PSERIES || \
@@ -886,7 +869,7 @@ config PPC_MEM_KEYS
 	  page-based protections, but without requiring modification of the
 	  page tables when an application changes protection domains.
 
-	  For details, see Documentation/vm/protection-keys.txt
+	  For details, see Documentation/vm/protection-keys.rst
 
 	  If unsure, say y.
@@ -916,9 +899,6 @@ config ZONE_DMA
 config NEED_DMA_MAP_STATE
 	def_bool (PPC64 || NOT_COHERENT_CACHE)
 
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config GENERIC_ISA_DMA
 	bool
 	depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 62168e1158f1..85c8af2bb272 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -17,7 +17,6 @@
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u32		__compat_uid_t;
@@ -45,16 +44,6 @@ typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	compat_ino_t	st_ino;
@@ -173,10 +162,10 @@ struct compat_ipc64_perm {
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	unsigned int __unused1;
-	compat_time_t sem_otime;
-	unsigned int __unused2;
-	compat_time_t sem_ctime;
+	unsigned int sem_otime_high;
+	unsigned int sem_otime;
+	unsigned int sem_ctime_high;
+	unsigned int sem_ctime;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused3;
 	compat_ulong_t __unused4;
@@ -184,12 +173,12 @@ struct compat_semid64_ds {
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	unsigned int __unused1;
-	compat_time_t msg_stime;
-	unsigned int __unused2;
-	compat_time_t msg_rtime;
-	unsigned int __unused3;
-	compat_time_t msg_ctime;
+	unsigned int msg_stime_high;
+	unsigned int msg_stime;
+	unsigned int msg_rtime_high;
+	unsigned int msg_rtime;
+	unsigned int msg_ctime_high;
+	unsigned int msg_ctime;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -201,12 +190,12 @@ struct compat_msqid64_ds {
 
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
-	unsigned int __unused1;
-	compat_time_t shm_atime;
-	unsigned int __unused2;
-	compat_time_t shm_dtime;
-	unsigned int __unused3;
-	compat_time_t shm_ctime;
+	unsigned int shm_atime_high;
+	unsigned int shm_atime;
+	unsigned int shm_dtime_high;
+	unsigned int shm_dtime;
+	unsigned int shm_ctime_high;
+	unsigned int shm_ctime;
 	unsigned int __unused4;
 	compat_size_t shm_segsz;
 	compat_pid_t shm_cpid;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 471b2274fbeb..c40b4380951c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,27 @@
  */
 #define EX_R3		EX_DAR
 
+#define STF_ENTRY_BARRIER_SLOT					\
+	STF_ENTRY_BARRIER_FIXUP_SECTION;			\
+	nop;							\
+	nop;							\
+	nop
+
+#define STF_EXIT_BARRIER_SLOT					\
+	STF_EXIT_BARRIER_FIXUP_SECTION;				\
+	nop;							\
+	nop;							\
+	nop;							\
+	nop;							\
+	nop;							\
+	nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL					\
+	STF_ENTRY_BARRIER_SLOT
+
 /*
  * Macros for annotating the expected destination of (h)rfid
  *
@@ -90,16 +111,19 @@
 	rfid
 
 #define RFI_TO_USER						\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	rfid;							\
 	b	rfi_flush_fallback
 
 #define RFI_TO_USER_OR_KERNEL					\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	rfid;							\
 	b	rfi_flush_fallback
 
 #define RFI_TO_GUEST						\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	rfid;							\
 	b	rfi_flush_fallback
@@ -108,21 +132,25 @@
 	hrfid
 
 #define HRFI_TO_USER						\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	hrfid;							\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_USER_OR_KERNEL					\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	hrfid;							\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_GUEST						\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	hrfid;							\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_UNKNOWN						\
+	STF_EXIT_BARRIER_SLOT;					\
 	RFI_FLUSH_SLOT;						\
 	hrfid;							\
 	b	hrfi_flush_fallback
@@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define __EXCEPTION_PROLOG_1_PRE(area)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);	\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);	\
+	INTERRUPT_TO_KERNEL;					\
 	SAVE_CTR(r10, area);					\
 	mfcr	r9;
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 86ac59e75f36..fcfd05672b1b 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,6 +187,22 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET label##1b-label##3b;		\
 	.popsection;
 
+#define STF_ENTRY_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __stf_entry_barrier_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION			\
+955:							\
+	.pushsection __stf_exit_barrier_fixup,"a";	\
+	.align 2;					\
+956:							\
+	FTR_ENTRY_OFFSET 955b-956b;			\
+	.popsection;
+
 #define RFI_FLUSH_FIXUP_SECTION				\
 951:							\
 	.pushsection __rfi_flush_fixup,"a";		\
@@ -207,6 +223,9 @@ label##3:					       	\
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 20b01897ea5d..f1e9067bd5ac 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -26,15 +26,8 @@ typedef struct {
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 #define __ARCH_IRQ_STAT
-
-#define local_softirq_pending()	__this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
-#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
-
 static inline void ack_bad_irq(unsigned int irq)
 {
 	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 2e2dddab5d65..662c8347d699 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -279,6 +279,7 @@
 #define H_GET_MPP_X		0x314
 #define H_SET_MODE		0x31C
 #define H_CLEAR_HPT		0x358
+#define H_REQUEST_VMC		0x360
 #define H_RESIZE_HPT_PREPARE	0x36C
 #define H_RESIZE_HPT_COMMIT	0x370
 #define H_REGISTER_PROC_TBL	0x37C
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4c02a7378d06..e7377b73cfec 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runner;
 	struct kvm *kvm;
 	u64 tb_offset;		/* guest timebase - host timebase */
+	u64 tb_offset_applied;	/* timebase offset currently in force */
 	ulong lpcr;
 	u32 arch_compat;
 	ulong pcr;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 401c62aad5e4..2af9ded80540 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -92,24 +92,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 
 #define HAVE_PCI_LEGACY	1
 
-#ifdef CONFIG_PPC64
-
-/* The PCI address space does not equal the physical memory address
- * space (we have an IOMMU).  The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(0)
-
-#else /* 32-bit */
-
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU).  The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
-#endif /* CONFIG_PPC64 */
-
 extern void pcibios_claim_one_bus(struct pci_bus *b);
 
 extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fa4d2e1cf772..44989b22383c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -12,6 +12,17 @@
 extern unsigned long powerpc_security_features;
 extern bool rfi_flush;
 
+/* These are bit flags */
+enum stf_barrier_type {
+	STF_BARRIER_NONE	= 0x1,
+	STF_BARRIER_FALLBACK	= 0x2,
+	STF_BARRIER_EIEIO	= 0x4,
+	STF_BARRIER_SYNC_ORI	= 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
 static inline void security_ftr_set(unsigned long feature)
 {
 	powerpc_security_features |= feature;
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 65beb0942500..2b1b37797a47 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -10,18 +10,18 @@
 
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-#ifndef __powerpc64__
-	unsigned int	__unused1;
-#endif
+#ifdef __powerpc64__
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-#ifndef __powerpc64__
-	unsigned int	__unused2;
-#endif
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#ifndef __powerpc64__
-	unsigned int	__unused3;
-#endif
 	__kernel_time_t msg_ctime;	/* last change time */
+#else
+	unsigned long  msg_stime_high;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_ctime;	/* last change time */
+#endif
 	unsigned long  msg_cbytes;	/* current number of bytes on queue */
 	unsigned long  msg_qnum;	/* number of messages in queue */
 	unsigned long  msg_qbytes;	/* max number of bytes on queue */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 8f393d60f02d..3f60946f77e3 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -15,20 +15,20 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
+ * - 2 miscellaneous 32/64-bit values
  */
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
 #ifndef __powerpc64__
-	unsigned long	__unused1;
-#endif
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;	/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;	/* last change time */
+#else
 	__kernel_time_t	sem_otime;	/* last semop time */
-#ifndef __powerpc64__
-	unsigned long	__unused2;
-#endif
 	__kernel_time_t	sem_ctime;	/* last change time */
+#endif
 	unsigned long	sem_nsems;	/* no. of semaphores in array */
 	unsigned long	__unused3;
 	unsigned long	__unused4;
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
index deb1c3e503d3..b591c4d7e4c5 100644
--- a/arch/powerpc/include/uapi/asm/shmbuf.h
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -16,25 +16,22 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
 * - 2 miscellaneous 32-bit values
 */
 
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
-#ifndef __powerpc64__
-	unsigned long		__unused1;
-#endif
+#ifdef __powerpc64__
 	__kernel_time_t		shm_atime;	/* last attach time */
-#ifndef __powerpc64__
-	unsigned long		__unused2;
-#endif
 	__kernel_time_t		shm_dtime;	/* last detach time */
-#ifndef __powerpc64__
-	unsigned long		__unused3;
-#endif
 	__kernel_time_t		shm_ctime;	/* last change time */
-#ifndef __powerpc64__
+#else
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
 	unsigned long		__unused4;
 #endif
 	size_t			shm_segsz;	/* size of segment (bytes) */
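The *_high fields above are the y2038 rework: on 32-bit powerpc the old padding words now carry the upper 32 bits of each 64-bit timestamp. A minimal sketch (not kernel code; reassemble_time is a hypothetical helper) of how the two halves are recombined, mirroring what the generic ipc code does:

#include <stdint.h>

/* Hypothetical helper: rebuild a 64-bit time from split halves. */
static int64_t reassemble_time(uint32_t high, uint32_t low)
{
	return ((int64_t)high << 32) | low;
}

/* e.g. full_stime = reassemble_time(msg_stime_high, msg_stime); */
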
diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h
index 9f142451a01f..1d51d9b88221 100644
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ b/arch/powerpc/include/uapi/asm/siginfo.h
@@ -15,19 +15,4 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-
 #endif	/* _ASM_POWERPC_SIGINFO_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 262c44a90ea1..9fc9e0977009 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compat.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -42,7 +43,6 @@
 #include <asm/paca.h>
 #include <asm/lppaca.h>
 #include <asm/cache.h>
-#include <asm/compat.h>
 #include <asm/mmu.h>
 #include <asm/hvcall.h>
 #include <asm/xics.h>
@@ -563,6 +563,7 @@ int main(void)
 	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
 	OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
 	OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+	OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
 	OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
 	OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
 	OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index e1cd6e979348..155170d70324 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -307,8 +307,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 }
 EXPORT_SYMBOL(dma_set_coherent_mask);
 
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
 int dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (ppc_md.dma_set_mask)
@@ -359,7 +357,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
 static int __init dma_init(void)
 {
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 #ifdef CONFIG_PCI
 	dma_debug_add_bus(&pci_bus_type);
 #endif
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 1139821a9aec..5746809cfaad 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1776,18 +1776,6 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int proc_eeh_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_eeh_show, NULL);
-}
-
-static const struct file_operations proc_eeh_operations = {
-	.open      = proc_eeh_open,
-	.read      = seq_read,
-	.llseek    = seq_lseek,
-	.release   = single_release,
-};
-
 #ifdef CONFIG_DEBUG_FS
 static int eeh_enable_dbgfs_set(void *data, u64 val)
 {
@@ -1829,7 +1817,7 @@ DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
 static int __init eeh_init_proc(void)
 {
 	if (machine_is(pseries) || machine_is(powernv)) {
-		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
 #ifdef CONFIG_DEBUG_FS
 		debugfs_create_file("eeh_enable", 0600,
 				powerpc_debugfs_root, NULL,
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bb26fe9e90ce..285c6465324a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
 EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
 TRAMP_KVM(PACA_EXGEN, 0x900)
 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 	mtctr	r13;							\
 	GET_PACA(r13);							\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
+	INTERRUPT_TO_KERNEL;						\
 	KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
 	HMT_MEDIUM;							\
 	mfctr	r9;
@@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 #define SYSCALL_KVMTEST							\
 	HMT_MEDIUM;							\
 	mr	r9,r13;							\
-	GET_PACA(r13);
+	GET_PACA(r13);							\
+	INTERRUPT_TO_KERNEL;
 #endif
 
 #define LOAD_SYSCALL_HANDLER(reg)					\
@@ -1508,6 +1510,19 @@ masked_##_H##interrupt:					\
 	b	.;							\
 	MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	sync
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ori	31,31,0
+	.rept 14
+	b	1f
+1:
+	.endr
+	blr
+
 TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	SET_SCRATCH0(r13);
 	GET_PACA(r13);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ebcd3956f2be..9ef4aea9fffe 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -633,6 +633,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
 	hw_breakpoint_disable();
 
 	/* Deliver the signal to userspace */
+	clear_siginfo(&info);
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code = TRAP_HWBKPT;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 5f13c8358aeb..487dcd8da4de 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -154,18 +154,6 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
 static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
 static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
 
-static int sensors_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ppc_rtas_sensors_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_sensors_operations = {
-	.open		= sensors_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int poweron_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, ppc_rtas_poweron_show, NULL);
@@ -231,18 +219,6 @@ static const struct file_operations ppc_rtas_tone_volume_operations = {
 	.release	= single_release,
 };
 
-static int rmo_buf_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ppc_rtas_rmo_buf_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_rmo_buf_ops = {
-	.open		= rmo_buf_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ppc_rtas_find_all_sensors(void);
 static void ppc_rtas_process_sensor(struct seq_file *m,
 	struct individual_sensor *s, int state, int error, const char *loc);
@@ -267,14 +243,14 @@ static int __init proc_rtas_init(void)
 		    &ppc_rtas_clock_operations);
 	proc_create("powerpc/rtas/poweron", 0644, NULL,
 		    &ppc_rtas_poweron_operations);
-	proc_create("powerpc/rtas/sensors", 0444, NULL,
-		    &ppc_rtas_sensors_operations);
+	proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+			ppc_rtas_sensors_show);
 	proc_create("powerpc/rtas/frequency", 0644, NULL,
 		    &ppc_rtas_tone_freq_operations);
 	proc_create("powerpc/rtas/volume", 0644, NULL,
 		    &ppc_rtas_tone_volume_operations);
-	proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
-		    &ppc_rtas_rmo_buf_ops);
+	proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+			ppc_rtas_rmo_buf_show);
 	return 0;
 }
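The eeh.c and rtas-proc.c hunks above are mechanical conversions: proc_create_single() takes the seq_file show callback directly and supplies the single_open() plumbing that each deleted file_operations table used to spell out. A minimal sketch of the resulting pattern (demo_show and the "powerpc/demo" path are hypothetical, not from the patch):

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example\n");	/* whatever the file should report */
	return 0;
}

static int __init demo_init(void)
{
	/* one call replaces the open wrapper and the fops table */
	return proc_create_single("powerpc/demo", 0444, NULL, demo_show)
		? 0 : -ENOMEM;
}
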
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 3eb9c45f28d7..a8b277362931 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -158,3 +158,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 	return s.len;
 }
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+static int __init handle_no_stf_barrier(char *p)
+{
+	pr_info("stf-barrier: disabled on command line.");
+	no_stf_barrier = true;
+	return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+	if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) {
+		/* Until firmware tells us, we have the barrier with auto */
+		return 0;
+	} else if (strncmp(p, "off", 3) == 0) {
+		handle_no_stf_barrier(NULL);
+		return 0;
+	} else
+		return 1;
+
+	return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+	handle_no_stf_barrier(NULL);
+	return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+static void stf_barrier_enable(bool enable)
+{
+	if (enable)
+		do_stf_barrier_fixups(stf_enabled_flush_types);
+	else
+		do_stf_barrier_fixups(STF_BARRIER_NONE);
+
+	stf_barrier = enable;
+}
+
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type type;
+	bool enable, hv;
+
+	hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		type = STF_BARRIER_EIEIO;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		type = STF_BARRIER_SYNC_ORI;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		type = STF_BARRIER_FALLBACK;
+	else
+		type = STF_BARRIER_NONE;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+
+	if (type == STF_BARRIER_FALLBACK) {
+		pr_info("stf-barrier: fallback barrier available\n");
+	} else if (type == STF_BARRIER_SYNC_ORI) {
+		pr_info("stf-barrier: hwsync barrier available\n");
+	} else if (type == STF_BARRIER_EIEIO) {
+		pr_info("stf-barrier: eieio barrier available\n");
+	}
+
+	stf_enabled_flush_types = type;
+
+	if (!no_stf_barrier)
+		stf_barrier_enable(enable);
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) {
+		const char *type;
+		switch (stf_enabled_flush_types) {
+		case STF_BARRIER_EIEIO:
+			type = "eieio";
+			break;
+		case STF_BARRIER_SYNC_ORI:
+			type = "hwsync";
+			break;
+		case STF_BARRIER_FALLBACK:
+			type = "fallback";
+			break;
+		default:
+			type = "unknown";
+		}
+		return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+	}
+
+	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != stf_barrier)
+		stf_barrier_enable(enable);
+
+	return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+	*val = stf_barrier ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+static __init int stf_barrier_debugfs_init(void)
+{
+	debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+	return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
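cpu_show_spec_store_bypass() backs the generic sysfs vulnerability interface, so the chosen barrier can be observed from user space without the debugfs toggle. A small sketch (assumes a kernel carrying this patch; the path is the standard vulnerabilities directory):

#include <stdio.h>

int main(void)
{
	char buf[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spec_store_bypass", "r");

	if (f && fgets(buf, sizeof(buf), f))
		fputs(buf, stdout);	/* e.g. "Mitigation: Kernel entry/exit barrier (eieio)" */
	if (f)
		fclose(f);
	return 0;
}
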
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0904492e7032..0e17dcb48720 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -296,7 +296,6 @@ NOKPROBE_SYMBOL(die);
 void user_single_step_siginfo(struct task_struct *tsk,
 				struct pt_regs *regs, siginfo_t *info)
 {
-	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = TRAP_TRACE;
 	info->si_addr = (void __user *)regs->nip;
@@ -334,7 +333,7 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code,
 	 */
 	thread_pkey_regs_save(&current->thread);
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 	info.si_signo = signr;
 	info.si_code = code;
 	info.si_addr = (void __user *) addr;
@@ -970,7 +969,7 @@ void unknown_exception(struct pt_regs *regs)
 	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
 	       regs->nip, regs->msr, regs->trap);
 
-	_exception(SIGTRAP, regs, TRAP_FIXME, 0);
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
 
 	exception_exit(prev_state);
 }
@@ -992,7 +991,7 @@ bail:
 
 void RunModeException(struct pt_regs *regs)
 {
-	_exception(SIGTRAP, regs, TRAP_FIXME, 0);
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
 }
 
 void single_step_exception(struct pt_regs *regs)
@@ -1032,7 +1031,7 @@ static void emulate_single_step(struct pt_regs *regs)
 
 static inline int __parse_fpscr(unsigned long fpscr)
 {
-	int ret = FPE_FIXME;
+	int ret = FPE_FLTUNK;
 
 	/* Invalid operation */
 	if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -1973,7 +1972,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
 	extern int do_spe_mathemu(struct pt_regs *regs);
 	unsigned long spefscr;
 	int fpexc_mode;
-	int code = FPE_FIXME;
+	int code = FPE_FLTUNK;
 	int err;
 
 	flush_spe_to_thread(current);
@@ -2042,7 +2041,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
 		printk(KERN_ERR "unrecognized spe instruction "
 		       "in %s at %lx\n", current->comm, regs->nip);
 	} else {
-		_exception(SIGFPE, regs, FPE_FIXME, regs->nip);
+		_exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
 		return;
 	}
 }
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ff73f498568c..5baac79df97e 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -89,7 +89,7 @@ SECTIONS
 	 */
 	.text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
 #ifdef CONFIG_LD_HEAD_STUB_CATCH
-		*(.linker_stub_catch);
+		KEEP(*(.linker_stub_catch));
 		. = . ;
 #endif
 
@@ -98,7 +98,7 @@ SECTIONS
 		ALIGN_FUNCTION();
 #endif
 		/* careful! __ftr_alt_* sections need to be close to .text */
-		*(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
 		SCHED_TEXT
 		CPUIDLE_TEXT
 		LOCK_TEXT
@@ -134,6 +134,20 @@ SECTIONS
 
 #ifdef CONFIG_PPC64
 	. = ALIGN(8);
+	__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_entry_barrier_fixup = .;
+		*(__stf_entry_barrier_fixup)
+		__stop___stf_entry_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_exit_barrier_fixup = .;
+		*(__stf_exit_barrier_fixup)
+		__stop___stf_exit_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
 	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
 		__start___rfi_flush_fixup = .;
 		*(__rfi_flush_fixup)
@@ -177,10 +191,10 @@ SECTIONS
 	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
 		INIT_DATA
 		__vtop_table_begin = .;
-		*(.vtop_fixup);
+		KEEP(*(.vtop_fixup));
 		__vtop_table_end = .;
 		__ptov_table_begin = .;
-		*(.ptov_fixup);
+		KEEP(*(.ptov_fixup));
 		__ptov_table_end = .;
 	}
 
@@ -201,26 +215,26 @@ SECTIONS
 	. = ALIGN(8);
 	__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
 		__start___ftr_fixup = .;
-		*(__ftr_fixup)
+		KEEP(*(__ftr_fixup))
 		__stop___ftr_fixup = .;
 	}
 	. = ALIGN(8);
 	__mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
 		__start___mmu_ftr_fixup = .;
-		*(__mmu_ftr_fixup)
+		KEEP(*(__mmu_ftr_fixup))
 		__stop___mmu_ftr_fixup = .;
 	}
 	. = ALIGN(8);
 	__lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
 		__start___lwsync_fixup = .;
-		*(__lwsync_fixup)
+		KEEP(*(__lwsync_fixup))
 		__stop___lwsync_fixup = .;
 	}
 #ifdef CONFIG_PPC64
 	. = ALIGN(8);
 	__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
 		__start___fw_ftr_fixup = .;
-		*(__fw_ftr_fixup)
+		KEEP(*(__fw_ftr_fixup))
 		__stop___fw_ftr_fixup = .;
 	}
 #endif
@@ -233,7 +247,7 @@ SECTIONS
 	. = ALIGN(8);
 	.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
 		__machine_desc_start = . ;
-		*(.machine.desc)
+		KEEP(*(.machine.desc))
 		__machine_desc_end = . ;
 	}
 #ifdef CONFIG_RELOCATABLE
@@ -281,7 +295,7 @@ SECTIONS
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
 		DATA_DATA
 		*(.data.rel*)
-		*(.sdata)
+		*(SDATA_MAIN)
 		*(.sdata2)
 		*(.got.plt) *(.got)
 		*(.plt)
@@ -296,7 +310,7 @@ SECTIONS
 
 	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
 		__start_opd = .;
-		*(.opd)
+		KEEP(*(.opd))
 		__end_opd = .;
 	}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ccdf3761eec0..481da8f93fa4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
 		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
 			     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
 	/* RIC=1 PRS=0 R=1 IS=2 */
 	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
 		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -595,7 +595,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep)) {
-		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+		old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
					      gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift);
 		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f604cbd8fc34..cb6d2313b19f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
 	vc->conferring_threads = 0;
+	vc->tb_offset_applied = 0;
 }
 
 static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2c3cbe0067b2..b97d261d3b89 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -695,6 +695,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 22:	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
 	beq	37f
+	std	r8, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6		/* current host timebase */
 	add	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -943,18 +944,6 @@ FTR_SECTION_ELSE
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 8:
 
-	/*
-	 * Set the decrementer to the guest decrementer.
-	 */
-	ld	r8,VCPU_DEC_EXPIRES(r4)
-	/* r8 is a host timebase value here, convert to guest TB */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r6,VCORE_TB_OFFSET(r5)
-	add	r8,r8,r6
-	mftb	r7
-	subf	r3,r7,r8
-	mtspr	SPRN_DEC,r3
-
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
 	ld	r7, VCPU_SPRG2(r4)
@@ -1008,6 +997,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r8
 	isync
 
+	/*
+	 * Set the decrementer to the guest decrementer.
+	 */
+	ld	r8,VCPU_DEC_EXPIRES(r4)
+	/* r8 is a host timebase value here, convert to guest TB */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r6,VCORE_TB_OFFSET_APPL(r5)
+	add	r8,r8,r6
+	mftb	r7
+	subf	r3,r7,r8
+	mtspr	SPRN_DEC,r3
+
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
 	EXTEND_HDEC(r3)
@@ -1600,8 +1601,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
 guest_bypass:
 	stw	r12, STACK_SLOT_TRAP(r1)
-	mr 	r3, r12
+
+	/* Save DEC */
+	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
+	ld	r3, HSTATE_KVM_VCORE(r13)
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	/* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+	ld	r4, VCORE_LPCR(r3)
+	andis.	r4, r4, LPCR_LD@h
+	bne	16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	extsw	r5,r5
+16:	add	r5,r5,r6
+	/* r5 is a guest timebase value here, convert to host TB */
+	ld	r4,VCORE_TB_OFFSET_APPL(r3)
+	subf	r5,r4,r5
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
 	/* Increment exit count, poke other threads to exit */
+	mr 	r3, r12
 	bl	kvmhv_commence_exit
 	nop
 	ld	r9, HSTATE_KVM_VCPU(r13)
@@ -1642,23 +1662,6 @@ guest_bypass:
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
 
-	/* Save DEC */
-	ld	r3, HSTATE_KVM_VCORE(r13)
-	mfspr	r5,SPRN_DEC
-	mftb	r6
-	/* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
-	ld	r4, VCORE_LPCR(r3)
-	andis.	r4, r4, LPCR_LD@h
-	bne	16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	extsw	r5,r5
-16:	add	r5,r5,r6
-	/* r5 is a guest timebase value here, convert to host TB */
-	ld	r4,VCORE_TB_OFFSET(r3)
-	subf	r5,r4,r5
-	std	r5,VCPU_DEC_EXPIRES(r9)
-
 BEGIN_FTR_SECTION
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1908,6 +1911,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	cmpwi	cr2, r0, 0
 	beq	cr2, 4f
 
+	/*
+	 * Radix: do eieio; tlbsync; ptesync sequence in case we
+	 * interrupted the guest between a tlbie and a ptesync.
+	 */
+	eieio
+	tlbsync
+	ptesync
+
 	/* Radix: Handle the case where the guest used an illegal PID */
 	LOAD_REG_ADDR(r4, mmu_base_pid)
 	lwz	r3, VCPU_GUEST_PID(r9)
@@ -2020,9 +2031,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 27:	/* Subtract timebase offset from timebase */
-	ld	r8,VCORE_TB_OFFSET(r5)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
 	cmpdi	r8,0
 	beq	17f
+	li	r0, 0
+	std	r0, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6			/* current guest timebase */
 	subf	r8,r8,r6
 	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
@@ -2703,7 +2716,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	subf	r3, r6, r3	/* convert to host TB value */
 	std	r3, VCPU_DEC_EXPIRES(r4)
 
@@ -2802,7 +2815,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/* Restore guest decrementer */
 	ld	r3, VCPU_DEC_EXPIRES(r4)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	add	r3, r3, r6	/* convert host TB to guest TB value */
 	mftb	r7
 	subf	r3, r7, r3
@@ -3609,12 +3622,9 @@ kvmppc_fix_pmao:
  */
 kvmhv_start_timing:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r6, VCORE_IN_GUEST(r5)
-	cmpwi	r6, 0
-	beq	5f				/* if in guest, need to */
-	ld	r6, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
-5:	mftb	r5
-	subf	r5, r6, r5
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
+	mftb	r5
+	subf	r5, r6, r5	/* subtract current timebase offset */
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	std	r5, VCPU_ACTIVITY_START(r4)
 	blr
@@ -3625,15 +3635,12 @@ kvmhv_start_timing:
 */
 kvmhv_accumulate_time:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r8, VCORE_IN_GUEST(r5)
-	cmpwi	r8, 0
-	beq	4f				/* if in guest, need to */
-	ld	r8, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
-4:	ld	r5, VCPU_CUR_ACTIVITY(r4)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
+	ld	r5, VCPU_CUR_ACTIVITY(r4)
 	ld	r6, VCPU_ACTIVITY_START(r4)
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	mftb	r7
-	subf	r7, r8, r7
+	subf	r7, r8, r7	/* subtract current timebase offset */
 	std	r7, VCPU_ACTIVITY_START(r4)
 	cmpdi	r5, 0
 	beqlr
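The new tb_offset_applied field tracks the offset actually programmed into the timebase at any instant, so the conversions above no longer need to test whether the vcore is in the guest: subtracting the currently-applied offset is always correct, because it is zero whenever the guest offset is not in force. A C rendering of what the assembly does (a sketch, not kernel code):

#include <stdint.h>

/* Mirrors "ld r6,VCORE_TB_OFFSET_APPL(r5); subf r5,r6,r5":
 * tb_offset_applied is 0 outside the guest, so no branch is needed. */
static uint64_t guest_tb_to_host_tb(uint64_t guest_tb, uint64_t tb_offset_applied)
{
	return guest_tb - tb_offset_applied;
}
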
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..99c3620b40d9 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
 #define XGLUE(a,b) a##b
 #define GLUE(a,b) XGLUE(a,b)
 
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
 static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 {
 	u8 cppr;
@@ -205,6 +208,10 @@ skip_ipi:
 		goto skip_ipi;
 	}
 
+	/* If it's the dummy interrupt, continue searching */
+	if (hirq == XICS_DUMMY)
+		goto skip_ipi;
+
 	/* If fetching, update queue pointers */
 	if (scan_type == scan_fetch) {
 		q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
 	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
 }
 
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * Allright, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+		next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
 X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	u8 old_cppr;
 
 	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	 */
 	smp_mb();
 
-	/*
-	 * We are masking less, we need to look for pending things
-	 * to deliver and set VP pending bits accordingly to trigger
-	 * a new interrupt otherwise we might miss MFRR changes for
-	 * which we have optimized out sending an IPI signal.
-	 */
-	if (cppr > old_cppr)
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
 		GLUE(X_PFX,push_pending_to_hw)(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+	}
 
 	/* Apply new CPPR */
 	xc->hw_cppr = cppr;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 2b9173d09f24..8b69f868298c 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
 #include <asm/page.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 #include <asm/firmware.h>
 
 struct fixup_entry {
@@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_entry_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK) {
+		instrs[i++] = 0x7d4802a6; /* mflr r10		*/
+		instrs[i++] = 0x60000000; /* branch patched below */
+		instrs[i++] = 0x7d4803a6; /* mtlr r10		*/
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	} else if (types & STF_BARRIER_SYNC_ORI) {
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe94d0000; /* ld r10,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+
+		if (types & STF_BARRIER_FALLBACK)
+			patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+				     BRANCH_SET_LINK);
+		else
+			patch_instruction(dest + 1, instrs[1]);
+
+		patch_instruction(dest + 2, instrs[2]);
+	}
+
+	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)       ? "no" :
+		(types == STF_BARRIER_FALLBACK)   ? "fallback" :
+		(types == STF_BARRIER_EIEIO)      ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+						  : "unknown");
+}
+
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[6], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_exit_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+	instrs[3] = 0x60000000; /* nop */
+	instrs[4] = 0x60000000; /* nop */
+	instrs[5] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+			instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+		} else {
+			instrs[i++] = 0x7db243a6; /* mtsprg 2,r13	*/
+			instrs[i++] = 0x7db142a6; /* mfsprg r13,1    */
+		}
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe9ad0000; /* ld r13,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+		} else {
+			instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+		}
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+		patch_instruction(dest + 1, instrs[1]);
+		patch_instruction(dest + 2, instrs[2]);
+		patch_instruction(dest + 3, instrs[3]);
+		patch_instruction(dest + 4, instrs[4]);
+		patch_instruction(dest + 5, instrs[5]);
+	}
+	printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)       ? "no" :
+		(types == STF_BARRIER_FALLBACK)   ? "fallback" :
+		(types == STF_BARRIER_EIEIO)      ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
						  : "unknown");
+}
+
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+	do_stf_entry_barrier_fixups(types);
+	do_stf_exit_barrier_fixups(types);
+}
+
 void do_rfi_flush_fixups(enum l1d_flush_type types)
 {
 	unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3fafacd3f907..b1ca7a0974e3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -165,6 +165,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		return SIGBUS;
 
 	current->thread.trap_nr = BUS_ADRERR;
+	clear_siginfo(&info);
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code = BUS_ADRERR;
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 02d369ca6a53..809f019d3cba 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 8bdef7ed28a8..3609be4692b3 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
 * with our redzone usage.
 *
 *		[	prev sp		] <-------------
- *		[   nv gpr save area	] 8*8		|
+ *		[   nv gpr save area	] 6*8		|
 *		[    tail_call_cnt	] 8		|
 *		[    local_tmp_var	] 8		|
 * fp (r31) -->	[   ebpf stack space	] upto 512	|
 *		[     frame header	] 32/112	|
 * sp (r1) --->	[    stack pointer	] --------------
 */
 
-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE	(8*8)
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE	(6*8)
 
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS	16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
 #ifndef __ASSEMBLY__
 
 /* BPF register usage */
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1	(MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2	(MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
 	[BPF_REG_FP] = 31,
 	/* eBPF jit internal registers */
 	[BPF_REG_AX] = 2,
-	[SKB_HLEN_REG] = 25,
-	[SKB_DATA_REG] = 26,
 	[TMP_REG_1] = 9,
 	[TMP_REG_2] = 10
 };
 
-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN		24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)	u64 func(u64 r3, u64 r4);			\
-				u64 func##_negative_offset(u64 r3, u64 r4);	\
-				u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)						\
-			(imm < 0 ?						\
-			(imm >= SKF_LL_OFF ? func##_negative_offset : func) :	\
-			func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN		27
 
 #define SEEN_FUNC	0x1000 /* might call external helpers */
 #define SEEN_STACK	0x2000 /* uses BPF stack */
-#define SEEN_SKB	0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL	0x8000 /* uses tail calls */
+#define SEEN_TAILCALL	0x4000 /* uses tail calls */
 
 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
 	 * as calls to external helpers.
 	 * - register usage is tracked with corresponding
-	 *   bits (r3-r10 and r25-r31)
+	 *   bits (r3-r10 and r27-r31)
 	 * - rest of the bits can be used to track other
 	 *   things -- for now, we use bits 16 to 23
 	 *   encoded in SEEN_* macros above
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644
index 7e4c51430b84..000000000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- *		   IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27		skb pointer (ctx)
- * r25		skb header length
- * r26		skb->data pointer
- * r4		offset
- *
- * Result is passed back in:
- * r8		data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb	r27
-#define r_hlen	r25
-#define r_data	r26
-#define r_off	r4
-#define r_val	r8
-#define r_tmp	r9
-
-_GLOBAL_TOC(sk_load_word)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_word_neg
-	b	sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
-	/* Are we accessing past headlen? */
-	subi	r_tmp, r_hlen, 4
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_word
-	/* Nope, just hitting the header.  cr0 here is eq or gt! */
-	LWZX_BE	r_val, r_data, r_off
-	blr	/* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_half_neg
-	b	sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
-	subi	r_tmp, r_hlen, 2
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_half
-	LHZX_BE	r_val, r_data, r_off
-	blr
-
-_GLOBAL_TOC(sk_load_byte)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_byte_neg
-	b	sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
-	cmpd	r_hlen, r_off
-	ble	bpf_slow_path_byte
-	lbzx	r_val, r_data, r_off
-	blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)					\
-	mflr	r0;							\
-	std	r0, PPC_LR_STKOFF(r1);					\
-	stdu	r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1);	\
-	mr	r3, r_skb;						\
-	/* r4 = r_off as passed */					\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	li	r6, SIZE;						\
-	bl	skb_copy_bits;						\
-	nop;								\
-	/* save r5 */							\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	/* r3 = 0 on success */						\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS;	\
-	ld	r0, PPC_LR_STKOFF(r1);					\
-	mtlr	r0;							\
-	cmpdi	r3, 0;							\
-	blt	bpf_error;	/* cr0 = LT */
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	/* Data value is on stack, and cr0 != LT */
-	LWZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	LHZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	lbzx	r_val, 0, r5
-	blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE)				\
-	mflr	r0;						\
-	std	r0, PPC_LR_STKOFF(r1);				\
-	stdu	r1, -STACK_FRAME_MIN_SIZE(r1);			\
-	mr	r3, r_skb;					\
-	/* r4 = r_off, as passed */				\
-	li	r5, SIZE;					\
-	bl	bpf_internal_load_pointer_neg_helper;		\
-	nop;							\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE;			\
-	ld	r0, PPC_LR_STKOFF(r1);				\
-	mtlr	r0;						\
-	/* R3 != 0 on success */				\
-	cmpldi	r3, 0;						\
-	beq	bpf_error_slow;	/* cr0 = EQ */
-
-bpf_slow_path_word_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
-	sk_negative_common(4)
-	LWZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_half_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
-	sk_negative_common(2)
-	LHZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_byte_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
-	sk_negative_common(1)
-	lbzx	r_val, 0, r3
-	blr
-
-bpf_error_slow:
-	/* fabricate a cr0 = lt */
-	li	r_tmp, -1
-	cmpdi	r_tmp, 0
-bpf_error:
-	/*
-	 * Entered with cr0 = lt
-	 * Generated code will 'blt epilogue', returning 0.
-	 */
-	li	r_val, 0
-	blr
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0ef3d9580e98..f1c95779843b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 *		[	prev sp		] <-------------
 *		[	  ...       	] |
 * sp (r1) --->	[    stack pointer	] --------------
- *		[   nv gpr save area	] 8*8
+ *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }
 
-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
-	/*
-	 * Load skb->len and skb->data_len
-	 * r3 points to skb
-	 */
-	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
-	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
-	/* header_len = len - data_len */
-	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
-	/* skb->data pointer */
-	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
 static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-	/*
-	 * Save additional non-volatile regs if we cache skb
-	 * Also, setup skb data
-	 */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-		bpf_jit_emit_skb_loads(image, ctx);
-	}
-
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, BPF_REG_FP))
 		PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-	/* Restore non-volatile registers used for skb cache */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-	}
-
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
 		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -202,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
@@ -291,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -747,29 +724,30 @@ emit_clear:
 			break;
 
 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
 
-			/* Save skb pointer if we need to re-cache skb data */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func))
-				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
 
 			/* move return value from r3 to BPF_REG_0 */
 			PPC_MR(b2p[BPF_REG_0], 3);
-
-			/* refresh skb cache */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func)) {
-				/* reload skb pointer to r3 */
-				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
-				bpf_jit_emit_skb_loads(image, ctx);
-			}
 			break;
 
 		/*
@@ -887,65 +865,6 @@ cond_branch:
 			break;
 
 		/*
-		 * Loads from packet header/data
-		 * Assume 32-bit input value in imm and X (src_reg)
-		 */
-
-		/* Absolute loads */
-		case BPF_LD | BPF_W | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-			goto common_load_abs;
-		case BPF_LD | BPF_H | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-			goto common_load_abs;
-		case BPF_LD | BPF_B | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
-			/*
-			 * Load from [imm]
-			 * Load into r4, which can just be passed onto
-			 *   skb load helpers as the second parameter
-			 */
-			PPC_LI32(4, imm);
-			goto common_load;
-
-		/* Indirect loads */
-		case BPF_LD | BPF_W | BPF_IND:
-			func = (u8 *)sk_load_word;
-			goto common_load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			func = (u8 *)sk_load_half;
-			goto common_load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			func = (u8 *)sk_load_byte;
-common_load_ind:
-			/*
-			 * Load from [src_reg + imm]
-			 * Treat src_reg as a 32-bit value
-			 */
-			PPC_EXTSW(4, src_reg);
-			if (imm) {
-				if (imm >= -32768 && imm < 32768)
-					PPC_ADDI(4, 4, IMM_L(imm));
-				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_ADD(4, 4, b2p[TMP_REG_1]);
-				}
-			}
-
-common_load:
-			ctx->seen |= SEEN_SKB;
-			ctx->seen |= SEEN_FUNC;
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-			/*
-			 * Helper returns 'lt' condition on error, and an
-			 * appropriate return value in BPF_REG_0
-			 */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
-		/*
 		 * Tail call
 		 */
 		case BPF_JMP | BPF_TAIL_CALL:
@@ -971,6 +890,14 @@ common_load:
 	return 0;
 }
 
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
 	u32 proglen;
@@ -978,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
@@ -985,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
+	bool extra_pass = false;
 
 	if (!fp->jit_requested)
 		return org_fp;
@@ -998,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = tmp_fp;
 	}
 
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
 	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
+
 	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -1011,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	/*
@@ -1032,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 			bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
+skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
@@ -1042,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -1068,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-
 	if (bpf_blinded)
 		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
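The five-instruction padding matters because materializing an arbitrary 64-bit immediate on ppc64 takes up to five instructions, while small addresses expand to fewer; keeping the call sequence a fixed length guarantees the image layout, and therefore every previously computed branch offset, is identical on the extra pass. A sketch of the idea (the worst-case expansion in the comment is my reading of PPC_LI64(), stated as an assumption):

/*
 * Worst case for an arbitrary 64-bit constant is roughly:
 *   lis r12,a; ori r12,r12,b; sldi r12,r12,32; oris r12,r12,c; ori r12,r12,d
 * Shorter expansions are topped up to the same length:
 */
while (ctx->idx - ctx_idx < 5)
	PPC_NOP();	/* equivalent to the for-loop in the patch */
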
@@ -887,65 +865,6 @@ cond_branch:
 			break;
 
 		/*
-		 * Loads from packet header/data
-		 * Assume 32-bit input value in imm and X (src_reg)
-		 */
-
-		/* Absolute loads */
-		case BPF_LD | BPF_W | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-			goto common_load_abs;
-		case BPF_LD | BPF_H | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-			goto common_load_abs;
-		case BPF_LD | BPF_B | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
-			/*
-			 * Load from [imm]
-			 * Load into r4, which can just be passed onto
-			 * skb load helpers as the second parameter
-			 */
-			PPC_LI32(4, imm);
-			goto common_load;
-
-		/* Indirect loads */
-		case BPF_LD | BPF_W | BPF_IND:
-			func = (u8 *)sk_load_word;
-			goto common_load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			func = (u8 *)sk_load_half;
-			goto common_load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			func = (u8 *)sk_load_byte;
-common_load_ind:
-			/*
-			 * Load from [src_reg + imm]
-			 * Treat src_reg as a 32-bit value
-			 */
-			PPC_EXTSW(4, src_reg);
-			if (imm) {
-				if (imm >= -32768 && imm < 32768)
-					PPC_ADDI(4, 4, IMM_L(imm));
-				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_ADD(4, 4, b2p[TMP_REG_1]);
-				}
-			}
-
-common_load:
-			ctx->seen |= SEEN_SKB;
-			ctx->seen |= SEEN_FUNC;
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-			/*
-			 * Helper returns 'lt' condition on error, and an
-			 * appropriate return value in BPF_REG_0
-			 */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
-		/*
 		 * Tail call
 		 */
 		case BPF_JMP | BPF_TAIL_CALL:
@@ -971,6 +890,14 @@ common_load:
 	return 0;
 }
 
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
 	u32 proglen;
@@ -978,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
@@ -985,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
+	bool extra_pass = false;
 
 	if (!fp->jit_requested)
 		return org_fp;
@@ -998,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = tmp_fp;
 	}
 
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
 	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
+
 	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -1011,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	/*
@@ -1032,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 			bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
+skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
@@ -1042,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -1068,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-
 	if (bpf_blinded)
 		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
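
The powerpc64_jit_data block introduced above lives in fp->aux->jit_data between the initial compile and the extra pass, so the image, the addrs[] table and the codegen context are reused rather than rebuilt once bpf-to-bpf callee addresses become known. A simplified model of that two-phase handshake — struct jit_cache and compile_once() are invented names, and error handling is trimmed:

#include <stdbool.h>
#include <stdlib.h>

/* Invented, simplified stand-in for struct powerpc64_jit_data. */
struct jit_cache {
	void *image;		/* allocated code buffer */
	unsigned int *addrs;	/* per-insn offsets from the sizing pass */
	unsigned int proglen;
};

/*
 * First call: size and allocate, then stash state for later.
 * Second call (extra pass): reuse the saved state and regenerate
 * code into the same buffer, now that all callee addresses are
 * known. Instruction counts must not change, which is what the
 * fixed-length call sequence guarantees.
 */
static void *compile_once(struct jit_cache **cachep, unsigned int ninsns,
			  bool *extra_pass)
{
	struct jit_cache *c = *cachep;

	if (c) {
		*extra_pass = true;	/* reuse image and addrs */
		return c->image;
	}

	c = calloc(1, sizeof(*c));
	if (!c)
		return NULL;
	c->addrs = calloc(ninsns + 1, sizeof(*c->addrs));
	/* ... sizing pass and image allocation elided ... */
	*cachep = c;
	*extra_pass = false;
	return c->image;
}

int main(void)
{
	struct jit_cache *cache = NULL;
	bool extra = false;

	compile_once(&cache, 16, &extra);	/* initial pass: allocates */
	compile_once(&cache, 16, &extra);	/* extra pass: reuses */
	return extra ? 0 : 1;
}
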
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ecc66d5f02c9..ad054dd0d666 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,6 +7,7 @@
 * 2 of the License, or (at your option) any later version.
 **/
+#include <linux/compat_time.h>
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index cc892dcfa114..e6a1de521319 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -222,6 +222,7 @@ config PTE_64BIT
 config PHYS_64BIT
 	bool 'Large physical address support' if E500 || PPC_86xx
 	depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+	select PHYS_ADDR_T_64BIT
 	---help---
 	  This option enables kernel support for larger than 32-bit physical
 	  addresses. This feature may not be available on all cores.
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 870c0a82d560..1e002e94d0f6 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -44,7 +44,7 @@ static void spufs_handle_event(struct spu_context *ctx,
 		return;
 	}
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 
 	switch (type) {
 	case SPE_EVENT_INVALID_DMA:
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index ccc421503363..c9ef3c532169 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1095,18 +1095,6 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
 		atomic_read(&nr_spu_contexts),
 		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
 	return 0;
-}
-
-static int spu_loadavg_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_spu_loadavg, NULL);
-}
-
-static const struct file_operations spu_loadavg_fops = {
-	.open		= spu_loadavg_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
 };
 
 int __init spu_sched_init(void)
@@ -1135,7 +1123,7 @@ int __init spu_sched_init(void)
 
 	mod_timer(&spuloadavg_timer, 0);
 
-	entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+	entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
 	if (!entry)
 		goto out_stop_kthread;
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 8d0958cc83a8..f96df0a25d05 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -132,6 +132,7 @@ static void __init pnv_setup_arch(void)
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
 	pnv_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* Initialize SMP */
 	pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 63b1f0d10ef0..139f0af6c3d9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -711,6 +711,7 @@ static void __init pSeries_setup_arch(void)
 	fwnmi_init();
 
 	pseries_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
 	pci_add_flags(PCI_PROBE_ONLY);
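
The spufs conversion above relies on proc_create_single(), which takes the seq_file show routine directly and eliminates the single_open() wrapper and file_operations table. A hedged example of the same pattern in a hypothetical module — demo_show() and the "demo" entry name are made up:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Hypothetical show routine; any single_open()-style callback fits. */
static int demo_show(struct seq_file *s, void *unused)
{
	seq_puts(s, "hello\n");
	return 0;
}

static int __init demo_init(void)
{
	/* One call replaces the open handler and file_operations glue. */
	if (!proc_create_single("demo", 0, NULL, demo_show))
		return -ENOMEM;
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");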