Diffstat (limited to 'arch')
-rw-r--r-- arch/alpha/boot/Makefile | 16
-rw-r--r-- arch/alpha/boot/main.c | 1
-rw-r--r-- arch/alpha/boot/stdio.c | 306
-rw-r--r-- arch/alpha/boot/tools/objstrip.c | 3
-rw-r--r-- arch/alpha/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/alpha/include/asm/types.h | 1
-rw-r--r-- arch/alpha/include/asm/unistd.h | 2
-rw-r--r-- arch/alpha/include/uapi/asm/unistd.h | 3
-rw-r--r-- arch/alpha/kernel/err_ev6.c | 1
-rw-r--r-- arch/alpha/kernel/irq.c | 1
-rw-r--r-- arch/alpha/kernel/osf_sys.c | 3
-rw-r--r-- arch/alpha/kernel/process.c | 7
-rw-r--r-- arch/alpha/kernel/smp.c | 8
-rw-r--r-- arch/alpha/kernel/srmcons.c | 3
-rw-r--r-- arch/alpha/kernel/sys_marvel.c | 2
-rw-r--r-- arch/alpha/kernel/systbls.S | 3
-rw-r--r-- arch/alpha/kernel/traps.c | 1
-rw-r--r-- arch/alpha/mm/fault.c | 5
-rw-r--r-- arch/alpha/oprofile/op_model_ev4.c | 1
-rw-r--r-- arch/alpha/oprofile/op_model_ev5.c | 1
-rw-r--r-- arch/alpha/oprofile/op_model_ev6.c | 1
-rw-r--r-- arch/alpha/oprofile/op_model_ev67.c | 1
-rw-r--r-- arch/arc/Kconfig.debug | 13
-rw-r--r-- arch/arc/include/asm/atomic.h | 2
-rw-r--r-- arch/arc/include/asm/futex.h | 10
-rw-r--r-- arch/arc/mm/cache_arc700.c | 4
-rw-r--r-- arch/arc/mm/fault.c | 2
-rw-r--r-- arch/arm/boot/dts/Makefile | 2
-rw-r--r-- arch/arm/boot/dts/am335x-bone-common.dtsi | 19
-rw-r--r-- arch/arm/boot/dts/am335x-boneblack.dts | 4
-rw-r--r-- arch/arm/boot/dts/am335x-evmsk.dts | 2
-rw-r--r-- arch/arm/boot/dts/am35xx-clocks.dtsi | 14
-rw-r--r-- arch/arm/boot/dts/am437x-sk-evm.dts | 4
-rw-r--r-- arch/arm/boot/dts/am57xx-beagle-x15.dts | 11
-rw-r--r-- arch/arm/boot/dts/armada-375.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/armada-38x.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/armada-39x.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/armada-xp-linksys-mamba.dts | 5
-rw-r--r-- arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts | 4
-rw-r--r-- arch/arm/boot/dts/dm816x.dtsi | 4
-rw-r--r-- arch/arm/boot/dts/dove-cubox.dts | 1
-rw-r--r-- arch/arm/boot/dts/dra7.dtsi | 10
-rw-r--r-- arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 3
-rw-r--r-- arch/arm/boot/dts/exynos4412-trats2.dts | 2
-rw-r--r-- arch/arm/boot/dts/exynos5250-snow.dts | 1
-rw-r--r-- arch/arm/boot/dts/exynos5420-peach-pit.dts | 1
-rw-r--r-- arch/arm/boot/dts/exynos5420-trip-points.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/exynos5420.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/exynos5440-trip-points.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/exynos5800-peach-pi.dts | 1
-rw-r--r-- arch/arm/boot/dts/imx23-olinuxino.dts | 4
-rw-r--r-- arch/arm/boot/dts/imx25.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/imx27.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx28.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-sabreauto.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/omap3-devkit8000.dts | 2
-rw-r--r-- arch/arm/boot/dts/omap3-n900.dts | 8
-rw-r--r-- arch/arm/boot/dts/omap3.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/omap5.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/r8a7791-koelsch.dts | 2
-rw-r--r-- arch/arm/boot/dts/ste-dbx5x0.dtsi | 17
-rw-r--r-- arch/arm/boot/dts/ste-href.dtsi | 15
-rw-r--r-- arch/arm/boot/dts/ste-snowball.dts | 13
-rw-r--r-- arch/arm/boot/dts/tegra124.dtsi | 8
-rw-r--r-- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 1
-rw-r--r-- arch/arm/boot/dts/vexpress-v2p-ca9.dts | 11
-rw-r--r-- arch/arm/boot/dts/zynq-7000.dtsi | 4
-rw-r--r-- arch/arm/configs/multi_v7_defconfig | 5
-rw-r--r-- arch/arm/configs/omap2plus_defconfig | 2
-rw-r--r-- arch/arm/include/asm/barrier.h | 2
-rw-r--r-- arch/arm/include/asm/dma-iommu.h | 2
-rw-r--r-- arch/arm/include/asm/futex.h | 13
-rw-r--r-- arch/arm/include/asm/topology.h | 2
-rw-r--r-- arch/arm/include/asm/xen/page.h | 1
-rw-r--r-- arch/arm/kernel/entry-common.S | 4
-rw-r--r-- arch/arm/kernel/perf_event_cpu.c | 12
-rw-r--r-- arch/arm/mach-exynos/common.h | 2
-rw-r--r-- arch/arm/mach-exynos/exynos.c | 27
-rw-r--r-- arch/arm/mach-exynos/platsmp.c | 39
-rw-r--r-- arch/arm/mach-exynos/pm_domains.c | 4
-rw-r--r-- arch/arm/mach-exynos/suspend.c | 11
-rw-r--r-- arch/arm/mach-gemini/common.h | 4
-rw-r--r-- arch/arm/mach-gemini/reset.c | 4
-rw-r--r-- arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c | 2
-rw-r--r-- arch/arm/mach-imx/gpc.c | 16
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod.c | 68
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod_43xx_data.c | 70
-rw-r--r-- arch/arm/mach-omap2/prcm43xx.h | 3
-rw-r--r-- arch/arm/mach-omap2/prm-regbits-34xx.h | 1
-rw-r--r-- arch/arm/mach-omap2/prm-regbits-44xx.h | 1
-rw-r--r-- arch/arm/mach-omap2/prminst44xx.c | 20
-rw-r--r-- arch/arm/mach-omap2/sleep34xx.S | 22
-rw-r--r-- arch/arm/mach-omap2/timer.c | 13
-rw-r--r-- arch/arm/mach-omap2/vc.c | 12
-rw-r--r-- arch/arm/mach-omap2/vc.h | 2
-rw-r--r-- arch/arm/mach-omap2/vc3xxx_data.c | 1
-rw-r--r-- arch/arm/mach-omap2/vc44xx_data.c | 1
-rw-r--r-- arch/arm/mach-pxa/Kconfig | 9
-rw-r--r-- arch/arm/mach-pxa/Makefile | 1
-rw-r--r-- arch/arm/mach-pxa/include/mach/lubbock.h | 7
-rw-r--r-- arch/arm/mach-pxa/include/mach/mainstone.h | 6
-rw-r--r-- arch/arm/mach-pxa/lubbock.c | 108
-rw-r--r-- arch/arm/mach-pxa/mainstone.c | 115
-rw-r--r-- arch/arm/mach-pxa/pxa_cplds_irqs.c | 200
-rw-r--r-- arch/arm/mach-rockchip/pm.c | 7
-rw-r--r-- arch/arm/mach-rockchip/pm.h | 4
-rw-r--r-- arch/arm/mach-rockchip/rockchip.c | 19
-rw-r--r-- arch/arm/mm/dma-mapping.c | 13
-rw-r--r-- arch/arm/mm/fault.c | 2
-rw-r--r-- arch/arm/mm/highmem.c | 3
-rw-r--r-- arch/arm/mm/mmu.c | 20
-rw-r--r-- arch/arm/mm/proc-arm1020.S | 2
-rw-r--r-- arch/arm/mm/proc-arm1020e.S | 2
-rw-r--r-- arch/arm/mm/proc-arm925.S | 3
-rw-r--r-- arch/arm/mm/proc-feroceon.S | 1
-rw-r--r-- arch/arm/net/bpf_jit_32.c | 42
-rw-r--r-- arch/arm/xen/enlighten.c | 1
-rw-r--r-- arch/arm/xen/mm.c | 15
-rw-r--r-- arch/arm64/boot/dts/arm/juno-motherboard.dtsi | 31
-rw-r--r-- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 3
-rw-r--r-- arch/arm64/crypto/crc32-arm64.c | 22
-rw-r--r-- arch/arm64/crypto/sha1-ce-glue.c | 3
-rw-r--r-- arch/arm64/crypto/sha2-ce-glue.c | 3
-rw-r--r-- arch/arm64/include/asm/barrier.h | 2
-rw-r--r-- arch/arm64/include/asm/futex.h | 4
-rw-r--r-- arch/arm64/include/asm/topology.h | 2
-rw-r--r-- arch/arm64/kernel/alternative.c | 53
-rw-r--r-- arch/arm64/kernel/perf_event.c | 8
-rw-r--r-- arch/arm64/mm/dump.c | 2
-rw-r--r-- arch/arm64/mm/fault.c | 2
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c | 2
-rw-r--r-- arch/avr32/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/avr32/include/asm/uaccess.h | 12
-rw-r--r-- arch/avr32/mm/fault.c | 4
-rw-r--r-- arch/blackfin/include/asm/io.h | 1
-rw-r--r-- arch/cris/mm/fault.c | 6
-rw-r--r-- arch/frv/mm/fault.c | 4
-rw-r--r-- arch/frv/mm/highmem.c | 2
-rw-r--r-- arch/hexagon/include/asm/cmpxchg.h | 1
-rw-r--r-- arch/hexagon/include/asm/uaccess.h | 3
-rw-r--r-- arch/ia64/include/asm/barrier.h | 7
-rw-r--r-- arch/ia64/include/asm/topology.h | 2
-rw-r--r-- arch/ia64/include/uapi/asm/cmpxchg.h | 2
-rw-r--r-- arch/ia64/kernel/smpboot.c | 3
-rw-r--r-- arch/ia64/mm/fault.c | 4
-rw-r--r-- arch/ia64/pci/pci.c | 13
-rw-r--r-- arch/m32r/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/m32r/include/asm/uaccess.h | 30
-rw-r--r-- arch/m32r/kernel/smp.c | 6
-rw-r--r-- arch/m32r/mm/fault.c | 8
-rw-r--r-- arch/m68k/include/asm/cmpxchg.h | 1
-rw-r--r-- arch/m68k/include/asm/irqflags.h | 3
-rw-r--r-- arch/m68k/mm/fault.c | 4
-rw-r--r-- arch/metag/include/asm/barrier.h | 2
-rw-r--r-- arch/metag/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/metag/mm/fault.c | 2
-rw-r--r-- arch/metag/mm/highmem.c | 4
-rw-r--r-- arch/microblaze/include/asm/uaccess.h | 6
-rw-r--r-- arch/microblaze/mm/fault.c | 8
-rw-r--r-- arch/microblaze/mm/highmem.c | 4
-rw-r--r-- arch/mips/Makefile | 2
-rw-r--r-- arch/mips/ath79/prom.c | 3
-rw-r--r-- arch/mips/ath79/setup.c | 2
-rw-r--r-- arch/mips/cobalt/Makefile | 3
-rw-r--r-- arch/mips/configs/fuloong2e_defconfig | 2
-rw-r--r-- arch/mips/include/asm/barrier.h | 4
-rw-r--r-- arch/mips/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/mips/include/asm/elf.h | 4
-rw-r--r-- arch/mips/include/asm/pgtable-bits.h | 14
-rw-r--r-- arch/mips/include/asm/smp.h | 2
-rw-r--r-- arch/mips/include/asm/switch_to.h | 2
-rw-r--r-- arch/mips/include/asm/topology.h | 2
-rw-r--r-- arch/mips/include/asm/uaccess.h | 45
-rw-r--r-- arch/mips/kernel/cpu-probe.c | 3
-rw-r--r-- arch/mips/kernel/elf.c | 32
-rw-r--r-- arch/mips/kernel/irq.c | 4
-rw-r--r-- arch/mips/kernel/ptrace.c | 2
-rw-r--r-- arch/mips/kernel/signal-common.h | 9
-rw-r--r-- arch/mips/kernel/smp-bmips.c | 2
-rw-r--r-- arch/mips/kernel/smp-cps.c | 2
-rw-r--r-- arch/mips/kernel/smp.c | 6
-rw-r--r-- arch/mips/kernel/traps.c | 1
-rw-r--r-- arch/mips/kvm/emulate.c | 8
-rw-r--r-- arch/mips/lib/strnlen_user.S | 15
-rw-r--r-- arch/mips/loongson/common/Makefile | 4
-rw-r--r-- arch/mips/loongson/loongson-3/smp.c | 2
-rw-r--r-- arch/mips/math-emu/cp1emu.c | 4
-rw-r--r-- arch/mips/mm/c-r4k.c | 2
-rw-r--r-- arch/mips/mm/fault.c | 4
-rw-r--r-- arch/mips/mm/highmem.c | 5
-rw-r--r-- arch/mips/mm/init.c | 2
-rw-r--r-- arch/mips/mm/tlb-r4k.c | 2
-rw-r--r-- arch/mips/net/bpf_jit.c | 6
-rw-r--r-- arch/mips/ralink/ill_acc.c | 2
-rw-r--r-- arch/mips/sgi-ip32/ip32-platform.c | 4
-rw-r--r-- arch/mn10300/include/asm/highmem.h | 3
-rw-r--r-- arch/mn10300/mm/fault.c | 4
-rw-r--r-- arch/nios2/mm/fault.c | 2
-rw-r--r-- arch/parisc/include/asm/cacheflush.h | 2
-rw-r--r-- arch/parisc/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/parisc/include/asm/elf.h | 4
-rw-r--r-- arch/parisc/kernel/process.c | 10
-rw-r--r-- arch/parisc/kernel/sys_parisc.c | 3
-rw-r--r-- arch/parisc/kernel/traps.c | 4
-rw-r--r-- arch/parisc/mm/fault.c | 4
-rw-r--r-- arch/powerpc/include/asm/barrier.h | 3
-rw-r--r-- arch/powerpc/include/asm/cmpxchg.h | 1
-rw-r--r-- arch/powerpc/include/asm/topology.h | 2
-rw-r--r-- arch/powerpc/kernel/mce.c | 4
-rw-r--r-- arch/powerpc/kernel/vmlinux.lds.S | 1
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 5
-rw-r--r-- arch/powerpc/lib/vmx-helper.c | 11
-rw-r--r-- arch/powerpc/mm/fault.c | 9
-rw-r--r-- arch/powerpc/mm/highmem.c | 4
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c | 25
-rw-r--r-- arch/powerpc/mm/pgtable_64.c | 11
-rw-r--r-- arch/powerpc/mm/tlb_nohash.c | 2
-rw-r--r-- arch/s390/crypto/ghash_s390.c | 25
-rw-r--r-- arch/s390/crypto/prng.c | 2
-rw-r--r-- arch/s390/include/asm/barrier.h | 2
-rw-r--r-- arch/s390/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/s390/include/asm/pgtable.h | 2
-rw-r--r-- arch/s390/include/asm/topology.h | 3
-rw-r--r-- arch/s390/include/asm/uaccess.h | 15
-rw-r--r-- arch/s390/mm/fault.c | 2
-rw-r--r-- arch/s390/net/bpf_jit.h | 4
-rw-r--r-- arch/s390/net/bpf_jit_comp.c | 30
-rw-r--r-- arch/score/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/score/include/asm/uaccess.h | 15
-rw-r--r-- arch/score/lib/string.S | 2
-rw-r--r-- arch/score/mm/fault.c | 3
-rw-r--r-- arch/sh/include/asm/barrier.h | 2
-rw-r--r-- arch/sh/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/sh/mm/fault.c | 5
-rw-r--r-- arch/sparc/include/asm/barrier_64.h | 4
-rw-r--r-- arch/sparc/include/asm/cmpxchg_32.h | 1
-rw-r--r-- arch/sparc/include/asm/cmpxchg_64.h | 2
-rw-r--r-- arch/sparc/include/asm/cpudata_64.h | 3
-rw-r--r-- arch/sparc/include/asm/pgtable_64.h | 22
-rw-r--r-- arch/sparc/include/asm/topology_64.h | 5
-rw-r--r-- arch/sparc/include/asm/trap_block.h | 2
-rw-r--r-- arch/sparc/kernel/entry.h | 2
-rw-r--r-- arch/sparc/kernel/leon_pci_grpci2.c | 1
-rw-r--r-- arch/sparc/kernel/mdesc.c | 136
-rw-r--r-- arch/sparc/kernel/pci.c | 59
-rw-r--r-- arch/sparc/kernel/setup_64.c | 21
-rw-r--r-- arch/sparc/kernel/smp_64.c | 13
-rw-r--r-- arch/sparc/kernel/vmlinux.lds.S | 5
-rw-r--r-- arch/sparc/mm/fault_32.c | 4
-rw-r--r-- arch/sparc/mm/fault_64.c | 4
-rw-r--r-- arch/sparc/mm/highmem.c | 4
-rw-r--r-- arch/sparc/mm/init_64.c | 76
-rw-r--r-- arch/tile/include/asm/atomic_64.h | 3
-rw-r--r-- arch/tile/include/asm/topology.h | 2
-rw-r--r-- arch/tile/include/asm/uaccess.h | 18
-rw-r--r-- arch/tile/mm/fault.c | 4
-rw-r--r-- arch/tile/mm/highmem.c | 3
-rw-r--r-- arch/um/kernel/trap.c | 5
-rw-r--r-- arch/unicore32/mm/fault.c | 2
-rw-r--r-- arch/x86/Kconfig | 5
-rw-r--r-- arch/x86/Kconfig.debug | 12
-rw-r--r-- arch/x86/boot/compressed/eboot.c | 2
-rw-r--r-- arch/x86/boot/compressed/misc.h | 11
-rw-r--r-- arch/x86/crypto/aesni-intel_glue.c | 2
-rw-r--r-- arch/x86/crypto/camellia_aesni_avx2_glue.c | 10
-rw-r--r-- arch/x86/crypto/camellia_aesni_avx_glue.c | 15
-rw-r--r-- arch/x86/crypto/cast5_avx_glue.c | 15
-rw-r--r-- arch/x86/crypto/cast6_avx_glue.c | 15
-rw-r--r-- arch/x86/crypto/crc32-pclmul_glue.c | 2
-rw-r--r-- arch/x86/crypto/crc32c-intel_glue.c | 3
-rw-r--r-- arch/x86/crypto/crct10dif-pclmul_glue.c | 2
-rw-r--r-- arch/x86/crypto/fpu.c | 2
-rw-r--r-- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2
-rw-r--r-- arch/x86/crypto/serpent_avx2_glue.c | 11
-rw-r--r-- arch/x86/crypto/serpent_avx_glue.c | 15
-rw-r--r-- arch/x86/crypto/sha-mb/sha1_mb.c | 5
-rw-r--r-- arch/x86/crypto/sha1_ssse3_glue.c | 16
-rw-r--r-- arch/x86/crypto/sha256_ssse3_glue.c | 16
-rw-r--r-- arch/x86/crypto/sha512_ssse3_glue.c | 16
-rw-r--r-- arch/x86/crypto/twofish_avx_glue.c | 16
-rw-r--r-- arch/x86/ia32/ia32_signal.c | 13
-rw-r--r-- arch/x86/include/asm/alternative.h | 6
-rw-r--r-- arch/x86/include/asm/amd_nb.h | 11
-rw-r--r-- arch/x86/include/asm/barrier.h | 4
-rw-r--r-- arch/x86/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/x86/include/asm/crypto/glue_helper.h | 2
-rw-r--r-- arch/x86/include/asm/dma-mapping.h | 46
-rw-r--r-- arch/x86/include/asm/efi.h | 2
-rw-r--r-- arch/x86/include/asm/fpu-internal.h | 626
-rw-r--r-- arch/x86/include/asm/fpu/api.h | 48
-rw-r--r-- arch/x86/include/asm/fpu/internal.h | 694
-rw-r--r-- arch/x86/include/asm/fpu/regset.h | 21
-rw-r--r-- arch/x86/include/asm/fpu/signal.h | 33
-rw-r--r-- arch/x86/include/asm/fpu/types.h | 293
-rw-r--r-- arch/x86/include/asm/fpu/xstate.h | 46
-rw-r--r-- arch/x86/include/asm/hypervisor.h | 2
-rw-r--r-- arch/x86/include/asm/i387.h | 108
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 5
-rw-r--r-- arch/x86/include/asm/mmu_context.h | 13
-rw-r--r-- arch/x86/include/asm/mpx.h | 74
-rw-r--r-- arch/x86/include/asm/paravirt.h | 29
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 10
-rw-r--r-- arch/x86/include/asm/preempt.h | 8
-rw-r--r-- arch/x86/include/asm/processor.h | 161
-rw-r--r-- arch/x86/include/asm/ptrace.h | 2
-rw-r--r-- arch/x86/include/asm/qspinlock.h | 57
-rw-r--r-- arch/x86/include/asm/qspinlock_paravirt.h | 6
-rw-r--r-- arch/x86/include/asm/segment.h | 14
-rw-r--r-- arch/x86/include/asm/simd.h | 2
-rw-r--r-- arch/x86/include/asm/smp.h | 10
-rw-r--r-- arch/x86/include/asm/spinlock.h | 7
-rw-r--r-- arch/x86/include/asm/spinlock_types.h | 4
-rw-r--r-- arch/x86/include/asm/stackprotector.h | 2
-rw-r--r-- arch/x86/include/asm/suspend_32.h | 2
-rw-r--r-- arch/x86/include/asm/suspend_64.h | 2
-rw-r--r-- arch/x86/include/asm/topology.h | 2
-rw-r--r-- arch/x86/include/asm/trace/mpx.h | 132
-rw-r--r-- arch/x86/include/asm/uaccess.h | 15
-rw-r--r-- arch/x86/include/asm/uaccess_32.h | 6
-rw-r--r-- arch/x86/include/asm/user.h | 12
-rw-r--r-- arch/x86/include/asm/xcr.h | 49
-rw-r--r-- arch/x86/include/asm/xen/page.h | 5
-rw-r--r-- arch/x86/include/asm/xor.h | 2
-rw-r--r-- arch/x86/include/asm/xor_32.h | 2
-rw-r--r-- arch/x86/include/asm/xor_avx.h | 2
-rw-r--r-- arch/x86/include/asm/xsave.h | 257
-rw-r--r-- arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r-- arch/x86/include/uapi/asm/sigcontext.h | 8
-rw-r--r-- arch/x86/kernel/Makefile | 2
-rw-r--r-- arch/x86/kernel/alternative.c | 5
-rw-r--r-- arch/x86/kernel/amd_nb.c | 4
-rw-r--r-- arch/x86/kernel/aperture_64.c | 8
-rw-r--r-- arch/x86/kernel/asm-offsets.c | 19
-rw-r--r-- arch/x86/kernel/asm-offsets_32.c | 18
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c | 21
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 35
-rw-r--r-- arch/x86/kernel/cpu/bugs.c | 55
-rw-r--r-- arch/x86/kernel/cpu/common.c | 74
-rw-r--r-- arch/x86/kernel/cpu/hypervisor.c | 4
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 22
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 174
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 47
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 348
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_bts.c | 9
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 108
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 321
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 13
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_pt.c | 74
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 21
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 20
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 32
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 6
-rw-r--r-- arch/x86/kernel/cpu/proc.c | 3
-rw-r--r-- arch/x86/kernel/fpu/Makefile | 5
-rw-r--r-- arch/x86/kernel/fpu/bugs.c | 71
-rw-r--r-- arch/x86/kernel/fpu/core.c | 523
-rw-r--r-- arch/x86/kernel/fpu/init.c | 354
-rw-r--r-- arch/x86/kernel/fpu/regset.c | 356
-rw-r--r-- arch/x86/kernel/fpu/signal.c | 404
-rw-r--r-- arch/x86/kernel/fpu/xstate.c | 461
-rw-r--r-- arch/x86/kernel/head64.c | 2
-rw-r--r-- arch/x86/kernel/head_32.S | 33
-rw-r--r-- arch/x86/kernel/head_64.S | 20
-rw-r--r-- arch/x86/kernel/i386_ksyms_32.c | 4
-rw-r--r-- arch/x86/kernel/i387.c | 656
-rw-r--r-- arch/x86/kernel/kvm.c | 43
-rw-r--r-- arch/x86/kernel/paravirt-spinlocks.c | 24
-rw-r--r-- arch/x86/kernel/paravirt_patch_32.c | 22
-rw-r--r-- arch/x86/kernel/paravirt_patch_64.c | 22
-rw-r--r-- arch/x86/kernel/pci-dma.c | 45
-rw-r--r-- arch/x86/kernel/pci-swiotlb.c | 7
-rw-r--r-- arch/x86/kernel/process.c | 61
-rw-r--r-- arch/x86/kernel/process_32.c | 15
-rw-r--r-- arch/x86/kernel/process_64.c | 13
-rw-r--r-- arch/x86/kernel/ptrace.c | 12
-rw-r--r-- arch/x86/kernel/setup.c | 12
-rw-r--r-- arch/x86/kernel/signal.c | 38
-rw-r--r-- arch/x86/kernel/smpboot.c | 45
-rw-r--r-- arch/x86/kernel/traps.c | 134
-rw-r--r-- arch/x86/kernel/tsc_sync.c | 2
-rw-r--r-- arch/x86/kernel/uprobes.c | 10
-rw-r--r-- arch/x86/kernel/x8664_ksyms_64.c | 4
-rw-r--r-- arch/x86/kernel/xsave.c | 724
-rw-r--r-- arch/x86/kvm/cpuid.c | 4
-rw-r--r-- arch/x86/kvm/cpuid.h | 8
-rw-r--r-- arch/x86/kvm/lapic.c | 26
-rw-r--r-- arch/x86/kvm/mmu.c | 16
-rw-r--r-- arch/x86/kvm/mmu.h | 4
-rw-r--r-- arch/x86/kvm/paging_tmpl.h | 7
-rw-r--r-- arch/x86/kvm/svm.c | 1
-rw-r--r-- arch/x86/kvm/vmx.c | 6
-rw-r--r-- arch/x86/kvm/x86.c | 94
-rw-r--r-- arch/x86/lguest/boot.c | 2
-rw-r--r-- arch/x86/lib/mmx_32.c | 2
-rw-r--r-- arch/x86/lib/thunk_32.S | 4
-rw-r--r-- arch/x86/lib/thunk_64.S | 4
-rw-r--r-- arch/x86/lib/usercopy_32.c | 6
-rw-r--r-- arch/x86/math-emu/fpu_aux.c | 4
-rw-r--r-- arch/x86/math-emu/fpu_entry.c | 20
-rw-r--r-- arch/x86/math-emu/fpu_system.h | 2
-rw-r--r-- arch/x86/mm/fault.c | 5
-rw-r--r-- arch/x86/mm/highmem_32.c | 3
-rw-r--r-- arch/x86/mm/iomap_32.c | 2
-rw-r--r-- arch/x86/mm/ioremap.c | 15
-rw-r--r-- arch/x86/mm/mpx.c | 519
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 35
-rw-r--r-- arch/x86/pci/acpi.c | 37
-rw-r--r-- arch/x86/platform/efi/efi.c | 2
-rw-r--r-- arch/x86/power/cpu.c | 11
-rw-r--r-- arch/x86/um/asm/barrier.h | 3
-rw-r--r-- arch/x86/vdso/Makefile | 2
-rw-r--r-- arch/x86/xen/enlighten.c | 29
-rw-r--r-- arch/x86/xen/spinlock.c | 64
-rw-r--r-- arch/x86/xen/suspend.c | 10
-rw-r--r-- arch/xtensa/include/asm/dma-mapping.h | 13
-rw-r--r-- arch/xtensa/mm/fault.c | 4
-rw-r--r-- arch/xtensa/mm/highmem.c | 2
419 files changed, 7323 insertions(+), 4933 deletions(-)
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile
index cd143887380a..8399bd0e68e8 100644
--- a/arch/alpha/boot/Makefile
+++ b/arch/alpha/boot/Makefile
@@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \
tools/bootpzh bootloader bootpheader bootpzheader
OBJSTRIP := $(obj)/tools/objstrip
+HOSTCFLAGS := -Wall -I$(objtree)/usr/include
+BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
+
# SRM bootable image. Copy to offset 512 of a partition.
$(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
$(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
$(call if_changed,objstrip)
-LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootpheader := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
-OBJ_bootlx := $(obj)/head.o $(obj)/main.o
-OBJ_bootph := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
$(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
$(call if_changed,ld)
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 3baf2d1e908d..dd6eb4a33582 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -19,7 +19,6 @@
#include "ksize.h"
-extern int vsprintf(char *, const char *, va_list);
extern unsigned long switch_to_osf_pal(unsigned long nr,
struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
unsigned long *vptb);
diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c
new file mode 100644
index 000000000000..f844dae8a54a
--- /dev/null
+++ b/arch/alpha/boot/stdio.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+# define do_div(n, base) ({ \
+ unsigned int __base = (base); \
+ unsigned int __rem; \
+ __rem = ((unsigned long long)(n)) % __base; \
+ (n) = ((unsigned long long)(n)) / __base; \
+ __rem; \
+})
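+/*
+ * Note: like the asm-generic do_div(), this macro evaluates to the
+ * remainder and rewrites (n) in place with the quotient; it is
+ * open-coded here, presumably so the standalone boot loader need not
+ * pull in kernel headers.
+ */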
+
+
+static int skip_atoi(const char **s)
+{
+ int i, c;
+
+ for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+ i = i*10 + c - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+ int i;
+
+ if (type & LARGE)
+ digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if ((signed long long)num < 0) {
+ sign = '-';
+ num = - (signed long long)num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0) {
+ tmp[i++] = digits[do_div(num, base)];
+ }
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT)))
+ while(size-->0)
+ *str++ = ' ';
+ if (sign)
+ *str++ = sign;
+ if (type & SPECIAL) {
+ if (base==8)
+ *str++ = '0';
+ else if (base==16) {
+ *str++ = '0';
+ *str++ = digits[33];
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ *str++ = c;
+ while (i < precision--)
+ *str++ = '0';
+ while (i-- > 0)
+ *str++ = tmp[i];
+ while (size-- > 0)
+ *str++ = ' ';
+ return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char * str;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+ for (str=buf ; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ *str++ = *fmt;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= LEFT; goto repeat;
+ case '+': flags |= PLUS; goto repeat;
+ case ' ': flags |= SPACE; goto repeat;
+ case '#': flags |= SPECIAL; goto repeat;
+ case '0': flags |= ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if ('0' <= *fmt && *fmt <= '9')
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if ('0' <= *fmt && *fmt <= '9')
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'l' && *(fmt + 1) == 'l') {
+ qualifier = 'q';
+ fmt += 2;
+ } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+ || *fmt == 'Z') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ *str++ = ' ';
+ *str++ = (unsigned char) va_arg(args, int);
+ while (--field_width > 0)
+ *str++ = ' ';
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ *str++ = ' ';
+ for (i = 0; i < len; ++i)
+ *str++ = *s++;
+ while (len < field_width--)
+ *str++ = ' ';
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str,
+ (unsigned long) va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ *str++ = '%';
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ *str++ = '%';
+ if (*fmt)
+ *str++ = *fmt;
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'q') {
+ num = va_arg(args, unsigned long long);
+ if (flags & SIGN)
+ num = (signed long long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & SIGN)
+ num = (signed int) num;
+ }
+ str = number(str, num, base, field_width, precision, flags);
+ }
+ *str = '\0';
+ return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsprintf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
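The file above is self-contained C (only <stdarg.h> and <stddef.h>), so it can be smoke-tested on the build host before booting. A minimal sketch, assuming the harness below (the file name, buffer size, and format string are illustrative, not part of the patch) is compiled together with arch/alpha/boot/stdio.c:

/* test-stdio.c -- hypothetical host-side harness for the boot vsprintf().
 * Build sketch: cc -o test-stdio test-stdio.c arch/alpha/boot/stdio.c
 */
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>		/* write(): host-side output only */

/* provided by arch/alpha/boot/stdio.c */
extern int vsprintf(char *buf, const char *fmt, va_list args);

static int test_printf(char *buf, const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsprintf(buf, fmt, args);
	va_end(args);
	return len;
}

int main(void)
{
	char buf[128];
	/* exercises %p, zero padding, and the 'Z' (size_t) qualifier */
	int len = test_printf(buf, "pcb at %p, nr=%05d, size=%Zu\n",
			      (void *)buf, 42, sizeof(buf));

	write(1, buf, len);
	return 0;
}

This matches the Makefile change above: stdio.o is now linked into each boot image directly, instead of forcing vsprintf in with the -uvsprintf link flag.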
diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
index 367d53d031fc..dee82695f48b 100644
--- a/arch/alpha/boot/tools/objstrip.c
+++ b/arch/alpha/boot/tools/objstrip.c
@@ -27,6 +27,9 @@
#include <linux/param.h>
#ifdef __ELF__
# include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
#endif
/* bootfile size must be multiple of BLOCK_SIZE: */
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 429e8cd0d78e..e5117766529e 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -66,6 +66,4 @@
#undef __ASM__MB
#undef ____cmpxchg
-#define __HAVE_ARCH_CMPXCHG 1
-
#endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index f61e1a56c378..4cb4b6d3452c 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -2,6 +2,5 @@
#define _ALPHA_TYPES_H
#include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
#endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index c509d306db45..a56e608db2f9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,7 +3,7 @@
#include <uapi/asm/unistd.h>
-#define NR_SYSCALLS 511
+#define NR_SYSCALLS 514
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index d214a0358100..aa33bf5aacb6 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -472,5 +472,8 @@
#define __NR_sched_setattr 508
#define __NR_sched_getattr 509
#define __NR_renameat2 510
+#define __NR_getrandom 511
+#define __NR_memfd_create 512
+#define __NR_execveat 513
#endif /* _UAPI_ALPHA_UNISTD_H */
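With the three numbers above wired up (together with the matching sys_call_table entries in systbls.S below), user space can reach the new calls even before libc grows wrappers. A hedged sketch using syscall(3); the file and function names are illustrative, not from the patch:

/* getseed.c -- hypothetical direct invocation of getrandom(2) on alpha */
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_getrandom
#define __NR_getrandom 511	/* alpha number added in this patch */
#endif

long get_seed(void *buf, size_t len)
{
	/* flags = 0: blocks only until the urandom pool is initialized */
	return syscall(__NR_getrandom, buf, len, 0);
}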
diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c
index 253cf1a87481..51267ac5729b 100644
--- a/arch/alpha/kernel/err_ev6.c
+++ b/arch/alpha/kernel/err_ev6.c
@@ -6,7 +6,6 @@
* Error handling code supporting Alpha systems
*/
-#include <linux/init.h>
#include <linux/sched.h>
#include <asm/io.h>
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 7b2be251c30f..51f2c8654253 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -19,7 +19,6 @@
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/random.h>
-#include <linux/init.h>
#include <linux/irq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index e51f578636a5..36dc91ace83a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
if (tv) {
if (get_tv32((struct timeval *)&kts, tv))
return -EFAULT;
+ kts.tv_nsec *= 1000;
}
if (tz) {
if (copy_from_user(&ktz, tz, sizeof(*tz)))
return -EFAULT;
}
- kts.tv_nsec *= 1000;
-
return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
}
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 1941a07b5811..84d13263ce46 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
}
/*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
*/
-
int
copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long arg,
+ unsigned long kthread_arg,
struct task_struct *p)
{
extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
childstack->r26 = (unsigned long) ret_from_kernel_thread;
childstack->r9 = usp; /* function */
- childstack->r10 = arg;
+ childstack->r10 = kthread_arg;
childregs->hae = alpha_mv.hae_cache,
childti->pcb.usp = 0;
return 0;
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 99ac36d5de4e..2f24447fef92 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -63,7 +63,6 @@ static struct {
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
- IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
};
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
-
static void
send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
generic_smp_call_function_interrupt();
break;
- case IPI_CALL_FUNC_SINGLE:
- generic_smp_call_function_single_interrupt();
- break;
-
case IPI_CPU_STOP:
halt();
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void arch_send_call_function_single_ipi(int cpu)
{
- send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+ send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
}
static void
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 6f01d9ad7b81..72b59511e59a 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -237,8 +237,7 @@ srmcons_init(void)
return -ENODEV;
}
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
/*
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index f21d61fab678..24e41bd7d3c9 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
irq = intline;
- msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ msi_loc = dev->msi_cap;
msg_ctl = 0;
if (msi_loc)
pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 24789713f1ea..9b62e3fd4f03 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -529,6 +529,9 @@ sys_call_table:
.quad sys_sched_setattr
.quad sys_sched_getattr
.quad sys_renameat2 /* 510 */
+ .quad sys_getrandom
+ .quad sys_memfd_create
+ .quad sys_execveat
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 9c4c189eb22f..74aceead06e9 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -14,7 +14,6 @@
#include <linux/tty.h>
#include <linux/delay.h>
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/ratelimit.h>
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 9d0ac091a52a..4a905bd667e2 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -23,8 +23,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
@@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
/* If we're in an interrupt context, or have no user context,
we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || faulthandler_disabled())
goto no_context;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c
index 18aa9b4f94f1..086a0d5445c5 100644
--- a/arch/alpha/oprofile/op_model_ev4.c
+++ b/arch/alpha/oprofile/op_model_ev4.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c
index c32f8a0ad925..c300f5ef3482 100644
--- a/arch/alpha/oprofile/op_model_ev5.c
+++ b/arch/alpha/oprofile/op_model_ev5.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c
index 1c84cc257fc7..02edf5971614 100644
--- a/arch/alpha/oprofile/op_model_ev6.c
+++ b/arch/alpha/oprofile/op_model_ev6.c
@@ -8,7 +8,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c
index 34a57a126553..adb1744d20f3 100644
--- a/arch/alpha/oprofile/op_model_ev67.c
+++ b/arch/alpha/oprofile/op_model_ev67.c
@@ -9,7 +9,6 @@
*/
#include <linux/oprofile.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug
index a7fc0da25650..ff6a4b5ce927 100644
--- a/arch/arc/Kconfig.debug
+++ b/arch/arc/Kconfig.debug
@@ -2,19 +2,6 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config EARLY_PRINTK
- bool "Early printk" if EMBEDDED
- default y
- help
- Write kernel log output directly into the VGA buffer or to a serial
- port.
-
- This is useful for kernel debugging when your machine crashes very
- early before the console code is initialized. For normal operation
- it is not recommended because it looks ugly and doesn't cooperate
- with klogd/syslogd or the X server. You should normally N here,
- unless you want to debug such a crash.
-
config 16KSTACKS
bool "Use 16Kb for kernel stacks instead of 8Kb"
help
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 067551b6920a..9917a45fc430 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -99,7 +99,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
atomic_ops_unlock(flags); \
}
-#define ATOMIC_OP_RETURN(op, c_op) \
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 4dc64ddebece..05b5aaf5b0f9 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
if (!ret) {
switch (cmp) {
@@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
return ret;
}
-/* Compare-xchg with preemption disabled.
+/* Compare-xchg with pagefaults disabled.
* Notes:
* -Best-Effort: Exchg happens only if compare succeeds.
* If compare fails, returns; leaving retry/looping to upper layers
@@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();
/* TBD : can use llock/scond */
__asm__ __volatile__(
@@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "cc", "memory");
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
*uval = val;
return val;
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 8c3a3e02ba92..12b2100db073 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -266,7 +266,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
* Machine specific helpers for Entire D-Cache or Per Line ops
*/
-static unsigned int __before_dc_op(const int op)
+static inline unsigned int __before_dc_op(const int op)
{
unsigned int reg = reg;
@@ -284,7 +284,7 @@ static unsigned int __before_dc_op(const int op)
return reg;
}
-static void __after_dc_op(const int op, unsigned int reg)
+static inline void __after_dc_op(const int op, unsigned int reg)
{
if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 6a2e006cbcce..d948e4e9d89c 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 86217db2937a..992736b5229b 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
imx25-karo-tx25.dtb \
imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
imx27-apf27.dtb \
imx27-apf27dev.dtb \
imx27-eukrea-mbimxsd27-baseboard.dtb \
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index c3255e0c90aa..dbb3f4d2bf84 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -223,6 +223,25 @@
/include/ "tps65217.dtsi"
&tps {
+ /*
+ * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
+ * mode") at poweroff. Most BeagleBone versions do not support RTC-only
+ * mode and risk hardware damage if this mode is entered.
+ *
+ * For details, see linux-omap mailing list May 2015 thread
+ * [PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
+ * In particular, messages:
+ * http://www.spinics.net/lists/linux-omap/msg118585.html
+ * http://www.spinics.net/lists/linux-omap/msg118615.html
+ *
+ * You can override this later with
+ * &tps { /delete-property/ ti,pmic-shutdown-controller; }
+ * if you want to use RTC-only mode and made sure you are not affected
+ * by the hardware problems. (Tip: double-check by performing a current
+ * measurement after shutdown: it should be less than 1 mA.)
+ */
+ ti,pmic-shutdown-controller;
+
regulators {
dcdc1_reg: regulator@0 {
regulator-name = "vdds_dpr";
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 5c42d259fa68..901739fcb85a 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -80,7 +80,3 @@
status = "okay";
};
};
-
-&rtc {
- system-power-controller;
-};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 87fc7a35e802..156d05efcb70 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -654,7 +654,7 @@
wlcore: wlcore@2 {
compatible = "ti,wl1271";
reg = <2>;
- interrupt-parent = <&gpio1>;
+ interrupt-parent = <&gpio0>;
interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
ref-clock-frequency = <38400000>;
};
diff --git a/arch/arm/boot/dts/am35xx-clocks.dtsi b/arch/arm/boot/dts/am35xx-clocks.dtsi
index 518b8fde88b0..18cc826e9db5 100644
--- a/arch/arm/boot/dts/am35xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am35xx-clocks.dtsi
@@ -12,7 +12,7 @@
#clock-cells = <0>;
compatible = "ti,am35xx-gate-clock";
clocks = <&ipss_ick>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <1>;
};
@@ -20,7 +20,7 @@
#clock-cells = <0>;
compatible = "ti,gate-clock";
clocks = <&rmii_ck>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <9>;
};
@@ -28,7 +28,7 @@
#clock-cells = <0>;
compatible = "ti,am35xx-gate-clock";
clocks = <&ipss_ick>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <2>;
};
@@ -36,7 +36,7 @@
#clock-cells = <0>;
compatible = "ti,gate-clock";
clocks = <&pclk_ck>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <10>;
};
@@ -44,7 +44,7 @@
#clock-cells = <0>;
compatible = "ti,am35xx-gate-clock";
clocks = <&ipss_ick>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <0>;
};
@@ -52,7 +52,7 @@
#clock-cells = <0>;
compatible = "ti,gate-clock";
clocks = <&sys_ck>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <8>;
};
@@ -60,7 +60,7 @@
#clock-cells = <0>;
compatible = "ti,am35xx-gate-clock";
clocks = <&sys_ck>;
- reg = <0x059c>;
+ reg = <0x032c>;
ti,bit-shift = <3>;
};
};
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 8ae29c955c11..c17097d2c167 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -49,7 +49,7 @@
pinctrl-0 = <&matrix_keypad_pins>;
debounce-delay-ms = <5>;
- col-scan-delay-us = <1500>;
+ col-scan-delay-us = <5>;
row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH /* Bank5, pin5 */
&gpio5 6 GPIO_ACTIVE_HIGH>; /* Bank5, pin6 */
@@ -473,7 +473,7 @@
interrupt-parent = <&gpio0>;
interrupts = <31 0>;
- wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>;
touchscreen-size-x = <480>;
touchscreen-size-y = <272>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 15f198e4864d..7128fad991ac 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -18,6 +18,7 @@
aliases {
rtc0 = &mcp_rtc;
rtc1 = &tps659038_rtc;
+ rtc2 = &rtc;
};
memory {
@@ -83,7 +84,7 @@
gpio_fan: gpio_fan {
/* Based on 5v 500mA AFB02505HHB */
compatible = "gpio-fan";
- gpios = <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>;
+ gpios = <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>;
gpio-fan,speed-map = <0 0>,
<13000 1>;
#cooling-cells = <2>;
@@ -130,8 +131,8 @@
uart3_pins_default: uart3_pins_default {
pinctrl-single,pins = <
- 0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */
- 0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */
+ 0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */
+ 0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */
>;
};
@@ -455,7 +456,7 @@
mcp_rtc: rtc@6f {
compatible = "microchip,mcp7941x";
reg = <0x6f>;
- interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>; /* IRQ_SYS_1N */
+ interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>; /* IRQ_SYS_1N */
pinctrl-names = "default";
pinctrl-0 = <&mcp79410_pins_default>;
@@ -478,7 +479,7 @@
&uart3 {
status = "okay";
interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
- <&dra7_pmx_core 0x248>;
+ <&dra7_pmx_core 0x3f8>;
pinctrl-names = "default";
pinctrl-0 = <&uart3_pins_default>;
diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index c675257f2377..f076ff856d8b 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -69,7 +69,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
/* 25 MHz reference crystal */
refclk: oscillator {
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index ed2dd8ba4080..218a2acd36e5 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -585,7 +585,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
/* 25 MHz reference crystal */
diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi
index 0e85fc15ceda..ecd1318109ba 100644
--- a/arch/arm/boot/dts/armada-39x.dtsi
+++ b/arch/arm/boot/dts/armada-39x.dtsi
@@ -502,7 +502,7 @@
mainpll: mainpll {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <2000000000>;
+ clock-frequency = <1000000000>;
};
};
};
diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index a2cf2154dcdb..fdd187c55aa5 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
@@ -95,6 +95,11 @@
internal-regs {
+ rtc@10300 {
+ /* No crystal connected to the internal RTC */
+ status = "disabled";
+ };
+
/* J10: VCC, NC, RX, NC, TX, GND */
serial@12000 {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index e3b08fb959e5..990e8a2100f0 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -105,6 +105,10 @@
};
internal-regs {
+ rtc@10300 {
+ /* No crystal connected to the internal RTC */
+ status = "disabled";
+ };
serial@12000 {
status = "okay";
};
diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index de8427be830a..289806adb343 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -382,7 +382,7 @@
ti,hwmods = "usb_otg_hs";
usb0: usb@47401000 {
- compatible = "ti,musb-am33xx";
+ compatible = "ti,musb-dm816";
reg = <0x47401400 0x400
0x47401000 0x200>;
reg-names = "mc", "control";
@@ -422,7 +422,7 @@
};
usb1: usb@47401800 {
- compatible = "ti,musb-am33xx";
+ compatible = "ti,musb-dm816";
reg = <0x47401c00 0x400
0x47401800 0x200>;
reg-names = "mc", "control";
diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts
index aae7efc09b0b..e6fa251e17b9 100644
--- a/arch/arm/boot/dts/dove-cubox.dts
+++ b/arch/arm/boot/dts/dove-cubox.dts
@@ -87,6 +87,7 @@
/* connect xtal input to 25MHz reference */
clocks = <&ref25>;
+ clock-names = "xtal";
/* connect xtal input as source of pll0 and pll1 */
silabs,pll-source = <0 0>, <1 0>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 5332b57b4950..f03a091cd076 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -911,7 +911,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>,
- <0x4ae06014 0x4>, <0x4a003b20 0x8>,
+ <0x4ae06014 0x4>, <0x4a003b20 0xc>,
<0x4ae0c158 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -944,7 +944,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>,
- <0x4ae06010 0x4>, <0x4a0025cc 0x8>,
+ <0x4ae06010 0x4>, <0x4a0025cc 0xc>,
<0x4a002470 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -977,7 +977,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>,
- <0x4ae06010 0x4>, <0x4a0025e0 0x8>,
+ <0x4ae06010 0x4>, <0x4a0025e0 0xc>,
<0x4a00246c 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -1010,7 +1010,7 @@
ti,clock-cycles = <16>;
reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>,
- <0x4ae06010 0x4>, <0x4a003b08 0x8>,
+ <0x4ae06010 0x4>, <0x4a003b08 0xc>,
<0x4ae0c154 0x4>;
reg-names = "setup-address", "control-address",
"int-address", "efuse-address",
@@ -1203,7 +1203,7 @@
status = "disabled";
};
- rtc@48838000 {
+ rtc: rtc@48838000 {
compatible = "ti,am3352-rtc";
reg = <0x48838000 0x100>;
interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index 8de12af7c276..d6b49e5b32e9 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -9,6 +9,7 @@
#include <dt-bindings/sound/samsung-i2s.h>
#include <dt-bindings/input/input.h>
+#include <dt-bindings/clock/maxim,max77686.h>
#include "exynos4412.dtsi"
/ {
@@ -105,6 +106,8 @@
rtc@10070000 {
status = "okay";
+ clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>;
+ clock-names = "rtc", "rtc_src";
};
g2d@10800000 {
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 173ffa479ad3..792394dd0f2a 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -736,7 +736,7 @@
display-timings {
timing-0 {
- clock-frequency = <0>;
+ clock-frequency = <57153600>;
hactive = <720>;
vactive = <1280>;
hfront-porch = <5>;
diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts
index 2657e842e5a5..1eca97ee4bd6 100644
--- a/arch/arm/boot/dts/exynos5250-snow.dts
+++ b/arch/arm/boot/dts/exynos5250-snow.dts
@@ -567,6 +567,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
samsung,dw-mshc-ciu-div = <3>;
samsung,dw-mshc-sdr-timing = <2 3>;
diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts
index 0788d08fb43e..146e71118a72 100644
--- a/arch/arm/boot/dts/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts
@@ -711,6 +711,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
clock-frequency = <400000000>;
samsung,dw-mshc-ciu-div = <1>;
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
index 5d31fc140823..2180a0152c9b 100644
--- a/arch/arm/boot/dts/exynos5420-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
@@ -28,7 +28,7 @@ trips {
type = "active";
};
cpu-crit-0 {
- temperature = <1200000>; /* millicelsius */
+ temperature = <120000>; /* millicelsius */
hysteresis = <0>; /* millicelsius */
type = "critical";
};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index f67b23f303c3..45317538bbae 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -536,6 +536,7 @@
clock-names = "dp";
phys = <&dp_phy>;
phy-names = "dp";
+ power-domains = <&disp_pd>;
};
mipi_phy: video-phy@10040714 {
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
index 48adfa8f4300..356e963edf11 100644
--- a/arch/arm/boot/dts/exynos5440-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
@@ -18,7 +18,7 @@ trips {
type = "active";
};
cpu-crit-0 {
- temperature = <1050000>; /* millicelsius */
+ temperature = <105000>; /* millicelsius */
hysteresis = <0>; /* millicelsius */
type = "critical";
};
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index 412f41d62686..02eb8b15374f 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -674,6 +674,7 @@
num-slots = <1>;
broken-cd;
cap-sdio-irq;
+ keep-power-in-suspend;
card-detect-delay = <200>;
clock-frequency = <400000000>;
samsung,dw-mshc-ciu-div = <1>;
diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts
index 7e6eef2488e8..82045398bf1f 100644
--- a/arch/arm/boot/dts/imx23-olinuxino.dts
+++ b/arch/arm/boot/dts/imx23-olinuxino.dts
@@ -12,6 +12,7 @@
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
#include "imx23.dtsi"
/ {
@@ -93,6 +94,7 @@
ahb@80080000 {
usb0: usb@80080000 {
+ dr_mode = "host";
vbus-supply = <&reg_usb0_vbus>;
status = "okay";
};
@@ -122,7 +124,7 @@
user {
label = "green";
- gpios = <&gpio2 1 1>;
+ gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
};
};
};
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index e4d3aecc4ed2..677f81d9dcd5 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -428,6 +428,7 @@
pwm4: pwm@53fc8000 {
compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
+ #pwm-cells = <2>;
reg = <0x53fc8000 0x4000>;
clocks = <&clks 108>, <&clks 52>;
clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index 6951b66d1ab7..bc215e4b75fd 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -533,7 +533,7 @@
fec: ethernet@1002b000 {
compatible = "fsl,imx27-fec";
- reg = <0x1002b000 0x4000>;
+ reg = <0x1002b000 0x1000>;
interrupts = <50>;
clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
<&clks IMX27_CLK_FEC_AHB_GATE>;
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 25e25f82fbae..4e073e854742 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -913,7 +913,7 @@
80 81 68 69
70 71 72 73
74 75 76 77>;
- interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty",
+ interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
"saif0", "saif1", "i2c0", "i2c1",
"auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
"auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
index 19cc269a08d4..1ce6133b67f5 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
@@ -31,6 +31,7 @@
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
gpio = <&gpio4 15 0>;
+ enable-active-high;
};
reg_usb_h1_vbus: regulator@1 {
@@ -40,6 +41,7 @@
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
gpio = <&gpio1 0 0>;
+ enable-active-high;
};
};
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 46b2fed7c319..3b24b12651b2 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -185,7 +185,6 @@
&i2c3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c3>;
- pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>;
status = "okay";
max7310_a: gpio@30 {
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index 134d3f27a8ec..921de6605f07 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -110,6 +110,8 @@
nand@0,0 {
reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
nand-bus-width = <16>;
+ gpmc,device-width = <2>;
+ ti,nand-ecc-opt = "sw";
gpmc,sync-clk-ps = <0>;
gpmc,cs-on-ns = <0>;
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index a29315833ecd..5f5e0f3d5b64 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -498,6 +498,8 @@
DRVDD-supply = <&vmmc2>;
IOVDD-supply = <&vio>;
DVDD-supply = <&vio>;
+
+ ai3x-micbias-vg = <1>;
};
tlv320aic3x_aux: tlv320aic3x@19 {
@@ -509,6 +511,8 @@
DRVDD-supply = <&vmmc2>;
IOVDD-supply = <&vio>;
DVDD-supply = <&vio>;
+
+ ai3x-micbias-vg = <2>;
};
tsl2563: tsl2563@29 {
@@ -828,8 +832,8 @@
touchscreen-fuzz-x = <4>;
touchscreen-fuzz-y = <7>;
touchscreen-fuzz-pressure = <2>;
- touchscreen-max-x = <4096>;
- touchscreen-max-y = <4096>;
+ touchscreen-size-x = <4096>;
+ touchscreen-size-y = <4096>;
touchscreen-max-pressure = <2048>;
ti,x-plate-ohms = <280>;
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index d18a90f5eca3..69a40cfc1f29 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -456,6 +456,7 @@
};
mmu_isp: mmu@480bd400 {
+ #iommu-cells = <0>;
compatible = "ti,omap2-iommu";
reg = <0x480bd400 0x80>;
interrupts = <24>;
@@ -464,6 +465,7 @@
};
mmu_iva: mmu@5d000000 {
+ #iommu-cells = <0>;
compatible = "ti,omap2-iommu";
reg = <0x5d000000 0x80>;
interrupts = <28>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index efe5f737f39b..7d24ae0306b5 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -128,7 +128,7 @@
* hierarchy.
*/
ocp {
- compatible = "ti,omap4-l3-noc", "simple-bus";
+ compatible = "ti,omap5-l3-noc", "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
ranges;
diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts
index 74c3212f1f11..824ddab9c3ad 100644
--- a/arch/arm/boot/dts/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/r8a7791-koelsch.dts
@@ -545,7 +545,7 @@
compatible = "adi,adv7511w";
reg = <0x39>;
interrupt-parent = <&gpio3>;
- interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+ interrupts = <29 IRQ_TYPE_LEVEL_LOW>;
adi,input-depth = <8>;
adi,input-colorspace = "rgb";
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index bfd3f1c734b8..2201cd5da3bb 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -1017,23 +1017,6 @@
status = "disabled";
};
- vmmci: regulator-gpio {
- compatible = "regulator-gpio";
-
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <2900000>;
- regulator-name = "mmci-reg";
- regulator-type = "voltage";
-
- startup-delay-us = <100>;
- enable-active-high;
-
- states = <1800000 0x1
- 2900000 0x0>;
-
- status = "disabled";
- };
-
mcde@a0350000 {
compatible = "stericsson,mcde";
reg = <0xa0350000 0x1000>, /* MCDE */
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index bf8f0eddc2c0..744c1e3a744d 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -111,6 +111,21 @@
pinctrl-1 = <&i2c3_sleep_mode>;
};
+ vmmci: regulator-gpio {
+ compatible = "regulator-gpio";
+
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2900000>;
+ regulator-name = "mmci-reg";
+ regulator-type = "voltage";
+
+ startup-delay-us = <100>;
+ enable-active-high;
+
+ states = <1800000 0x1
+ 2900000 0x0>;
+ };
+
// External Micro SD slot
sdi0_per1@80126000 {
arm,primecell-periphid = <0x10480180>;
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 206826a855c0..1bc84ebdccaa 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -146,8 +146,21 @@
};
vmmci: regulator-gpio {
+ compatible = "regulator-gpio";
+
gpios = <&gpio7 4 0x4>;
enable-gpio = <&gpio6 25 0x4>;
+
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2900000>;
+ regulator-name = "mmci-reg";
+ regulator-type = "voltage";
+
+ startup-delay-us = <100>;
+ enable-active-high;
+
+ states = <1800000 0x1
+ 2900000 0x0>;
};
// External Micro SD slot
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index cf01c818b8ea..13cc7ca5e031 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -826,7 +826,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 59>, <&tegra_car 22>;
+ resets = <&tegra_car 22>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
@@ -838,6 +838,7 @@
nvidia,hssquelch-level = <2>;
nvidia,hsdiscon-level = <5>;
nvidia,xcvr-hsslew = <12>;
+ nvidia,has-utmi-pad-registers;
status = "disabled";
};
@@ -862,7 +863,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 22>, <&tegra_car 22>;
+ resets = <&tegra_car 58>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
@@ -874,7 +875,6 @@
nvidia,hssquelch-level = <2>;
nvidia,hsdiscon-level = <5>;
nvidia,xcvr-hsslew = <12>;
- nvidia,has-utmi-pad-registers;
status = "disabled";
};
@@ -899,7 +899,7 @@
<&tegra_car TEGRA124_CLK_PLL_U>,
<&tegra_car TEGRA124_CLK_USBD>;
clock-names = "reg", "pll_u", "utmi-pads";
- resets = <&tegra_car 58>, <&tegra_car 22>;
+ resets = <&tegra_car 59>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
nvidia,hssync-start-delay = <0>;
nvidia,idle-wait-delay = <17>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 7a2aeacd62c0..107395c32d82 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -191,6 +191,7 @@
compatible = "arm,cortex-a15-pmu";
interrupts = <0 68 4>,
<0 69 4>;
+ interrupt-affinity = <&cpu0>, <&cpu1>;
};
oscclk6a: oscclk6a {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
index 23662b5a5e9d..d949facba376 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
@@ -33,28 +33,28 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@0 {
+ A9_0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0>;
next-level-cache = <&L2>;
};
- cpu@1 {
+ A9_1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <1>;
next-level-cache = <&L2>;
};
- cpu@2 {
+ A9_2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <2>;
next-level-cache = <&L2>;
};
- cpu@3 {
+ A9_3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <3>;
@@ -170,6 +170,7 @@
compatible = "arm,pl310-cache";
reg = <0x1e00a000 0x1000>;
interrupts = <0 43 4>;
+ cache-unified;
cache-level = <2>;
arm,data-latency = <1 1 1>;
arm,tag-latency = <1 1 1>;
@@ -181,6 +182,8 @@
<0 61 4>,
<0 62 4>,
<0 63 4>;
+ interrupt-affinity = <&A9_0>, <&A9_1>, <&A9_2>, <&A9_3>;
};
dcc {
diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
index a5cd2eda3edf..9ea54b3dba09 100644
--- a/arch/arm/boot/dts/zynq-7000.dtsi
+++ b/arch/arm/boot/dts/zynq-7000.dtsi
@@ -193,7 +193,7 @@
};
gem0: ethernet@e000b000 {
- compatible = "cdns,gem";
+ compatible = "cdns,zynq-gem";
reg = <0xe000b000 0x1000>;
status = "disabled";
interrupts = <0 22 4>;
@@ -204,7 +204,7 @@
};
gem1: ethernet@e000c000 {
- compatible = "cdns,gem";
+ compatible = "cdns,zynq-gem";
reg = <0xe000c000 0x1000>;
status = "disabled";
interrupts = <0 45 4>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ab86655c1f4b..fbbb1915c6a9 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y
CONFIG_ARCH_KEYSTONE=y
CONFIG_ARCH_MESON=y
CONFIG_ARCH_MXC=y
+CONFIG_SOC_IMX50=y
CONFIG_SOC_IMX51=y
CONFIG_SOC_IMX53=y
CONFIG_SOC_IMX6Q=y
CONFIG_SOC_IMX6SL=y
+CONFIG_SOC_IMX6SX=y
CONFIG_SOC_VF610=y
+CONFIG_SOC_LS1021A=y
CONFIG_ARCH_OMAP3=y
CONFIG_ARCH_OMAP4=y
CONFIG_SOC_OMAP5=y
@@ -426,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
CONFIG_USB_EHCI_TEGRA=y
CONFIG_USB_EHCI_HCD_STI=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_STI=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 9ff7b54b2a83..3743ca221d40 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y
CONFIG_DMA_OMAP=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXTCON=m
-CONFIG_EXTCON_GPIO=m
+CONFIG_EXTCON_USB_GPIO=m
CONFIG_EXTCON_PALMAS=m
CONFIG_TI_EMIF=m
CONFIG_PWM=y
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..6c2327e1c732 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 8e3fcb924db6..2ef282f96651 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -25,7 +25,7 @@ struct dma_iommu_mapping {
};
struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size);
void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 4e78065a16aa..5eed82809d82 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ preempt_disable();
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = val;
+ preempt_enable();
+
return ret;
}
@@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+ preempt_disable();
+#endif
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
+#ifndef CONFIG_SMP
+ preempt_enable();
+#endif
if (!ret) {
switch (cmp) {
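Why the new !SMP guards: pagefault_disable() no longer implies preempt_disable(), and only the UP fallback needs the explicit pair. A sketch of the hazard, illustrative rather than taken from the patch:

	/*
	 * On UP builds the user-space futex op is a plain load/modify/store
	 * (TUSER ldr/str), which is not atomic against preemption:
	 *
	 *     ldr r0, [uaddr]    @ read old value
	 *         <-- preempted; another task updates *uaddr
	 *     str r1, [uaddr]    @ store stale result, the update is lost
	 *
	 * SMP builds use an ldrex/strex loop whose strex fails and retries
	 * after any intervening store or context switch, so only the !SMP
	 * path needs preempt_disable()/preempt_enable() around the sequence.
	 */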
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 2fe85fff5cca..370f7a732900 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 2f7e6ff67d51..0b579b2f4e0e 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn);
+unsigned long xen_get_swiotlb_free_pages(unsigned int order);
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f8ccc21fa032..4e7f40c577e6 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -33,7 +33,9 @@ ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq @ disable interrupts
- ldr r1, [tsk, #TI_FLAGS]
+ ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
+ tst r1, #_TIF_SYSCALL_WORK
+ bne __sys_trace_return
tst r1, #_TIF_WORK_MASK
bne fast_work_pending
asm_trace_hardirqs_on
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 91c7ba182dcd..3b8c2833c537 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -303,9 +303,15 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int of_pmu_irq_cfg(struct platform_device *pdev)
{
- int i;
- int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+ int i, irq;
+ int *irqs;
+ /* Don't bother with PPIs; they're already affine */
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0 && irq_is_percpu(irq))
+ return 0;
+
+ irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
if (!irqs)
return -ENOMEM;
@@ -317,7 +323,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev)
i);
if (!dn) {
pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
- of_node_full_name(dn), i);
+ of_node_full_name(pdev->dev.of_node), i);
break;
}
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index acd5b560b728..5f5cd562c593 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -159,6 +159,8 @@ extern void exynos_enter_aftr(void);
extern struct cpuidle_exynos_data cpuidle_coupled_exynos_data;
+extern void exynos_set_delayed_reset_assertion(bool enable);
+
extern void s5p_init_cpu(void __iomem *cpuid_addr);
extern unsigned int samsung_rev(void);
extern void __iomem *cpu_boot_reg_base(void);
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index bcde0dd668df..5917a30eee33 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -167,6 +167,33 @@ static void __init exynos_init_io(void)
}
/*
+ * Set or clear the USE_DELAYED_RESET_ASSERTION option. Used by smp code
+ * and suspend.
+ *
+ * This is necessary only on Exynos4 SoCs. While the system is running,
+ * USE_DELAYED_RESET_ASSERTION should be set so that the ARM CLK clock-down
+ * feature can properly detect the global idle state when a secondary CPU
+ * is powered down.
+ *
+ * However, it should not be set when the system is going into suspend.
+ */
+void exynos_set_delayed_reset_assertion(bool enable)
+{
+ if (of_machine_is_compatible("samsung,exynos4")) {
+ unsigned int tmp, core_id;
+
+ for (core_id = 0; core_id < num_possible_cpus(); core_id++) {
+ tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
+ if (enable)
+ tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
+ else
+ tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
+ pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
+ }
+ }
+}
+
+/*
* Apparently, these SoCs are not able to wake-up from suspend using
* the PMU. Too bad. Should they suddenly become capable of such a
* feat, the matches below should be moved to suspend.c.
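For reference, a condensed view of how the later hunks in this patch drive the new helper (assembled from the platsmp.c and suspend.c changes below, not additional code):

	/*
	 * exynos_smp_prepare_cpus(): exynos_set_delayed_reset_assertion(true)
	 *	- enabled once at SMP bring-up instead of per-CPU hotplug toggling
	 * exynos_pm_prepare():       exynos_set_delayed_reset_assertion(false)
	 *	- cleared before entering suspend
	 * exynos_pm_resume():        exynos_set_delayed_reset_assertion(true)
	 *	- restored after (early) wakeup
	 */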
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index ebd135bb0995..a825bca2a2b6 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -34,30 +34,6 @@
extern void exynos4_secondary_startup(void);
-/*
- * Set or clear the USE_DELAYED_RESET_ASSERTION option, set on Exynos4 SoCs
- * during hot-(un)plugging CPUx.
- *
- * The feature can be cleared safely during first boot of secondary CPU.
- *
- * Exynos4 SoCs require setting USE_DELAYED_RESET_ASSERTION during powering
- * down a CPU so the CPU idle clock down feature could properly detect global
- * idle state when CPUx is off.
- */
-static void exynos_set_delayed_reset_assertion(u32 core_id, bool enable)
-{
- if (soc_is_exynos4()) {
- unsigned int tmp;
-
- tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
- if (enable)
- tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
- else
- tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
- pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
- }
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static inline void cpu_leave_lowpower(u32 core_id)
{
@@ -73,8 +49,6 @@ static inline void cpu_leave_lowpower(u32 core_id)
: "=&r" (v)
: "Ir" (CR_C), "Ir" (0x40)
: "cc");
-
- exynos_set_delayed_reset_assertion(core_id, false);
}
static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
@@ -87,14 +61,6 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
/* Turn the CPU off on next WFI instruction. */
exynos_cpu_power_down(core_id);
- /*
- * Exynos4 SoCs require setting
- * USE_DELAYED_RESET_ASSERTION so the CPU idle
- * clock down feature could properly detect
- * global idle state when CPUx is off.
- */
- exynos_set_delayed_reset_assertion(core_id, true);
-
wfi();
if (pen_release == core_id) {
@@ -371,9 +337,6 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
udelay(10);
}
- /* No harm if this is called during first boot of secondary CPU */
- exynos_set_delayed_reset_assertion(core_id, false);
-
/*
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
@@ -420,6 +383,8 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
exynos_sysram_init();
+ exynos_set_delayed_reset_assertion(true);
+
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index cbe56b35aea0..a9686535f9ed 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -188,7 +188,7 @@ no_clk:
args.np = np;
args.args_count = 0;
child_domain = of_genpd_get_from_provider(&args);
- if (!child_domain)
+ if (IS_ERR(child_domain))
continue;
if (of_parse_phandle_with_args(np, "power-domains",
@@ -196,7 +196,7 @@ no_clk:
continue;
parent_domain = of_genpd_get_from_provider(&args);
- if (!parent_domain)
+ if (IS_ERR(parent_domain))
continue;
if (pm_genpd_add_subdomain(parent_domain, child_domain))
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 3e6aea7f83af..7d23ce04cad5 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3;
static u32 exynos_irqwake_intmask = 0xffffffff;
static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
- { 105, BIT(1) }, /* RTC alarm */
- { 106, BIT(2) }, /* RTC tick */
+ { 73, BIT(1) }, /* RTC alarm */
+ { 74, BIT(2) }, /* RTC tick */
{ /* sentinel */ },
};
@@ -342,6 +342,8 @@ static void exynos_pm_enter_sleep_mode(void)
static void exynos_pm_prepare(void)
{
+ exynos_set_delayed_reset_assertion(false);
+
/* Set wake-up mask registers */
exynos_pm_set_wakeup_mask();
@@ -482,6 +484,7 @@ early_wakeup:
/* Clear SLEEP mode set in INFORM1 */
pmu_raw_writel(0x0, S5P_INFORM1);
+ exynos_set_delayed_reset_assertion(true);
}
static void exynos3250_pm_resume(void)
@@ -723,8 +726,10 @@ void __init exynos_pm_init(void)
return;
}
- if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL)))
+ if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+ return;
+ }
pm_data = (const struct exynos_pm_data *) match->data;
diff --git a/arch/arm/mach-gemini/common.h b/arch/arm/mach-gemini/common.h
index 38a45260a7c8..dd883698ff7e 100644
--- a/arch/arm/mach-gemini/common.h
+++ b/arch/arm/mach-gemini/common.h
@@ -12,6 +12,8 @@
#ifndef __GEMINI_COMMON_H__
#define __GEMINI_COMMON_H__
+#include <linux/reboot.h>
+
struct mtd_partition;
extern void gemini_map_io(void);
@@ -26,6 +28,6 @@ extern int platform_register_pflash(unsigned int size,
struct mtd_partition *parts,
unsigned int nr_parts);
-extern void gemini_restart(char mode, const char *cmd);
+extern void gemini_restart(enum reboot_mode mode, const char *cmd);
#endif /* __GEMINI_COMMON_H__ */
diff --git a/arch/arm/mach-gemini/reset.c b/arch/arm/mach-gemini/reset.c
index b26659759e27..21a6d6d4f9c4 100644
--- a/arch/arm/mach-gemini/reset.c
+++ b/arch/arm/mach-gemini/reset.c
@@ -14,7 +14,9 @@
#include <mach/hardware.h>
#include <mach/global_reg.h>
-void gemini_restart(char mode, const char *cmd)
+#include "common.h"
+
+void gemini_restart(enum reboot_mode mode, const char *cmd)
{
__raw_writel(RESET_GLOBAL | RESET_CPU1,
IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
index fb8d4a2ad48c..a5edd7d60266 100644
--- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
+++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de>
+ * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de>
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 4d60005e9277..6d0893a3828e 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
- if (WARN_ON(!np ||
- !of_find_property(np, "interrupt-controller", NULL)))
- pr_warn("Outdated DT detected, system is about to crash!!!\n");
+ if (WARN_ON(!np))
+ return;
+
+ if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+ pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+ /* map GPC, so that at least CPUidle and WARs keep working */
+ gpc_base = of_iomap(np, 0);
+ }
}
#ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
struct regulator *pu_reg;
int ret;
+ /* bail out if the DT is too old and doesn't provide the necessary info */
+ if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+ return 0;
+
pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
if (PTR_ERR(pu_reg) == -ENODEV)
pu_reg = NULL;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 355b08936871..752969ff9de0 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -171,6 +171,12 @@
*/
#define LINKS_PER_OCP_IF 2
+/*
+ * Address offset (in bytes) between the reset control and the reset
+ * status registers: 4 bytes on OMAP4
+ */
+#define OMAP4_RST_CTRL_ST_OFFSET 4
+
/**
* struct omap_hwmod_soc_ops - fn ptrs for some SoC-specific operations
* @enable_module: function to enable a module (via MODULEMODE)
@@ -3016,10 +3022,12 @@ static int _omap4_deassert_hardreset(struct omap_hwmod *oh,
if (ohri->st_shift)
pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n",
oh->name, ohri->name);
- return omap_prm_deassert_hardreset(ohri->rst_shift, 0,
+ return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->rst_shift,
oh->clkdm->pwrdm.ptr->prcm_partition,
oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs, 0);
+ oh->prcm.omap4.rstctrl_offs,
+ oh->prcm.omap4.rstctrl_offs +
+ OMAP4_RST_CTRL_ST_OFFSET);
}
/**
@@ -3048,27 +3056,6 @@ static int _omap4_is_hardreset_asserted(struct omap_hwmod *oh,
}
/**
- * _am33xx_assert_hardreset - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to assert hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_assert_hardreset() with parameters extracted
- * from the hwmod @oh and the hardreset line data @ohri. Only
- * intended for use as an soc_ops function pointer. Passes along the
- * return value from am33xx_prminst_assert_hardreset(). XXX This
- * function is scheduled for removal when the PRM code is moved into
- * drivers/.
- */
-static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
- struct omap_hwmod_rst_info *ohri)
-
-{
- return omap_prm_assert_hardreset(ohri->rst_shift, 0,
- oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs);
-}
-
-/**
* _am33xx_deassert_hardreset - call AM33XX PRM hardreset fn with hwmod args
* @oh: struct omap_hwmod * to deassert hardreset
* @ohri: hardreset line data
@@ -3083,32 +3070,13 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
static int _am33xx_deassert_hardreset(struct omap_hwmod *oh,
struct omap_hwmod_rst_info *ohri)
{
- return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, 0,
+ return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift,
+ oh->clkdm->pwrdm.ptr->prcm_partition,
oh->clkdm->pwrdm.ptr->prcm_offs,
oh->prcm.omap4.rstctrl_offs,
oh->prcm.omap4.rstst_offs);
}
-/**
- * _am33xx_is_hardreset_asserted - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to test hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_is_hardreset_asserted() with parameters
- * extracted from the hwmod @oh and the hardreset line data @ohri.
- * Only intended for use as an soc_ops function pointer. Passes along
- * the return value from am33xx_prminst_is_hardreset_asserted(). XXX
- * This function is scheduled for removal when the PRM code is moved
- * into drivers/.
- */
-static int _am33xx_is_hardreset_asserted(struct omap_hwmod *oh,
- struct omap_hwmod_rst_info *ohri)
-{
- return omap_prm_is_hardreset_asserted(ohri->rst_shift, 0,
- oh->clkdm->pwrdm.ptr->prcm_offs,
- oh->prcm.omap4.rstctrl_offs);
-}
-
/* Public functions */
u32 omap_hwmod_read(struct omap_hwmod *oh, u16 reg_offs)
@@ -3908,21 +3876,13 @@ void __init omap_hwmod_init(void)
soc_ops.init_clkdm = _init_clkdm;
soc_ops.update_context_lost = _omap4_update_context_lost;
soc_ops.get_context_lost = _omap4_get_context_lost;
- } else if (soc_is_am43xx()) {
+ } else if (cpu_is_ti816x() || soc_is_am33xx() || soc_is_am43xx()) {
soc_ops.enable_module = _omap4_enable_module;
soc_ops.disable_module = _omap4_disable_module;
soc_ops.wait_target_ready = _omap4_wait_target_ready;
soc_ops.assert_hardreset = _omap4_assert_hardreset;
- soc_ops.deassert_hardreset = _omap4_deassert_hardreset;
- soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
- soc_ops.init_clkdm = _init_clkdm;
- } else if (cpu_is_ti816x() || soc_is_am33xx()) {
- soc_ops.enable_module = _omap4_enable_module;
- soc_ops.disable_module = _omap4_disable_module;
- soc_ops.wait_target_ready = _omap4_wait_target_ready;
- soc_ops.assert_hardreset = _am33xx_assert_hardreset;
soc_ops.deassert_hardreset = _am33xx_deassert_hardreset;
- soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted;
+ soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
soc_ops.init_clkdm = _init_clkdm;
} else {
WARN(1, "omap_hwmod: unknown SoC type\n");
diff --git a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
index e2223148ba4d..17e8004fc20f 100644
--- a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c
@@ -544,6 +544,44 @@ static struct omap_hwmod am43xx_hdq1w_hwmod = {
},
};
+static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = {
+ .rev_offs = 0x0,
+ .sysc_offs = 0x104,
+ .sysc_flags = SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE,
+ .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+ MSTANDBY_FORCE | MSTANDBY_SMART | MSTANDBY_NO),
+ .sysc_fields = &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class am43xx_vpfe_hwmod_class = {
+ .name = "vpfe",
+ .sysc = &am43xx_vpfe_sysc,
+};
+
+static struct omap_hwmod am43xx_vpfe0_hwmod = {
+ .name = "vpfe0",
+ .class = &am43xx_vpfe_hwmod_class,
+ .clkdm_name = "l3s_clkdm",
+ .prcm = {
+ .omap4 = {
+ .modulemode = MODULEMODE_SWCTRL,
+ .clkctrl_offs = AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET,
+ },
+ },
+};
+
+static struct omap_hwmod am43xx_vpfe1_hwmod = {
+ .name = "vpfe1",
+ .class = &am43xx_vpfe_hwmod_class,
+ .clkdm_name = "l3s_clkdm",
+ .prcm = {
+ .omap4 = {
+ .modulemode = MODULEMODE_SWCTRL,
+ .clkctrl_offs = AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET,
+ },
+ },
+};
+
/* Interfaces */
static struct omap_hwmod_ocp_if am43xx_l3_main__l4_hs = {
.master = &am33xx_l3_main_hwmod,
@@ -825,6 +863,34 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = {
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = {
+ .master = &am43xx_vpfe0_hwmod,
+ .slave = &am33xx_l3_main_hwmod,
+ .clk = "l3_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe1 = {
+ .master = &am43xx_vpfe1_hwmod,
+ .slave = &am33xx_l3_main_hwmod,
+ .clk = "l3_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe0 = {
+ .master = &am33xx_l4_ls_hwmod,
+ .slave = &am43xx_vpfe0_hwmod,
+ .clk = "l4ls_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe1 = {
+ .master = &am33xx_l4_ls_hwmod,
+ .slave = &am43xx_vpfe1_hwmod,
+ .clk = "l4ls_gclk",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
&am33xx_l4_wkup__synctimer,
&am43xx_l4_ls__timer8,
@@ -925,6 +991,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
&am43xx_l4_ls__dss_dispc,
&am43xx_l4_ls__dss_rfbi,
&am43xx_l4_ls__hdq1w,
+ &am43xx_l3__vpfe0,
+ &am43xx_l3__vpfe1,
+ &am43xx_l4_ls__vpfe0,
+ &am43xx_l4_ls__vpfe1,
NULL,
};
diff --git a/arch/arm/mach-omap2/prcm43xx.h b/arch/arm/mach-omap2/prcm43xx.h
index 48df3b55057e..d0261996db6d 100644
--- a/arch/arm/mach-omap2/prcm43xx.h
+++ b/arch/arm/mach-omap2/prcm43xx.h
@@ -144,5 +144,6 @@
#define AM43XX_CM_PER_USBPHYOCP2SCP1_CLKCTRL_OFFSET 0x05C0
#define AM43XX_CM_PER_DSS_CLKCTRL_OFFSET 0x0a20
#define AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET 0x04a0
-
+#define AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET 0x0068
+#define AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET 0x0070
#endif
diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h
index cbefbd7cfdb5..661d753df584 100644
--- a/arch/arm/mach-omap2/prm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-34xx.h
@@ -112,6 +112,7 @@
#define OMAP3430_VC_CMD_ONLP_SHIFT 16
#define OMAP3430_VC_CMD_RET_SHIFT 8
#define OMAP3430_VC_CMD_OFF_SHIFT 0
+#define OMAP3430_SREN_MASK (1 << 4)
#define OMAP3430_HSEN_MASK (1 << 3)
#define OMAP3430_MCODE_MASK (0x7 << 0)
#define OMAP3430_VALID_MASK (1 << 24)
diff --git a/arch/arm/mach-omap2/prm-regbits-44xx.h b/arch/arm/mach-omap2/prm-regbits-44xx.h
index b1c7a33e00e7..e794828dee55 100644
--- a/arch/arm/mach-omap2/prm-regbits-44xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-44xx.h
@@ -35,6 +35,7 @@
#define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT 1
#define OMAP4430_GLOBAL_WUEN_MASK (1 << 16)
#define OMAP4430_HSMCODE_MASK (0x7 << 0)
+#define OMAP4430_SRMODEEN_MASK (1 << 4)
#define OMAP4430_HSMODEEN_MASK (1 << 3)
#define OMAP4430_HSSCLL_SHIFT 24
#define OMAP4430_ICEPICK_RST_SHIFT 9
diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c
index c4859c4d3646..d0b15dbafa2e 100644
--- a/arch/arm/mach-omap2/prminst44xx.c
+++ b/arch/arm/mach-omap2/prminst44xx.c
@@ -87,12 +87,6 @@ u32 omap4_prminst_rmw_inst_reg_bits(u32 mask, u32 bits, u8 part, s16 inst,
return v;
}
-/*
- * Address offset (in bytes) between the reset control and the reset
- * status registers: 4 bytes on OMAP4
- */
-#define OMAP4_RST_CTRL_ST_OFFSET 4
-
/**
* omap4_prminst_is_hardreset_asserted - read the HW reset line state of
* submodules contained in the hwmod module
@@ -141,11 +135,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
* omap4_prminst_deassert_hardreset - deassert a submodule hardreset line and
* wait
* @shift: register bit shift corresponding to the reset line to deassert
- * @st_shift: status bit offset, not used for OMAP4+
+ * @st_shift: status bit offset corresponding to the reset line
* @part: PRM partition
* @inst: PRM instance offset
* @rstctrl_offs: reset register offset
- * @st_offs: reset status register offset, not used for OMAP4+
+ * @rstst_offs: reset status register offset
*
* Some IPs like dsp, ipu or iva contain processors that require an HW
* reset line to be asserted / deasserted in order to fully enable the
@@ -157,11 +151,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
* of reset, or -EBUSY if the submodule did not exit reset promptly.
*/
int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
- u16 rstctrl_offs, u16 st_offs)
+ u16 rstctrl_offs, u16 rstst_offs)
{
int c;
u32 mask = 1 << shift;
- u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET;
+ u32 st_mask = 1 << st_shift;
/* Check the current status to avoid de-asserting the line twice */
if (omap4_prminst_is_hardreset_asserted(shift, part, inst,
@@ -169,13 +163,13 @@ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
return -EEXIST;
/* Clear the reset status by writing 1 to the status bit */
- omap4_prminst_rmw_inst_reg_bits(0xffffffff, mask, part, inst,
+ omap4_prminst_rmw_inst_reg_bits(0xffffffff, st_mask, part, inst,
rstst_offs);
/* de-assert the reset control line */
omap4_prminst_rmw_inst_reg_bits(mask, 0, part, inst, rstctrl_offs);
/* wait the status to be set */
- omap_test_timeout(omap4_prminst_is_hardreset_asserted(shift, part, inst,
- rstst_offs),
+ omap_test_timeout(omap4_prminst_is_hardreset_asserted(st_shift, part,
+ inst, rstst_offs),
MAX_MODULE_HARDRESET_WAIT, c);
return (c == MAX_MODULE_HARDRESET_WAIT) ? -EBUSY : 0;
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index d1dedc8195ed..eafd120b53f1 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -203,23 +203,8 @@ save_context_wfi:
*/
ldr r1, kernel_flush
blx r1
- /*
- * The kernel doesn't interwork: v7_flush_dcache_all in particluar will
- * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled.
- * This sequence switches back to ARM. Note that .align may insert a
- * nop: bx pc needs to be word-aligned in order to work.
- */
- THUMB( .thumb )
- THUMB( .align )
- THUMB( bx pc )
- THUMB( nop )
- .arm
-
b omap3_do_wfi
-
-/*
- * Local variables
- */
+ENDPROC(omap34xx_cpu_suspend)
omap3_do_wfi_sram_addr:
.word omap3_do_wfi_sram
kernel_flush:
@@ -364,10 +349,7 @@ exit_nonoff_modes:
* ===================================
*/
ldmfd sp!, {r4 - r11, pc} @ restore regs and return
-
-/*
- * Local variables
- */
+ENDPROC(omap3_do_wfi)
sdrc_power:
.word SDRC_POWER_V
cm_idlest1_core:
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index cef67af9e9b8..cac46d852da1 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -298,14 +298,11 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
if (IS_ERR(src))
return PTR_ERR(src);
- if (clk_get_parent(timer->fclk) != src) {
- r = clk_set_parent(timer->fclk, src);
- if (r < 0) {
- pr_warn("%s: %s cannot set source\n", __func__,
- oh->name);
- clk_put(src);
- return r;
- }
+ r = clk_set_parent(timer->fclk, src);
+ if (r < 0) {
+ pr_warn("%s: %s cannot set source\n", __func__, oh->name);
+ clk_put(src);
+ return r;
}
clk_put(src);
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index be9ef834fa81..076fd20d7e5a 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm)
* idle. And we can also scale voltages to zero for off-idle.
* Note that no actual voltage scaling during off-idle will
* happen unless the board specific twl4030 PMIC scripts are
- * loaded.
+ * loaded. See also omap_vc_i2c_init for comments regarding
+ * erratum i531.
*/
val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET);
if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) {
@@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm)
return;
}
+ /*
+ * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around
+ * erratum i531 "Extra Power Consumed When Repeated Start Operation
+ * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)".
+ * Otherwise I2C4 eventually leads into about 23mW extra power being
+ * consumed even during off idle using VMODE.
+ */
i2c_high_speed = voltdm->pmic->i2c_high_speed;
if (i2c_high_speed)
- voltdm->rmw(vc->common->i2c_cfg_hsen_mask,
+ voltdm->rmw(vc->common->i2c_cfg_clear_mask,
vc->common->i2c_cfg_hsen_mask,
vc->common->i2c_cfg_reg);
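To make the mask arithmetic concrete, assuming the usual OMAP rmw(clear_mask, set_bits, reg) convention in which every bit of the first argument is cleared before the second is OR-ed in, the high-speed path now reduces to the following (hypothetical helper, for illustration only):

	static u32 i2c_cfg_update(u32 val)
	{
		/* i2c_cfg_clear_mask = SREN | HSEN, bits to set = HSEN */
		val &= ~(OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK);
		val |= OMAP3430_HSEN_MASK;
		return val;	/* HSEN set, SREN guaranteed clear (erratum i531) */
	}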
diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h
index cdbdd78e755e..89b83b7ff3ec 100644
--- a/arch/arm/mach-omap2/vc.h
+++ b/arch/arm/mach-omap2/vc.h
@@ -34,6 +34,7 @@ struct voltagedomain;
* @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register
* @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register
* @i2c_cfg_reg: I2C configuration register offset
+ * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register
* @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register
* @i2c_mcode_mask: MCODE field mask for I2C config register
*
@@ -52,6 +53,7 @@ struct omap_vc_common {
u8 cmd_ret_shift;
u8 cmd_off_shift;
u8 i2c_cfg_reg;
+ u8 i2c_cfg_clear_mask;
u8 i2c_cfg_hsen_mask;
u8 i2c_mcode_mask;
};
diff --git a/arch/arm/mach-omap2/vc3xxx_data.c b/arch/arm/mach-omap2/vc3xxx_data.c
index 75bc4aa22b3a..71d74c9172c1 100644
--- a/arch/arm/mach-omap2/vc3xxx_data.c
+++ b/arch/arm/mach-omap2/vc3xxx_data.c
@@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = {
.cmd_onlp_shift = OMAP3430_VC_CMD_ONLP_SHIFT,
.cmd_ret_shift = OMAP3430_VC_CMD_RET_SHIFT,
.cmd_off_shift = OMAP3430_VC_CMD_OFF_SHIFT,
+ .i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK,
.i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK,
.i2c_cfg_reg = OMAP3_PRM_VC_I2C_CFG_OFFSET,
.i2c_mcode_mask = OMAP3430_MCODE_MASK,
diff --git a/arch/arm/mach-omap2/vc44xx_data.c b/arch/arm/mach-omap2/vc44xx_data.c
index 085e5d6a04fd..2abd5fa8a697 100644
--- a/arch/arm/mach-omap2/vc44xx_data.c
+++ b/arch/arm/mach-omap2/vc44xx_data.c
@@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = {
.cmd_ret_shift = OMAP4430_RET_SHIFT,
.cmd_off_shift = OMAP4430_OFF_SHIFT,
.i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET,
+ .i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK,
.i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK,
.i2c_mcode_mask = OMAP4430_HSMCODE_MASK,
};
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 8896e71586f5..f09683687963 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111
config PXA310_ULPI
bool
+config PXA_SYSTEMS_CPLDS
+ tristate "Motherboard cplds"
+ default ARCH_LUBBOCK || MACH_MAINSTONE
+ help
+ This driver supports the Lubbock and Mainstone multifunction chips
+ found on the pxa25x development platform (Lubbock) and the pxa27x
+ development platform (Mainstone). These IO boards provide interrupt
+ handling, an ethernet controller, flash chips, etc.
+
endif
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index eb0bf7678a99..4087d334ecdf 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR) += raumfeld.o
obj-$(CONFIG_MACH_RAUMFELD_SPEAKER) += raumfeld.o
obj-$(CONFIG_MACH_ZIPIT2) += z2.o
+obj-$(CONFIG_PXA_SYSTEMS_CPLDS) += pxa_cplds_irqs.o
obj-$(CONFIG_TOSA_BT) += tosa-bt.o
diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h
index 958cd6af9384..1eecf794acd2 100644
--- a/arch/arm/mach-pxa/include/mach/lubbock.h
+++ b/arch/arm/mach-pxa/include/mach/lubbock.h
@@ -37,7 +37,9 @@
#define LUB_GP __LUB_REG(LUBBOCK_FPGA_PHYS + 0x100)
/* Board specific IRQs */
-#define LUBBOCK_IRQ(x) (IRQ_BOARD_START + (x))
+#define LUBBOCK_NR_IRQS IRQ_BOARD_START
+
+#define LUBBOCK_IRQ(x) (LUBBOCK_NR_IRQS + (x))
#define LUBBOCK_SD_IRQ LUBBOCK_IRQ(0)
#define LUBBOCK_SA1111_IRQ LUBBOCK_IRQ(1)
#define LUBBOCK_USB_IRQ LUBBOCK_IRQ(2) /* usb connect */
@@ -47,8 +49,7 @@
#define LUBBOCK_USB_DISC_IRQ LUBBOCK_IRQ(6) /* usb disconnect */
#define LUBBOCK_LAST_IRQ LUBBOCK_IRQ(6)
-#define LUBBOCK_SA1111_IRQ_BASE (IRQ_BOARD_START + 16)
-#define LUBBOCK_NR_IRQS (IRQ_BOARD_START + 16 + 55)
+#define LUBBOCK_SA1111_IRQ_BASE (LUBBOCK_NR_IRQS + 32)
#ifndef __ASSEMBLY__
extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set);
diff --git a/arch/arm/mach-pxa/include/mach/mainstone.h b/arch/arm/mach-pxa/include/mach/mainstone.h
index 1bfc4e822a41..e82a7d31104e 100644
--- a/arch/arm/mach-pxa/include/mach/mainstone.h
+++ b/arch/arm/mach-pxa/include/mach/mainstone.h
@@ -120,7 +120,9 @@
#define MST_PCMCIA_PWR_VCC_50 0x4 /* voltage VCC = 5.0V */
/* board specific IRQs */
-#define MAINSTONE_IRQ(x) (IRQ_BOARD_START + (x))
+#define MAINSTONE_NR_IRQS IRQ_BOARD_START
+
+#define MAINSTONE_IRQ(x) (MAINSTONE_NR_IRQS + (x))
#define MAINSTONE_MMC_IRQ MAINSTONE_IRQ(0)
#define MAINSTONE_USIM_IRQ MAINSTONE_IRQ(1)
#define MAINSTONE_USBC_IRQ MAINSTONE_IRQ(2)
@@ -136,6 +138,4 @@
#define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14)
#define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15)
-#define MAINSTONE_NR_IRQS (IRQ_BOARD_START + 16)
-
#endif
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index d8a1be619f21..4ac9ab80d24b 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -12,6 +12,7 @@
* published by the Free Software Foundation.
*/
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set)
}
EXPORT_SYMBOL(lubbock_set_misc_wr);
-static unsigned long lubbock_irq_enabled;
-
-static void lubbock_mask_irq(struct irq_data *d)
-{
- int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
- LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq));
-}
-
-static void lubbock_unmask_irq(struct irq_data *d)
-{
- int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
- /* the irq can be acknowledged only if deasserted, so it's done here */
- LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq);
- LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq));
-}
-
-static struct irq_chip lubbock_irq_chip = {
- .name = "FPGA",
- .irq_ack = lubbock_mask_irq,
- .irq_mask = lubbock_mask_irq,
- .irq_unmask = lubbock_unmask_irq,
-};
-
-static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
- unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
- do {
- /* clear our parent irq */
- desc->irq_data.chip->irq_ack(&desc->irq_data);
- if (likely(pending)) {
- irq = LUBBOCK_IRQ(0) + __ffs(pending);
- generic_handle_irq(irq);
- }
- pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
- } while (pending);
-}
-
-static void __init lubbock_init_irq(void)
-{
- int irq;
-
- pxa25x_init_irq();
-
- /* setup extra lubbock irqs */
- for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) {
- irq_set_chip_and_handler(irq, &lubbock_irq_chip,
- handle_level_irq);
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
- }
-
- irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler);
- irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void lubbock_irq_resume(void)
-{
- LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-}
-
-static struct syscore_ops lubbock_irq_syscore_ops = {
- .resume = lubbock_irq_resume,
-};
-
-static int __init lubbock_irq_device_init(void)
-{
- if (machine_is_lubbock()) {
- register_syscore_ops(&lubbock_irq_syscore_ops);
- return 0;
- }
- return -ENODEV;
-}
-
-device_initcall(lubbock_irq_device_init);
-
-#endif
-
static int lubbock_udc_is_connected(void)
{
return (LUB_MISC_RD & (1 << 9)) == 0;
@@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = {
},
};
+static struct resource lubbock_cplds_resources[] = {
+ [0] = {
+ .start = LUBBOCK_FPGA_PHYS + 0xc0,
+ .end = LUBBOCK_FPGA_PHYS + 0xe0 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = PXA_GPIO_TO_IRQ(0),
+ .end = PXA_GPIO_TO_IRQ(0),
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+ },
+ [2] = {
+ .start = LUBBOCK_IRQ(0),
+ .end = LUBBOCK_IRQ(6),
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device lubbock_cplds_device = {
+ .name = "pxa_cplds_irqs",
+ .id = -1,
+ .resource = &lubbock_cplds_resources[0],
+ .num_resources = 3,
+};
+
static struct platform_device *devices[] __initdata = {
&sa1111_device,
&smc91x_device,
&lubbock_flash_device[0],
&lubbock_flash_device[1],
+ &lubbock_cplds_device,
};
static struct pxafb_mode_info sharp_lm8v31_mode = {
@@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)")
/* Maintainer: MontaVista Software Inc. */
.map_io = lubbock_map_io,
.nr_irqs = LUBBOCK_NR_IRQS,
- .init_irq = lubbock_init_irq,
+ .init_irq = pxa25x_init_irq,
.handle_irq = pxa25x_handle_irq,
.init_time = pxa_timer_init,
.init_machine = lubbock_init,
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 78b84c0dfc79..2c0658cf6be2 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -13,6 +13,7 @@
* published by the Free Software Foundation.
*/
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/syscore_ops.h>
@@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = {
GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
};
-static unsigned long mainstone_irq_enabled;
-
-static void mainstone_mask_irq(struct irq_data *d)
-{
- int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
- MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq));
-}
-
-static void mainstone_unmask_irq(struct irq_data *d)
-{
- int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
- /* the irq can be acknowledged only if deasserted, so it's done here */
- MST_INTSETCLR &= ~(1 << mainstone_irq);
- MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq));
-}
-
-static struct irq_chip mainstone_irq_chip = {
- .name = "FPGA",
- .irq_ack = mainstone_mask_irq,
- .irq_mask = mainstone_mask_irq,
- .irq_unmask = mainstone_unmask_irq,
-};
-
-static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
- unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled;
- do {
- /* clear useless edge notification */
- desc->irq_data.chip->irq_ack(&desc->irq_data);
- if (likely(pending)) {
- irq = MAINSTONE_IRQ(0) + __ffs(pending);
- generic_handle_irq(irq);
- }
- pending = MST_INTSETCLR & mainstone_irq_enabled;
- } while (pending);
-}
-
-static void __init mainstone_init_irq(void)
-{
- int irq;
-
- pxa27x_init_irq();
-
- /* setup extra Mainstone irqs */
- for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) {
- irq_set_chip_and_handler(irq, &mainstone_irq_chip,
- handle_level_irq);
- if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14))
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN);
- else
- set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
- }
- set_irq_flags(MAINSTONE_IRQ(8), 0);
- set_irq_flags(MAINSTONE_IRQ(12), 0);
-
- MST_INTMSKENA = 0;
- MST_INTSETCLR = 0;
-
- irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler);
- irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void mainstone_irq_resume(void)
-{
- MST_INTMSKENA = mainstone_irq_enabled;
-}
-
-static struct syscore_ops mainstone_irq_syscore_ops = {
- .resume = mainstone_irq_resume,
-};
-
-static int __init mainstone_irq_device_init(void)
-{
- if (machine_is_mainstone())
- register_syscore_ops(&mainstone_irq_syscore_ops);
-
- return 0;
-}
-
-device_initcall(mainstone_irq_device_init);
-
-#endif
-
-
static struct resource smc91x_resources[] = {
[0] = {
.start = (MST_ETH_PHYS + 0x300),
@@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = {
},
};
+static struct resource mst_cplds_resources[] = {
+ [0] = {
+ .start = MST_FPGA_PHYS + 0xc0,
+ .end = MST_FPGA_PHYS + 0xe0 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = PXA_GPIO_TO_IRQ(0),
+ .end = PXA_GPIO_TO_IRQ(0),
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+ },
+ [2] = {
+ .start = MAINSTONE_IRQ(0),
+ .end = MAINSTONE_IRQ(15),
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device mst_cplds_device = {
+ .name = "pxa_cplds_irqs",
+ .id = -1,
+ .resource = &mst_cplds_resources[0],
+ .num_resources = 3,
+};
+
static struct platform_device *platform_devices[] __initdata = {
&smc91x_device,
&mst_flash_device[0],
&mst_flash_device[1],
&mst_gpio_keys_device,
+ &mst_cplds_device,
};
static struct pxaohci_platform_data mainstone_ohci_platform_data = {
@@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)")
.atag_offset = 0x100, /* BLOB boot parameter setting */
.map_io = mainstone_map_io,
.nr_irqs = MAINSTONE_NR_IRQS,
- .init_irq = mainstone_init_irq,
+ .init_irq = pxa27x_init_irq,
.handle_irq = pxa27x_handle_irq,
.init_time = pxa_timer_init,
.init_machine = mainstone_init,
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
new file mode 100644
index 000000000000..2385052b0ce1
--- /dev/null
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -0,0 +1,200 @@
+/*
+ * Intel Reference Systems cplds
+ *
+ * Copyright (C) 2014 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * CPLDs motherboard driver, supporting the Lubbock and Mainstone SoC boards.
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#define FPGA_IRQ_MASK_EN 0x0
+#define FPGA_IRQ_SET_CLR 0x10
+
+#define CPLDS_NB_IRQ 32
+
+struct cplds {
+ void __iomem *base;
+ int irq;
+ unsigned int irq_mask;
+ struct gpio_desc *gpio0;
+ struct irq_domain *irqdomain;
+};
+
+static irqreturn_t cplds_irq_handler(int in_irq, void *d)
+{
+ struct cplds *fpga = d;
+ unsigned long pending;
+ unsigned int bit;
+
+ pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask;
+ for_each_set_bit(bit, &pending, CPLDS_NB_IRQ)
+ generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit));
+
+ return IRQ_HANDLED;
+}
+
+static void cplds_irq_mask_ack(struct irq_data *d)
+{
+ struct cplds *fpga = irq_data_get_irq_chip_data(d);
+ unsigned int cplds_irq = irqd_to_hwirq(d);
+ unsigned int set, bit = BIT(cplds_irq);
+
+ fpga->irq_mask &= ~bit;
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+ set = readl(fpga->base + FPGA_IRQ_SET_CLR);
+ writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR);
+}
+
+static void cplds_irq_unmask(struct irq_data *d)
+{
+ struct cplds *fpga = irq_data_get_irq_chip_data(d);
+ unsigned int cplds_irq = irqd_to_hwirq(d);
+ unsigned int bit = BIT(cplds_irq);
+
+ fpga->irq_mask |= bit;
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+}
+
+static struct irq_chip cplds_irq_chip = {
+ .name = "pxa_cplds",
+ .irq_mask_ack = cplds_irq_mask_ack,
+ .irq_unmask = cplds_irq_unmask,
+ .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct cplds *fpga = d->host_data;
+
+ irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq);
+ irq_set_chip_data(irq, fpga);
+
+ return 0;
+}
+
+static const struct irq_domain_ops cplds_irq_domain_ops = {
+ .xlate = irq_domain_xlate_twocell,
+ .map = cplds_irq_domain_map,
+};
+
+static int cplds_resume(struct platform_device *pdev)
+{
+ struct cplds *fpga = platform_get_drvdata(pdev);
+
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+
+ return 0;
+}
+
+static int cplds_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ struct cplds *fpga;
+ int ret;
+ int base_irq;
+ unsigned long irqflags = 0;
+
+ fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
+ if (!fpga)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (res) {
+ fpga->irq = (unsigned int)res->start;
+ irqflags = res->flags;
+ }
+ if (!fpga->irq)
+ return -ENODEV;
+
+ base_irq = platform_get_irq(pdev, 1);
+ if (base_irq < 0)
+ base_irq = 0;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ fpga->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fpga->base))
+ return PTR_ERR(fpga->base);
+
+ platform_set_drvdata(pdev, fpga);
+
+ writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+ writel(0, fpga->base + FPGA_IRQ_SET_CLR);
+
+ ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler,
+ irqflags, dev_name(&pdev->dev), fpga);
+ if (ret == -ENOSYS)
+ return -EPROBE_DEFER;
+
+ if (ret) {
+ dev_err(&pdev->dev, "couldn't request main irq%d: %d\n",
+ fpga->irq, ret);
+ return ret;
+ }
+
+ irq_set_irq_wake(fpga->irq, 1);
+ fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
+ CPLDS_NB_IRQ,
+ &cplds_irq_domain_ops, fpga);
+ if (!fpga->irqdomain)
+ return -ENODEV;
+
+ if (base_irq) {
+ ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0,
+ CPLDS_NB_IRQ);
+ if (ret) {
+ dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n",
+ base_irq, base_irq + CPLDS_NB_IRQ);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int cplds_remove(struct platform_device *pdev)
+{
+ struct cplds *fpga = platform_get_drvdata(pdev);
+
+ irq_set_chip_and_handler(fpga->irq, NULL, NULL);
+
+ return 0;
+}
+
+static const struct of_device_id cplds_id_table[] = {
+ { .compatible = "intel,lubbock-cplds-irqs", },
+ { .compatible = "intel,mainstone-cplds-irqs", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, cplds_id_table);
+
+static struct platform_driver cplds_driver = {
+ .driver = {
+ .name = "pxa_cplds_irqs",
+ .of_match_table = of_match_ptr(cplds_id_table),
+ },
+ .probe = cplds_probe,
+ .remove = cplds_remove,
+ .resume = cplds_resume,
+};
+
+module_platform_driver(cplds_driver);
+
+MODULE_DESCRIPTION("PXA Cplds interrupts driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL");
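Since the driver registers an OF match table and a linear IRQ domain with a two-cell xlate, a matching board node would look roughly as below. Every address, parent, and property value here is hypothetical; no binding document is part of this patch:

	cplds: cplds@8000000 {
		compatible = "intel,lubbock-cplds-irqs";
		reg = <0x08000000 0x1000>;	/* hypothetical FPGA window */
		interrupt-parent = <&gpio>;	/* cascaded GPIO0 line */
		interrupts = <0 IRQ_TYPE_EDGE_FALLING>;
		interrupt-controller;
		#interrupt-cells = <2>;		/* matches irq_domain_xlate_twocell */
	};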
diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c
index b07d88602073..b0dcbe28f78c 100644
--- a/arch/arm/mach-rockchip/pm.c
+++ b/arch/arm/mach-rockchip/pm.c
@@ -83,6 +83,13 @@ static void rk3288_slp_mode_set(int level)
SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN
| SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE);
+ /*
+ * The dapswjdp cannot automatically reset before resume, which may cause
+ * it to access illegal addresses during resume. Disable it before
+ * suspend; the MASKROM will enable it again on resume.
+ */
+ regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE);
+
/* booting address of resuming system is from this register value */
regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR,
rk3288_bootram_phy);
diff --git a/arch/arm/mach-rockchip/pm.h b/arch/arm/mach-rockchip/pm.h
index 03ff31d8282d..3e8d39c0c3d5 100644
--- a/arch/arm/mach-rockchip/pm.h
+++ b/arch/arm/mach-rockchip/pm.h
@@ -55,6 +55,10 @@ static inline void rockchip_suspend_init(void)
#define SGRF_FAST_BOOT_EN BIT(8)
#define SGRF_FAST_BOOT_EN_WRITE BIT(24)
+#define RK3288_SGRF_CPU_CON0 (0x40)
+#define SGRF_DAPDEVICEEN BIT(0)
+#define SGRF_DAPDEVICEEN_WRITE BIT(16)
+
#define RK3288_CRU_MODE_CON 0x50
#define RK3288_CRU_SEL0_CON 0x60
#define RK3288_CRU_SEL1_CON 0x64
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index d360ec044b66..b6cf3b449428 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -30,11 +30,30 @@
#include "pm.h"
#define RK3288_GRF_SOC_CON0 0x244
+#define RK3288_TIMER6_7_PHYS 0xff810000
static void __init rockchip_timer_init(void)
{
if (of_machine_is_compatible("rockchip,rk3288")) {
struct regmap *grf;
+ void __iomem *reg_base;
+
+ /*
+ * Most (if not all) U-Boot versions for the rk3288 don't enable
+ * timer7, which is needed for the architected timer to work.
+ * So make sure it is running during early boot.
+ */
+ reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K);
+ if (reg_base) {
+ writel(0, reg_base + 0x30);
+ writel(0xffffffff, reg_base + 0x20);
+ writel(0xffffffff, reg_base + 0x24);
+ writel(1, reg_base + 0x30);
+ dsb();
+ iounmap(reg_base);
+ } else {
+ pr_err("rockchip: could not map timer7 registers\n");
+ }
/*
* Disable auto jtag/sdmmc switching that causes issues
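The bare offsets in the timer7 hunk above decode as follows under the presumed rk3288 timer layout, where timer7 occupies the second half of the TIMER6_7 block; treat the register names as an assumption, not as text from the TRM:

	/*
	 * 0x20  TIMER7_LOAD_COUNT0  low 32 bits of the initial count
	 * 0x24  TIMER7_LOAD_COUNT1  high 32 bits of the initial count
	 * 0x30  TIMER7_CONTROL_REG  bit 0 = enable
	 *
	 * i.e. disable timer7, program a maximal count, then re-enable it
	 * free-running so the architected timer has a ticking input.
	 */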
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 09c5fe3d30c2..7e7583ddd607 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = {
* arm_iommu_attach_device function.
*/
struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
{
unsigned int bits = size >> PAGE_SHIFT;
unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
int extensions = 1;
int err = -ENOMEM;
+ /* currently only 32-bit DMA address space is supported */
+ if (size > DMA_BIT_MASK(32) + 1)
+ return ERR_PTR(-ERANGE);
+
if (!bitmap_size)
return ERR_PTR(-EINVAL);
@@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
if (!iommu)
return false;
- /*
- * currently arm_iommu_create_mapping() takes a max of size_t
- * for size param. So check this limit for now.
- */
- if (size > SIZE_MAX)
- return false;
-
mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
if (IS_ERR(mapping)) {
pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 6333d9c17875..0d629b8f973f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index b98895d9fe57..ee8dfa793989 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page)
void *kmap;
int type;
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
int idx, type;
struct page *page = pfn_to_page(pfn);
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
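
The explicit preempt_disable()/preempt_enable() pairs added here (and in the frv, metag and microblaze highmem files further down) compensate for pagefault_disable() no longer implying disabled preemption. The invariant being preserved, as a usage sketch:

	void *p = kmap_atomic(page);	/* preempt_disable + pagefault_disable */
	memcpy(p, buf, len);		/* per-CPU fixmap slot: no migration   */
	kunmap_atomic(p);		/* pagefault_enable + preempt_enable   */
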
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef896c619..7186382672b5 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
}
/*
- * Find the first non-section-aligned page, and point
+ * Find the first non-pmd-aligned page, and point
* memblock_limit at it. This relies on rounding the
- * limit down to be section-aligned, which happens at
- * the end of this function.
+ * limit down to be pmd-aligned, which happens at the
+ * end of this function.
*
* With this algorithm, the start or end of almost any
- * bank can be non-section-aligned. The only exception
- * is that the start of the bank 0 must be section-
+ * bank can be non-pmd-aligned. The only exception is
+ * that the start of bank 0 must be section-
* aligned, since otherwise memory would need to be
* allocated when mapping the start of bank 0, which
* occurs before any free memory is mapped.
*/
if (!memblock_limit) {
- if (!IS_ALIGNED(block_start, SECTION_SIZE))
+ if (!IS_ALIGNED(block_start, PMD_SIZE))
memblock_limit = block_start;
- else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+ else if (!IS_ALIGNED(block_end, PMD_SIZE))
memblock_limit = arm_lowmem_limit;
}
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
high_memory = __va(arm_lowmem_limit - 1) + 1;
/*
- * Round the memblock limit down to a section size. This
+ * Round the memblock limit down to a pmd size. This
* helps to ensure that we will allocate memory from the
- * last full section, which should be mapped.
+ * last full pmd, which should be mapped.
*/
if (memblock_limit)
- memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+ memblock_limit = round_down(memblock_limit, PMD_SIZE);
if (!memblock_limit)
memblock_limit = arm_lowmem_limit;
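
A sketch of the case this handles on classic (non-LPAE) ARM, where SECTION_SIZE is 1 MiB but a pmd covers 2 MiB (two section entries written as a pair); the end-of-RAM address here is illustrative:

	/* RAM ends at 0x2f700000: 1 MiB-aligned, not 2 MiB-aligned    */
	round_down(0x2f700000, SECTION_SIZE);	/* 0x2f700000, mid-pmd  */
	round_down(0x2f700000, PMD_SIZE);	/* 0x2f600000, aligned  */

Only the pmd-aligned limit guarantees that early page-table allocations are drawn from a fully mapped pmd.
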
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index aa0519eed698..774ef1323554 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -22,8 +22,6 @@
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020.
- *
- * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index bff4c7f70fd6..ae3c27b71594 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -22,8 +22,6 @@
*
* These are the low level assembler for performing cache and TLB
* functions on the arm1020e.
- *
- * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
*/
#include <linux/linkage.h>
#include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ede8c54ab4aa..32a47cc19076 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext)
.type __arm925_setup, #function
__arm925_setup:
mov r0, #0
-#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE)
- orr r0,r0,#1 << 7
-#endif
/* Transparent on, D-cache clean & flush mode. See NOTE2 above */
orr r0,r0,#1 << 1 @ transparent mode on
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index e494d6d6acbe..92e08bf37aad 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -602,7 +602,6 @@ __\name\()_proc_info:
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
initfn __feroceon_setup, __\name\()_proc_info
- .long __feroceon_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index e1268f905026..e0e23582c8b4 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -54,6 +54,7 @@
#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
#define FLAG_NEED_X_RESET (1 << 0)
+#define FLAG_IMM_OVERFLOW (1 << 1)
struct jit_ctx {
const struct bpf_prog *skf;
@@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
/* PC in ARM mode == address of the instruction + 8 */
imm = offset - (8 + ctx->idx * 4);
+ if (imm & ~0xfff) {
+ /*
+ * The literal pool is too far away; signal it in the flags.
+ * Unfortunately we can only detect this on the second pass.
+ */
+ ctx->flags |= FLAG_IMM_OVERFLOW;
+ return 0;
+ }
+
return imm;
}
@@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
return;
}
#endif
- if (rm != ARM_R0)
- emit(ARM_MOV_R(ARM_R0, rm), ctx);
+
+ /*
+ * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
+ * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
+ * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
+ * before using it as a source for ARM_R1.
+ *
+ * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
+ * ARM_R5 (r_X) so there are no particular register overlap
+ * issues.
+ */
if (rn != ARM_R1)
emit(ARM_MOV_R(ARM_R1, rn), ctx);
+ if (rm != ARM_R0)
+ emit(ARM_MOV_R(ARM_R0, rm), ctx);
ctx->seen |= SEEN_CALL;
emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
@@ -855,6 +876,14 @@ b_epilogue:
default:
return -1;
}
+
+ if (ctx->flags & FLAG_IMM_OVERFLOW)
+ /*
+ * this instruction generated an overflow when
+ * trying to access the literal pool, so
+ * delegate this filter to the kernel interpreter.
+ */
+ return -1;
}
/* compute offsets only during the first pass */
@@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp)
ctx.idx = 0;
build_prologue(&ctx);
- build_body(&ctx);
+ if (build_body(&ctx) < 0) {
+#if __LINUX_ARM_ARCH__ < 7
+ if (ctx.imm_count)
+ kfree(ctx.imms);
+#endif
+ bpf_jit_binary_free(header);
+ goto out;
+ }
build_epilogue(&ctx);
flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
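
The 0xfff mask in the imm_offset() hunk above reflects ARM-mode encoding: a literal-pool load (ldr rX, [pc, #imm]) carries a 12-bit immediate, so the pool must sit within 4095 bytes of the instruction. Sketch of the check:

	imm = offset - (8 + ctx->idx * 4);	/* ARM PC = insn address + 8 */
	if (imm & ~0xfff)			/* does not fit in 12 bits   */
		/* flag it; the caller falls back to the interpreter */

Because offsets are only final on the second JIT pass, the overflow can only be detected then, hence the flag-and-bail structure rather than an up-front check.
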
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 224081ccc92f..7d0f07020c80 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { }
void xen_arch_post_suspend(int suspend_cancelled) { }
void xen_timer_resume(void) { }
void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
/* In the hypervisor.S file. */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 793551d15f1d..498325074a06 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -4,6 +4,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/export.h>
+#include <linux/memblock.h>
#include <linux/of_address.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -21,6 +22,20 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+ struct memblock_region *reg;
+ gfp_t flags = __GFP_NOWARN;
+
+ for_each_memblock(memory, reg) {
+ if (reg->base < (phys_addr_t)0xffffffff) {
+ flags |= __GFP_DMA;
+ break;
+ }
+ }
+ return __get_free_pages(flags, order);
+}
+
enum dma_cache_op {
DMA_UNMAP,
DMA_MAP,
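
The new helper biases swiotlb's bounce-buffer allocation: if any memblock region starts below 4 GiB the machine has DMA-addressable low memory, so __GFP_DMA is requested; otherwise the flag would be pointless and is left off. A usage sketch under that reading:

	/* order-N bounce pages for swiotlb, e.g.: */
	void *buf = (void *)xen_get_swiotlb_free_pages(order);
	/* == __get_free_pages(__GFP_NOWARN | __GFP_DMA, order)
	 *    when some RAM sits below the 32-bit boundary      */
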
diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
index c138b95a8356..351c95bda89e 100644
--- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
@@ -21,6 +21,20 @@
clock-output-names = "juno_mb:clk25mhz";
};
+ v2m_refclk1mhz: refclk1mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000>;
+ clock-output-names = "juno_mb:refclk1mhz";
+ };
+
+ v2m_refclk32khz: refclk32khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "juno_mb:refclk32khz";
+ };
+
motherboard {
compatible = "arm,vexpress,v2p-p1", "simple-bus";
#address-cells = <2>; /* SMB chipselect number and offset */
@@ -66,6 +80,15 @@
#size-cells = <1>;
ranges = <0 3 0 0x200000>;
+ v2m_sysctl: sysctl@020000 {
+ compatible = "arm,sp810", "arm,primecell";
+ reg = <0x020000 0x1000>;
+ clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>;
+ clock-names = "refclk", "timclk", "apb_pclk";
+ #clock-cells = <1>;
+ clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+ };
+
mmci@050000 {
compatible = "arm,pl180", "arm,primecell";
reg = <0x050000 0x1000>;
@@ -106,16 +129,16 @@
compatible = "arm,sp804", "arm,primecell";
reg = <0x110000 0x10000>;
interrupts = <9>;
- clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
- clock-names = "timclken1", "apb_pclk";
+ clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
};
v2m_timer23: timer@120000 {
compatible = "arm,sp804", "arm,primecell";
reg = <0x120000 0x10000>;
interrupts = <9>;
- clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
- clock-names = "timclken1", "apb_pclk";
+ clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
};
rtc@170000 {
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 43d54017b779..d0ab012fa379 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -16,7 +16,8 @@
#include "mt8173.dtsi"
/ {
- model = "mediatek,mt8173-evb";
+ model = "MediaTek MT8173 evaluation board";
+ compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
aliases {
serial0 = &uart0;
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
index 9499199924ae..6a37c3c6b11d 100644
--- a/arch/arm64/crypto/crc32-arm64.c
+++ b/arch/arm64/crypto/crc32-arm64.c
@@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+ put_unaligned_le32(ctx->crc, out);
+ return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
put_unaligned_le32(~ctx->crc, out);
return 0;
}
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
- put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out);
+ put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
return 0;
}
@@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+ mctx->key = 0;
+ return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
mctx->key = ~0;
return 0;
}
@@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = {
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksumc_update,
- .final = chksum_final,
+ .final = chksumc_final,
.finup = chksumc_finup,
.digest = chksumc_digest,
.descsize = sizeof(struct chksum_desc_ctx),
@@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = {
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
- .cra_init = crc32_cra_init,
+ .cra_init = crc32c_cra_init,
}
};
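
The split restores the two distinct conventions that the generic software drivers use and that the shared init/final helpers had conflated. Side by side, as a sketch:

	/* crc32  (crc32-generic semantics):  seed 0,  no final inversion */
	/* crc32c (crc32c-generic semantics): seed ~0, final inversion    */
	static u32 crc32_result(u32 crc)  { return crc;  }  /* chksum_final  */
	static u32 crc32c_result(u32 crc) { return ~crc; }  /* chksumc_final */

The same asymmetry explains the dropped inversion in __chksum_finup() and the new crc32c_cra_init() seeding the context with ~0.
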
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 114e7cc5de8c..aefda9868627 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+ sctx->finalize = 0;
kernel_neon_begin_partial(16);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 1340e44c048b..7cd587564a41 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+ sctx->finalize = 0;
kernel_neon_begin_partial(28);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4dc0de..0fa47c4275cb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop");
#define smp_mb__before_atomic() smp_mb()
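
The set_mb() to smp_store_mb() rename (repeated below for ia64, metag and mips) also tightens the store to WRITE_ONCE(), ruling out compiler tearing. The canonical caller is the sleep/wakeup pattern, sketched:

	smp_store_mb(current->state, TASK_UNINTERRUPTIBLE);
	if (!condition)		/* the barrier orders the store ... */
		schedule();	/* ... before this condition re-check */
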
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f750dc96e0f..74069b3bd919 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
if (!ret) {
switch (cmp) {
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 7ebcd31ce51c..225ec3524fbf 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -18,7 +18,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 21033bba9390..28f8365edc4c 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,7 +24,6 @@
#include <asm/cacheflush.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/insn.h>
#include <linux/stop_machine.h>
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -34,48 +33,6 @@ struct alt_region {
struct alt_instr *end;
};
-/*
- * Decode the imm field of a b/bl instruction, and return the byte
- * offset as a signed value (so it can be used when computing a new
- * branch target).
- */
-static s32 get_branch_offset(u32 insn)
-{
- s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
-
- /* sign-extend the immediate before turning it into a byte offset */
- return (imm << 6) >> 4;
-}
-
-static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
-{
- u32 insn;
-
- aarch64_insn_read(altinsnptr, &insn);
-
- /* Stop the world on instructions we don't support... */
- BUG_ON(aarch64_insn_is_cbz(insn));
- BUG_ON(aarch64_insn_is_cbnz(insn));
- BUG_ON(aarch64_insn_is_bcond(insn));
- /* ... and there is probably more. */
-
- if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
- enum aarch64_insn_branch_type type;
- unsigned long target;
-
- if (aarch64_insn_is_b(insn))
- type = AARCH64_INSN_BRANCH_NOLINK;
- else
- type = AARCH64_INSN_BRANCH_LINK;
-
- target = (unsigned long)altinsnptr + get_branch_offset(insn);
- insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
- target, type);
- }
-
- return insn;
-}
-
static int __apply_alternatives(void *alt_region)
{
struct alt_instr *alt;
@@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region)
u8 *origptr, *replptr;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i;
-
if (!cpus_have_cap(alt->cpufeature))
continue;
@@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region)
origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
-
- for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
- insn = get_alt_insn(origptr + i, replptr + i);
- aarch64_insn_write(origptr + i, insn);
- }
-
+ memcpy(origptr, replptr, alt->alt_len);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + alt->alt_len));
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 23f25acf43a9..cce18c85d2e8 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev)
if (!cpu_pmu)
return -ENODEV;
- irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
- if (!irqs)
- return -ENOMEM;
-
/* Don't bother with PPIs; they're already affine */
irq = platform_get_irq(pdev, 0);
if (irq >= 0 && irq_is_percpu(irq))
return 0;
+ irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+ if (!irqs)
+ return -ENOMEM;
+
for (i = 0; i < pdev->num_resources; ++i) {
struct device_node *dn;
int cpu;
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 74c256744b25..f3d6221cd5bd 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -328,10 +328,12 @@ static int ptdump_init(void)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
address_markers[VMEMMAP_START_NR].start_address =
(unsigned long)virt_to_page(PAGE_OFFSET);
address_markers[VMEMMAP_END_NR].start_address =
(unsigned long)virt_to_page(high_memory);
+#endif
pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
&ptdump_fops);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 96da13167d4a..0948d327d013 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
* If we're in an interrupt or have no user context, we must not take
* the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index edba042b2325..dc6a4842683a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -487,7 +487,7 @@ emit_cond_jmp:
return -EINVAL;
}
- imm64 = (u64)insn1.imm << 32 | imm;
+ imm64 = (u64)insn1.imm << 32 | (u32)imm;
emit_a64_mov_i64(dst, imm64, ctx);
return 1;
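
A worked example of the sign-extension bug the (u32) cast fixes: insn->imm is an s32, and a negative value converted straight to u64 smears ones over the upper word (0x12345678 is an arbitrary upper half for illustration):

	s32 imm = -1;					/* 0xffffffff         */
	u64 imm64;
	imm64 = (u64)0x12345678 << 32 | imm;		/* 0xffffffffffffffff */
	imm64 = (u64)0x12345678 << 32 | (u32)imm;	/* 0x12345678ffffffff */
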
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 962a6aeab787..366bbeaeb405 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h
index a46f7cf3e1ea..68cf638faf48 100644
--- a/arch/avr32/include/asm/uaccess.h
+++ b/arch/avr32/include/asm/uaccess.h
@@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index d223a8b57c1e..c03533937a9f 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -14,11 +14,11 @@
#include <linux/pagemap.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
+#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/sysreg.h>
#include <asm/tlb.h>
-#include <asm/uaccess.h>
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, int trap)
@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
* If we're in an interrupt or have no user context, we must
* not take the fault...
*/
- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+ if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
goto no_context;
local_irq_enable();
diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h
index 4e8ad0523118..6abebe82d4e9 100644
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -10,6 +10,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/byteorder.h>
+#include <asm/def_LPBlackfin.h>
#define __raw_readb bfin_read8
#define __raw_readw bfin_read16
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 83f12f2ed9e3..3066d40a6db1 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <arch/system.h>
extern int find_fixup_code(struct pt_regs *);
@@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
info.si_code = SEGV_MAPERR;
/*
- * If we're in an interrupt or "atomic" operation or have no
+ * If we're in an interrupt, have pagefaults disabled or have no
* user context, we must not take the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index ec4917ddf678..61d99767fe16 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -19,9 +19,9 @@
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/uaccess.h>
#include <asm/gdb-stub.h>
/*****************************************************************************/
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(__frame))
diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c
index bed9a9bd3c10..785344bbdc07 100644
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
unsigned long paddr;
int type;
+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
paddr = page_to_phys(page);
@@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr)
}
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 9e7802911a57..a6e34e2acbba 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
* looks just like atomic_cmpxchg on our arch currently with a bunch of
* variable casting.
*/
-#define __HAVE_ARCH_CMPXCHG 1
#define cmpxchg(ptr, old, new) \
({ \
diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
index e4127e4d6a5b..f000a382bc7f 100644
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -36,7 +36,8 @@
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb2bbf9..843ba435e43b 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
___p1; \
})
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet. Grrr...
- */
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 6437ca21f61b..3ad8f6988363 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -53,7 +53,7 @@ void build_cpu_to_node_map(void);
#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id)
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#endif
extern void arch_fix_phys_package_id(int num, u32 slot);
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index f35109b1d907..a0e3620f8f13 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
* indicated by comparing RETURN with OLD.
*/
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* This function doesn't exist, so you'll get a linker error
* if something tries to do an invalid cmpxchg().
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 15051e9c2c6f..b054c5c6e713 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -127,7 +127,7 @@ int smp_num_siblings = 1;
volatile int ia64_cpu_to_sapicid[NR_CPUS];
EXPORT_SYMBOL(ia64_cpu_to_sapicid);
-static volatile cpumask_t cpu_callin_map;
+static cpumask_t cpu_callin_map;
struct smp_boot_data smp_boot_data __initdata;
@@ -477,6 +477,7 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
for (timeout = 0; timeout < 100000; timeout++) {
if (cpumask_test_cpu(cpu, &cpu_callin_map))
break; /* It has booted */
+ barrier(); /* Make sure we re-read cpu_callin_map */
udelay(100);
}
Dprintk("\n");
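
With volatile gone from cpu_callin_map, nothing else forces a reload inside the wait loop, so the compiler could legally hoist the read. A sketch of the miscompilation the added barrier() prevents:

	/* hoisted: one read before the loop, later updates never seen */
	mask = cpu_callin_map;
	for (timeout = 0; timeout < 100000; timeout++)
		if (cpumask_test_cpu(cpu, &mask))
			break;

barrier() is a full compiler barrier ("memory" clobber), so each iteration re-reads the mask from memory.
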
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index ba5ba7accd0d..70b40d1205a6 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -11,10 +11,10 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prefetch.h>
+#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>
extern int die(char *, struct pt_regs *, long);
@@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
/*
* If we're in an interrupt or have no user context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
#ifdef CONFIG_VIRTUAL_MEM_MAP
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index d4e162d35b34..7cc3be9fa7c6 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- struct pci_controller *controller = bridge->bus->sysdata;
-
- ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+ /*
+ * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+ * here, pci_create_root_bus() has been called by someone else and
+ * sysdata is likely to be different from what we expect. Let it go in
+ * that case.
+ */
+ if (!bridge->dev.parent) {
+ struct pci_controller *controller = bridge->bus->sysdata;
+ ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+ }
return 0;
}
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index de651db20b43..14bf9b739dd2 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
{
diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
index 71adff209405..cac7014daef3 100644
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s)
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
@@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs);
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs);
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs);
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -274,7 +278,8 @@ do { \
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space.
*
@@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space.
*
@@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len);
* strlen_user: - Get the size of a string in user space.
* @str: The string to measure.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Get the size of a NUL-terminated string in user space.
*
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index ce7aea34fdf4..c18ddc74ef9a 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0;
/*
* For flush_tlb_others()
*/
-static volatile cpumask_t flush_cpumask;
+static cpumask_t flush_cpumask;
static struct mm_struct *flush_mm;
static struct vm_area_struct *flush_vma;
static volatile unsigned long flush_va;
@@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
*/
send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
- while (!cpumask_empty((cpumask_t*)&flush_cpumask)) {
+ while (!cpumask_empty(&flush_cpumask)) {
/* nothing. lockup detection does not belong here */
mb();
}
@@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void)
__flush_tlb_page(va);
}
}
- cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask);
+ cpumask_clear_cpu(cpu_id, &flush_cpumask);
}
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index e3d4d4890104..8f9875b7933d 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -24,9 +24,9 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/m32r.h>
-#include <asm/uaccess.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
mm = tsk->mm;
/*
- * If we're in an interrupt or have no user context or are running in an
- * atomic region then we must not take the fault..
+ * If we're in an interrupt or have no user context or have pagefaults
+ * disabled then we must not take the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;
if (error_code & ACE_USERMODE)
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index bc755bc620ad..83b1df80f0ac 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
* indicated by comparing RETURN with OLD.
*/
#ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
unsigned long new, int size)
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h
index a823cd73dc09..b5941818346f 100644
--- a/arch/m68k/include/asm/irqflags.h
+++ b/arch/m68k/include/asm/irqflags.h
@@ -2,9 +2,6 @@
#define _M68K_IRQFLAGS_H
#include <linux/types.h>
-#ifdef CONFIG_MMU
-#include <linux/preempt_mask.h>
-#endif
#include <linux/preempt.h>
#include <asm/thread_info.h>
#include <asm/entry.h>
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index b2f04aee46ec..6a94cdd0c830 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -10,10 +10,10 @@
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/setup.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
extern void die_if_kernel(char *, struct pt_regs *, long);
@@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e26a65..5a696e507930 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
index b1bc1be8540f..be29e3e44321 100644
--- a/arch/metag/include/asm/cmpxchg.h
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old;
}
-#define __HAVE_ARCH_CMPXCHG 1
-
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
index 2de5dc695a87..f57edca63609 100644
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
mm = tsk->mm;
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
index d71f621a2c0b..807f1b1c4e65 100644
--- a/arch/metag/mm/highmem.c
+++ b/arch/metag/mm/highmem.c
@@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr)
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
@@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int type;
+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 62942fd12672..331b0d35f89c 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -178,7 +178,8 @@ extern long __user_bad(void);
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -290,7 +291,8 @@ extern long __user_bad(void);
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index d46a5ebb7570..177dfc003643 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
is_write = 0;
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
if (kernel_mode(regs))
goto bad_area_nosemaphore;
- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
- pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
- mm);
+ pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
+ mm);
pr_emerg("r15 = %lx MSR = %lx\n",
regs->r15, regs->msr);
die("Weird page fault", regs, SIGSEGV);
diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c
index 5a92576fad92..2fcc5a52d84d 100644
--- a/arch/microblaze/mm/highmem.c
+++ b/arch/microblaze/mm/highmem.c
@@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
unsigned long vaddr;
int idx, type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr)
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}
@@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 5200f649dd4e..ae2dd59050f7 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -277,7 +277,7 @@ LDFLAGS += -m $(ld-emul)
ifdef CONFIG_MIPS
CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
- sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
+ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
ifdef CONFIG_64BIT
CHECKFLAGS += -m64
endif
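
The extra sed expression doubles every literal '$' in the macro values ($$ in a Makefile is a single '$' handed to the shell), so that make does not later interpret it as a variable reference when CHECKFLAGS is expanded. A hypothetical macro illustrating the transformation:

	#   #define __REG__ $4    (hypothetical compiler-provided macro)
	# old: -D'__REG__'='$4'   -> make swallows "$4" on expansion
	# new: -D'__REG__'='$$4'  -> make expands it back to "$4"
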
diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c
index e1fe63051136..597899ad5438 100644
--- a/arch/mips/ath79/prom.c
+++ b/arch/mips/ath79/prom.c
@@ -1,6 +1,7 @@
/*
* Atheros AR71XX/AR724X/AR913X specific prom routines
*
+ * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
* Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
* Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
*
@@ -25,12 +26,14 @@ void __init prom_init(void)
{
fw_init_cmdline();
+#ifdef CONFIG_BLK_DEV_INITRD
/* Read the initrd address from the firmware environment */
initrd_start = fw_getenvl("initrd_start");
if (initrd_start) {
initrd_start = KSEG0ADDR(initrd_start);
initrd_end = initrd_start + fw_getenvl("initrd_size");
}
+#endif
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index a73c93c3d44a..7fc8397d16f2 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -225,7 +225,7 @@ void __init plat_time_init(void)
ddr_clk_rate = ath79_get_sys_clk_rate("ddr");
ref_clk_rate = ath79_get_sys_clk_rate("ref");
- pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz",
+ pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz\n",
cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000,
ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000,
ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000,
diff --git a/arch/mips/cobalt/Makefile b/arch/mips/cobalt/Makefile
index 558e94977942..68f0c5871adc 100644
--- a/arch/mips/cobalt/Makefile
+++ b/arch/mips/cobalt/Makefile
@@ -2,7 +2,6 @@
# Makefile for the Cobalt micro systems family specific parts of the kernel
#
-obj-y := buttons.o irq.o lcd.o led.o reset.o rtc.o serial.o setup.o time.o
+obj-y := buttons.o irq.o lcd.o led.o mtd.o reset.o rtc.o serial.o setup.o time.o
obj-$(CONFIG_PCI) += pci.o
-obj-$(CONFIG_MTD_PHYSMAP) += mtd.o
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 002680648dcb..b2a577ebce0b 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
CONFIG_USB_C67X00_HCD=m
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ISP1760_HCD=m
+CONFIG_USB_ISP1760=m
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=m
CONFIG_USB_R8A66597_HCD=m
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbcb9be0..7ecba84656d4 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,8 +112,8 @@
#define __WEAK_LLSC_MB " \n"
#endif
-#define set_mb(var, value) \
- do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) \
+ do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 412f945f1f5e..b71ab4a5fd50 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
})
-#define __HAVE_ARCH_CMPXCHG 1
-
#define __cmpxchg_asm(ld, st, m, old, new) \
({ \
__typeof(*(m)) __ret; \
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index a594d8ed9698..f19e890b99d2 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -304,7 +304,7 @@ do { \
\
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \
+ current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
} while (0)
#endif /* CONFIG_32BIT */
@@ -366,7 +366,7 @@ do { \
else \
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \
+ current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
\
p = personality(current->personality); \
if (p != PER_LINUX32 && p != PER_LINUX) \
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 18ae5ddef118..c28a8499aec7 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -113,7 +113,7 @@
#define _PAGE_PRESENT_SHIFT 0
#define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT)
/* R2 or later cores check for RI/XI support to determine _PAGE_READ */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
#define _PAGE_WRITE_SHIFT (_PAGE_PRESENT_SHIFT + 1)
#define _PAGE_WRITE (1 << _PAGE_WRITE_SHIFT)
#else
@@ -135,16 +135,16 @@
#define _PAGE_SPLITTING (1 << _PAGE_SPLITTING_SHIFT)
/* Only R2 or newer cores have the XI bit */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
#define _PAGE_NO_EXEC_SHIFT (_PAGE_SPLITTING_SHIFT + 1)
#else
#define _PAGE_GLOBAL_SHIFT (_PAGE_SPLITTING_SHIFT + 1)
#define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT)
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
#endif /* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
/* XI - page cannot be executed */
#ifndef _PAGE_NO_EXEC_SHIFT
#define _PAGE_NO_EXEC_SHIFT (_PAGE_MODIFIED_SHIFT + 1)
@@ -160,10 +160,10 @@
#define _PAGE_GLOBAL_SHIFT (_PAGE_NO_READ_SHIFT + 1)
#define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT)
-#else /* !CONFIG_CPU_MIPSR2 */
+#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
#define _PAGE_GLOBAL_SHIFT (_PAGE_MODIFIED_SHIFT + 1)
#define _PAGE_GLOBAL (1 << _PAGE_GLOBAL_SHIFT)
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
#define _PAGE_VALID_SHIFT (_PAGE_GLOBAL_SHIFT + 1)
#define _PAGE_VALID (1 << _PAGE_VALID_SHIFT)
@@ -205,7 +205,7 @@
*/
static inline uint64_t pte_to_entrylo(unsigned long pte_val)
{
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
if (cpu_has_rixi) {
int sa;
#ifdef CONFIG_32BIT
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index bb02fac9b4fa..2b25d1ba1ea0 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_DUMP 0x8
#define SMP_ASK_C0COUNT 0x10
-extern volatile cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callin_map;
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index e92d6c4b5ed1..7163cd7fdd69 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -104,7 +104,6 @@ do { \
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
__fpsave = FP_SAVE_VECTOR; \
(last) = resume(prev, next, task_thread_info(next), __fpsave); \
- disable_msa(); \
} while (0)
#define finish_arch_switch(prev) \
@@ -122,6 +121,7 @@ do { \
if (cpu_has_userlocal) \
write_c0_userlocal(current_thread_info()->tp_value); \
__restore_watch(); \
+ disable_msa(); \
} while (0)
#endif /* _ASM_SWITCH_TO_H */
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
index 3e307ec2afba..7afda4150a59 100644
--- a/arch/mips/include/asm/topology.h
+++ b/arch/mips/include/asm/topology.h
@@ -15,7 +15,7 @@
#define topology_physical_package_id(cpu) (cpu_data[cpu].package)
#define topology_core_id(cpu) (cpu_data[cpu].core)
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu])
+#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
#endif
#endif /* __ASM_TOPOLOGY_H */
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index bf8b32450ef6..9722357d2854 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -103,7 +103,8 @@ extern u64 __ua_limit;
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
@@ -138,7 +139,8 @@ extern u64 __ua_limit;
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -157,7 +159,8 @@ extern u64 __ua_limit;
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -177,7 +180,8 @@ extern u64 __ua_limit;
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -199,7 +203,8 @@ extern u64 __ua_limit;
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -498,7 +503,8 @@ extern void __put_user_unknown(void);
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -517,7 +523,8 @@ extern void __put_user_unknown(void);
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -537,7 +544,8 @@ extern void __put_user_unknown(void);
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -559,7 +567,8 @@ extern void __put_user_unknown(void);
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n);
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space.
*
@@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space.
*
@@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len)
* strlen_user: - Get the size of a string in user space.
* @str: The string to measure.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Get the size of a NUL-terminated string in user space.
*
@@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const char __user *s, long n)
* strnlen_user: - Get the size of a string in user space.
* @str: The string to measure.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Get the size of a NUL-terminated string in user space.
*
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index e36515dcd3b2..209e5b76c1bc 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -74,13 +74,12 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
{
unsigned long sr, mask, fcsr, fcsr0, fcsr1;
+ fcsr = c->fpu_csr31;
mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
sr = read_c0_status();
__enable_fpu(FPU_AS_IS);
- fcsr = read_32bit_cp1_register(CP1_STATUS);
-
fcsr0 = fcsr & mask;
write_32bit_cp1_register(CP1_STATUS, fcsr0);
fcsr0 = read_32bit_cp1_register(CP1_STATUS);
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index be4899f3c393..4a4d9e067c89 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
/* Let's see if this is an O32 ELF */
if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
- /* FR = 1 for N32 */
- if (ehdr32->e_flags & EF_MIPS_ABI2)
- state->overall_fp_mode = FP_FR1;
- else
- /* Set a good default FPU mode for O32 */
- state->overall_fp_mode = cpu_has_mips_r6 ?
- FP_FRE : FP_FR0;
-
if (ehdr32->e_flags & EF_MIPS_FP64) {
/*
* Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
@@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
(char *)&abiflags,
sizeof(abiflags));
} else {
- /* FR=1 is really the only option for 64-bit */
- state->overall_fp_mode = FP_FR1;
-
if (phdr64->p_type != PT_MIPS_ABIFLAGS)
return 0;
if (phdr64->p_filesz < sizeof(abiflags))
@@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
struct elf32_hdr *ehdr = _ehdr;
struct mode_req prog_req, interp_req;
int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
+ bool is_mips64;
if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
return 0;
@@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
abi0 = abi1 = fp_abi;
}
- /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */
- max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
- (!(ehdr->e_flags & EF_MIPS_ABI2))) ?
- MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT;
+ is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
+ (ehdr->e_flags & EF_MIPS_ABI2);
+
+ if (is_mips64) {
+ /* MIPS64 code always uses FR=1, thus the default is easy */
+ state->overall_fp_mode = FP_FR1;
+
+ /* Disallow access to the various FPXX & FP64 ABIs */
+ max_abi = MIPS_ABI_FP_SOFT;
+ } else {
+ /* Default to a mode capable of running code expecting FR=0 */
+ state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
+
+ /* Allow all ABIs we know about */
+ max_abi = MIPS_ABI_FP_64A;
+ }
if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
(abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN))
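A runnable userspace sketch of the decision the rewritten arch_check_elf() hunk makes: an object is treated as MIPS64 when it is ELFCLASS64 or carries EF_MIPS_ABI2 (N32), and only then defaults to FR=1. The ELF constants are the standard values; the FP_* enum values are illustrative placeholders, not the kernel's.

	#include <stdio.h>

	#define ELFCLASS32   1
	#define ELFCLASS64   2
	#define EF_MIPS_ABI2 0x00000020 /* set for N32 objects */

	enum { FP_FR0, FP_FR1, FP_FRE };

	static int default_fp_mode(int ei_class, unsigned e_flags, int cpu_has_r6)
	{
		int is_mips64 = (ei_class == ELFCLASS64) ||
				(e_flags & EF_MIPS_ABI2);

		if (is_mips64)
			return FP_FR1;               /* 64-bit code always runs FR=1 */
		return cpu_has_r6 ? FP_FRE : FP_FR0; /* keep FR=0 code working */
	}

	int main(void)
	{
		printf("n64: %d\n", default_fp_mode(ELFCLASS64, 0, 0));            /* FP_FR1 */
		printf("n32: %d\n", default_fp_mode(ELFCLASS32, EF_MIPS_ABI2, 0)); /* FP_FR1 */
		printf("o32 on r6: %d\n", default_fp_mode(ELFCLASS32, 0, 1));      /* FP_FRE */
		return 0;
	}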
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index d2bfbc2e8995..3c8a18a00a65 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -29,7 +29,7 @@
int kgdb_early_setup;
#endif
-static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+static DECLARE_BITMAP(irq_map, NR_IRQS);
int allocate_irqno(void)
{
@@ -109,7 +109,7 @@ void __init init_IRQ(void)
#endif
}
-#ifdef DEBUG_STACKOVERFLOW
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
static inline void check_stack_overflow(void)
{
unsigned long sp;
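A runnable userspace sketch of why the DECLARE_BITMAP() change above matters: the hand-sized array divides NR_IRQS by BITS_PER_LONG and truncates, losing the tail bits whenever NR_IRQS is not a multiple of the word size, while the macro rounds up. NR_IRQS of 100 here is illustrative.

	#include <stdio.h>

	#define BITS_PER_LONG    (8 * sizeof(long))
	#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
	#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]

	#define NR_IRQS 100 /* illustrative: not a multiple of 64 */

	static unsigned long truncated[NR_IRQS / BITS_PER_LONG]; /* 1 long = 64 bits */
	static DECLARE_BITMAP(rounded_up, NR_IRQS);              /* 2 longs = 128 bits */

	int main(void)
	{
		printf("truncated covers %zu bits, bitmap covers %zu bits\n",
		       sizeof(truncated) * 8, sizeof(rounded_up) * 8);
		return 0;
	}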
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d544e774eea6..e933a309f2ea 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -176,7 +176,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
__get_user(value, data + 64);
fcr31 = child->thread.fpu.fcr31;
- mask = current_cpu_data.fpu_msk31;
+ mask = boot_cpu_data.fpu_msk31;
child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
/* FIR may not be written. */
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
index 06805e09bcd3..0b85f827cd18 100644
--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
extern int fpcsr_pending(unsigned int __user *fpcsr);
/* Make sure we will not lose FPU ownership */
-#ifdef CONFIG_PREEMPT
-#define lock_fpu_owner() preempt_disable()
-#define unlock_fpu_owner() preempt_enable()
-#else
-#define lock_fpu_owner() pagefault_disable()
-#define unlock_fpu_owner() pagefault_enable()
-#endif
+#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
+#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
#endif /* __SIGNAL_COMMON_H */
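A hedged sketch (kernel context only) of what the merged macros above do: once pagefault_disable() no longer implies preempt_disable(), code that must keep FPU ownership has to pin both states explicitly and release them in reverse order.

	static inline void lock_fpu_owner(void)
	{
		preempt_disable();   /* no migration: the owning CPU stays put */
		pagefault_disable(); /* user accesses fail fast, never sleep */
	}

	static inline void unlock_fpu_owner(void)
	{
		pagefault_enable();
		preempt_enable();
	}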
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index fd528d7ea278..336708ae5c5b 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
static void bmips_wr_vec(unsigned long dst, char *start, char *end)
{
memcpy((void *)dst, start, end - start);
- dma_cache_wback((unsigned long)start, end - start);
+ dma_cache_wback(dst, end - start);
local_flush_icache_range(dst, dst + (end - start));
instruction_hazard();
}
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 7e011f95bb8e..4251d390b5b6 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -92,7 +92,7 @@ static void __init cps_smp_setup(void)
#ifdef CONFIG_MIPS_MT_FPAFF
/* If we have an FPU, enroll ourselves in the FPU-full mask */
if (cpu_has_fpu)
- cpu_set(0, mt_fpu_cpumask);
+ cpumask_set_cpu(0, &mt_fpu_cpumask);
#endif /* CONFIG_MIPS_MT_FPAFF */
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 193ace7955fb..faa46ebd9dda 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -43,7 +43,7 @@
#include <asm/time.h>
#include <asm/setup.h>
-volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
+cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
EXPORT_SYMBOL(__cpu_number_map);
@@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
/*
* Trust is futile. We should really have timeouts ...
*/
- while (!cpumask_test_cpu(cpu, &cpu_callin_map))
+ while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
udelay(100);
+ schedule();
+ }
synchronise_count_master(cpu);
return 0;
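A hedged kernel-context sketch of the direction the "Trust is futile" comment above points: the patch adds schedule() so the boot CPU yields while polling, and a bounded variant might look like this. CALLIN_TIMEOUT_USEC and wait_for_callin are illustrative names, not kernel APIs.

	#define CALLIN_TIMEOUT_USEC 1000000 /* illustrative budget */

	static int wait_for_callin(unsigned int cpu)
	{
		unsigned long spins = 0;

		while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
			if (spins++ > CALLIN_TIMEOUT_USEC / 100)
				return -EIO; /* give up instead of hanging */
			udelay(100);
			schedule();          /* let other work run meanwhile */
		}
		return 0;
	}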
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ba32e48d4697..d2d1c1933bc9 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -269,7 +269,6 @@ static void __show_regs(const struct pt_regs *regs)
*/
printk("epc : %0*lx %pS\n", field, regs->cp0_epc,
(void *) regs->cp0_epc);
- printk(" %s\n", print_tainted());
printk("ra : %0*lx %pS\n", field, regs->regs[31],
(void *) regs->regs[31]);
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 6230f376a44e..d5fa3eaf39a1 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -2389,7 +2389,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
{
unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
enum emulation_result er = EMULATE_DONE;
- unsigned long curr_pc;
if (run->mmio.len > sizeof(*gpr)) {
kvm_err("Bad MMIO length: %d", run->mmio.len);
@@ -2397,11 +2396,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
goto done;
}
- /*
- * Update PC and hold onto current PC in case there is
- * an error and we want to rollback the PC
- */
- curr_pc = vcpu->arch.pc;
er = update_pc(vcpu, vcpu->arch.pending_load_cause);
if (er == EMULATE_FAIL)
return er;
@@ -2415,7 +2409,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
if (vcpu->mmio_needed == 2)
*gpr = *(int16_t *) run->mmio.data;
else
- *gpr = *(int16_t *) run->mmio.data;
+ *gpr = *(uint16_t *)run->mmio.data;
break;
case 1:
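A runnable demonstration of the one-character MMIO fix above: loading 16 bits through int16_t sign-extends into the wide GPR (the LH case, mmio_needed == 2), while uint16_t zero-extends (LHU); the old code used the signed cast for both.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		union { uint8_t bytes[2]; int16_t s; uint16_t u; } mmio =
			{ .bytes = { 0xff, 0xff } }; /* 16-bit value 0xffff */
		unsigned long gpr;

		gpr = (unsigned long)mmio.s;          /* lh: sign-extends */
		printf("signed   load: %lx\n", gpr);  /* ffffffffffffffff on LP64 */

		gpr = (unsigned long)mmio.u;          /* lhu: zero-extends */
		printf("unsigned load: %lx\n", gpr);  /* ffff */
		return 0;
	}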
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 7d12c0dded3d..77e64942f004 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
FEXPORT(__strnlen_\func\()_nocheck_asm)
move v0, a0
PTR_ADDU a1, a0 # stop pointer
-1: beq v0, a1, 1f # limit reached?
+1:
+#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
+ .set noat
+ li AT, 1
+#endif
+ beq v0, a1, 1f # limit reached?
.ifeqs "\func", "kernel"
EX(lb, t0, (v0), .Lfault\@)
.else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
.endif
.set noreorder
bnez t0, 1b
-1: PTR_ADDIU v0, 1
+1:
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+ PTR_ADDIU v0, 1
+#else
+ PTR_ADDU v0, AT
+ .set at
+#endif
.set reorder
PTR_SUBU v0, a0
jr ra
diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile
index e70c33fdb881..f2e8153e44f5 100644
--- a/arch/mips/loongson/common/Makefile
+++ b/arch/mips/loongson/common/Makefile
@@ -3,15 +3,13 @@
#
obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
- bonito-irq.o mem.o machtype.o platform.o
+ bonito-irq.o mem.o machtype.o platform.o serial.o
obj-$(CONFIG_PCI) += pci.o
#
# Serial port support
#
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-loongson-serial-$(CONFIG_SERIAL_8250) := serial.o
-obj-y += $(loongson-serial-m) $(loongson-serial-y)
obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c
index e3c68b5da18d..509877c6e9d9 100644
--- a/arch/mips/loongson/loongson-3/smp.c
+++ b/arch/mips/loongson/loongson-3/smp.c
@@ -272,7 +272,7 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
if (action & SMP_ASK_C0COUNT) {
BUG_ON(cpu != 0);
c0count = read_c0_count();
- for (i = 1; i < loongson_sysconf.nr_cpus; i++)
+ for (i = 1; i < num_possible_cpus(); i++)
per_cpu(core0_c0count, i) = c0count;
}
}
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d31c537ace1d..22b9b2cb9219 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -889,7 +889,7 @@ static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
break;
case FPCREG_RID:
- value = current_cpu_data.fpu_id;
+ value = boot_cpu_data.fpu_id;
break;
default:
@@ -921,7 +921,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
(void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
/* Preserve read-only bits. */
- mask = current_cpu_data.fpu_msk31;
+ mask = boot_cpu_data.fpu_msk31;
fcr31 = (value & ~mask) | (fcr31 & mask);
break;
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 0dbb65a51ce5..2e03ab173591 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1372,7 +1372,7 @@ static int probe_scache(void)
scache_size = addr;
c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22);
c->scache.ways = 1;
- c->dcache.waybit = 0; /* does not matter */
+ c->scache.waybit = 0; /* does not matter */
return 1;
}
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 7ff8637e530d..36c0f26fac6b 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -21,10 +21,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
+#include <linux/uaccess.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/highmem.h> /* For VMALLOC_END */
#include <linux/kdebug.h>
@@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;
if (user_mode(regs))
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
index da815d295239..11661cbc11a8 100644
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int idx, type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr)
if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}
@@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
@@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int idx, type;
+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
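A hedged kernel-context sketch of the pairing the highmem hunks above enforce: every kmap_atomic()/kunmap_atomic() pair now brackets both preemption and pagefault state, nested in acquisition order. The helper name is illustrative.

	static void copy_into_highmem_page(struct page *page,
					   const void *src, size_t len)
	{
		void *vaddr = kmap_atomic(page); /* preempt_disable() + pagefault_disable() */

		memcpy(vaddr, src, len);         /* must not sleep in this window */
		kunmap_atomic(vaddr);            /* pagefault_enable() + preempt_enable() */
	}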
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index faa5c9822ecc..198a3147dd7d 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
BUG_ON(Page_dcache_dirty(page));
+ preempt_disable();
pagefault_disable();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
idx += in_interrupt() ? FIX_N_COLOURS : 0;
@@ -152,6 +153,7 @@ void kunmap_coherent(void)
write_c0_entryhi(old_ctx);
local_irq_restore(flags);
pagefault_enable();
+ preempt_enable();
}
void copy_user_highpage(struct page *to, struct page *from,
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index a27a088e6f9f..08318ecb803a 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -495,7 +495,7 @@ static void r4k_tlb_configure(void)
if (cpu_has_rixi) {
/*
- * Enable the no read, no exec bits, and enable large virtual
+ * Enable the no read, no exec bits, and enable large physical
* address.
*/
#ifdef CONFIG_64BIT
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 5d6139390bf8..e23fdf2a9c80 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -681,11 +681,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
sp_off += config_enabled(CONFIG_64BIT) ?
(ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;
- /*
- * Subtract the bytes for the last registers since we only care about
- * the location on the stack pointer.
- */
- return sp_off - RSIZE;
+ return sp_off;
}
static void build_prologue(struct jit_ctx *ctx)
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index e20b02e3ae28..e10d10b9e82a 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -41,7 +41,7 @@ static irqreturn_t ill_acc_irq_handler(int irq, void *_priv)
addr, (type >> ILL_ACC_OFF_S) & ILL_ACC_OFF_M,
type & ILL_ACC_LEN_M);
- rt_memc_w32(REG_ILL_ACC_TYPE, REG_ILL_ACC_TYPE);
+ rt_memc_w32(ILL_INT_STATUS, REG_ILL_ACC_TYPE);
return IRQ_HANDLED;
}
diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c
index 0134db2ad0a8..5a2a82148d8d 100644
--- a/arch/mips/sgi-ip32/ip32-platform.c
+++ b/arch/mips/sgi-ip32/ip32-platform.c
@@ -130,9 +130,9 @@ struct platform_device ip32_rtc_device = {
.resource = ip32_rtc_resources,
};
-+static int __init sgio2_rtc_devinit(void)
+static __init int sgio2_rtc_devinit(void)
{
return platform_device_register(&ip32_rtc_device);
}
-device_initcall(sgio2_cmos_devinit);
+device_initcall(sgio2_rtc_devinit);
diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h
index 2fbbe4d920aa..1ddea5afba09 100644
--- a/arch/mn10300/include/asm/highmem.h
+++ b/arch/mn10300/include/asm/highmem.h
@@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page)
unsigned long vaddr;
int idx, type;
+ preempt_disable();
pagefault_disable();
if (page < highmem_start_page)
return page_address(page);
@@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
if (vaddr < FIXADDR_START) { /* FIXME */
pagefault_enable();
+ preempt_enable();
return;
}
@@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
#endif /* __KERNEL__ */
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index 0c2cc5d39c8e..4a1d181ed32f 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -23,8 +23,8 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/hardirq.h>
#include <asm/cpu-regs.h>
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 0c9b6afe69e9..b51878b0c6b8 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;
if (user_mode(regs))
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index de65f66ea64e..ec2df4bab302 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -142,6 +142,7 @@ static inline void kunmap(struct page *page)
static inline void *kmap_atomic(struct page *page)
{
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void *addr)
{
flush_kernel_dcache_page_addr(addr);
pagefault_enable();
+ preempt_enable();
}
#define kmap_atomic_prot(page, prot) kmap_atomic(page)
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index dbd13354ec41..0a90b965cccb 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
/* bug catcher for when unsupported size is used - won't link */
extern void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 3391d061eccc..78c9fd32c554 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -348,6 +348,10 @@ struct pt_regs; /* forward declaration... */
#define ELF_HWCAP 0
+#define STACK_RND_MASK (is_32bit_task() ? \
+ 0x7ff >> (PAGE_SHIFT - 12) : \
+ 0x3ffff >> (PAGE_SHIFT - 12))
+
struct mm_struct;
extern unsigned long arch_randomize_brk(struct mm_struct *);
#define arch_randomize_brk arch_randomize_brk
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 8a488c22a99f..809905a811ed 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -181,9 +181,12 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
return 1;
}
+/*
+ * Copy architecture-specific thread state
+ */
int
copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long arg, struct task_struct *p)
+ unsigned long kthread_arg, struct task_struct *p)
{
struct pt_regs *cregs = &(p->thread.regs);
void *stack = task_stack_page(p);
@@ -195,11 +198,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
extern void * const child_return;
if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
memset(cregs, 0, sizeof(struct pt_regs));
if (!usp) /* idle thread */
return 0;
-
- /* kernel thread */
/* Must exit via ret_from_kernel_thread in order
* to call schedule_tail()
*/
@@ -215,7 +217,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
#else
cregs->gr[26] = usp;
#endif
- cregs->gr[25] = arg;
+ cregs->gr[25] = kthread_arg;
} else {
/* user thread */
/* usp must be word aligned. This also prevents users from
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index e1ffea2f9a0b..5aba01ac457f 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -77,6 +77,9 @@ static unsigned long mmap_upper_limit(void)
if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX;
+ /* Add space for stack randomization. */
+ stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
return PAGE_ALIGN(STACK_TOP - stack_base);
}
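A runnable arithmetic check for the STACK_RND_MASK reserve added above, assuming 4 KiB pages (PAGE_SHIFT == 12): the mask leaves roughly 8 MiB of randomization span for 32-bit tasks and roughly 1 GiB for 64-bit ones.

	#include <stdio.h>

	#define PAGE_SHIFT 12 /* assumed 4 KiB pages */
	#define STACK_RND_MASK(is_32bit) ((unsigned long)((is_32bit) ? \
		0x7ff >> (PAGE_SHIFT - 12) : \
		0x3ffff >> (PAGE_SHIFT - 12)))

	int main(void)
	{
		printf("32-bit reserve: %lu bytes (~8 MiB)\n",
		       STACK_RND_MASK(1) << PAGE_SHIFT);
		printf("64-bit reserve: %lu bytes (~1 GiB)\n",
		       STACK_RND_MASK(0) << PAGE_SHIFT);
		return 0;
	}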
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 47ee620d15d2..6548fd1d2e62 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -26,9 +26,9 @@
#include <linux/console.h>
#include <linux/bug.h>
#include <linux/ratelimit.h>
+#include <linux/uaccess.h>
#include <asm/assembly.h>
-#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/traps.h>
@@ -800,7 +800,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
* unless pagefault_disable() was called before.
*/
- if (fault_space == 0 && !in_atomic())
+ if (fault_space == 0 && !faulthandler_disabled())
{
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
parisc_terminate("Kernel Fault", regs, code, fault_address);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e5120e653240..15503adddf4f 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -15,8 +15,8 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/traps.h>
/* Various important other fields */
@@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
int fault;
unsigned int flags;
- if (in_atomic())
+ if (pagefault_disabled())
goto no_context;
tsk = current;
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be111ff..51ccc7232042 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
@@ -89,5 +89,6 @@ do { \
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
+#define smp_mb__before_spinlock() smp_mb()
#endif /* _ASM_POWERPC_BARRIER_H */
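A hedged userspace analogue of the renamed smp_store_mb() above: a single store (through a WRITE_ONCE-style atomic write) followed by a full memory barrier, so the store is ordered before any later load. GCC/Clang __atomic builtins stand in for the kernel primitives here.

	#include <stdio.h>

	static int flag;

	#define smp_store_mb(var, value) do { \
		__atomic_store_n(&(var), (value), __ATOMIC_RELAXED); /* WRITE_ONCE */ \
		__atomic_thread_fence(__ATOMIC_SEQ_CST);             /* mb()       */ \
	} while (0)

	int main(void)
	{
		smp_store_mb(flag, 1);
		printf("flag = %d\n", flag);
		return 0;
	}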
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68fe7f0..ad6263cffb0f 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-#define __HAVE_ARCH_CMPXCHG 1
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 5f1048eaa5b6..8b3b46b7b0f2 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -87,7 +87,7 @@ static inline int prrn_is_enabled(void)
#include <asm/smp.h>
#define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu))
-#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
#endif
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 15c99b649b04..b2eb4686bd8f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
uint64_t nip, uint64_t addr)
{
uint64_t srr1;
- int index = __this_cpu_inc_return(mce_nest_count);
+ int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
/*
@@ -184,7 +184,7 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
- index = __this_cpu_inc_return(mce_queue_count);
+ index = __this_cpu_inc_return(mce_queue_count) - 1;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
__this_cpu_dec(mce_queue_count);
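A runnable model of the mce index fix above: an increment-and-return counter yields the *new* count, so the slot for the current event is that value minus one. Without the "- 1" the code skipped slot 0 and could index one past the end of the array.

	#include <stdio.h>

	static int nest_count;

	static int this_cpu_inc_return(void)
	{
		return ++nest_count; /* models __this_cpu_inc_return() */
	}

	int main(void)
	{
		int index = this_cpu_inc_return() - 1; /* first event -> slot 0 */

		printf("first event stored at slot %d\n", index);
		return 0;
	}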
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index f096e72262f4..1db685104ffc 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -213,6 +213,7 @@ SECTIONS
*(.opd)
}
+ . = ALIGN(256);
.got : AT(ADDR(.got) - LOAD_OFFSET) {
__toc_start = .;
#ifndef CONFIG_RELOCATABLE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d2ecc9..df81caab7383 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu, *vnext;
int i;
int srcu_idx;
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 3cf529ceec5b..ac93a3bd2730 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -27,11 +27,11 @@ int enter_vmx_usercopy(void)
if (in_interrupt())
return 0;
- /* This acts as preempt_disable() as well and will make
- * enable_kernel_altivec(). We need to disable page faults
- * as they can call schedule and thus make us lose the VMX
- * context. So on page faults, we just fail which will cause
- * a fallback to the normal non-vmx copy.
+ preempt_disable();
+ /*
+ * We need to disable page faults as they can call schedule and
+ * thus make us lose the VMX context. So on page faults, we just
+ * fail which will cause a fallback to the normal non-vmx copy.
*/
pagefault_disable();
@@ -47,6 +47,7 @@ int enter_vmx_usercopy(void)
int exit_vmx_usercopy(void)
{
pagefault_enable();
+ preempt_enable();
return 0;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b396868d2aa7..6d535973b200 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -33,13 +33,13 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
+#include <linux/uaccess.h>
#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
@@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- if (in_atomic() || mm == NULL) {
+ if (faulthandler_disabled() || mm == NULL) {
if (!user_mode(regs)) {
rc = SIGSEGV;
goto bail;
}
- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
- "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+ "faulthandler_disabled() = %d mm = %p\n",
+ faulthandler_disabled(), mm);
printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
regs->nip, regs->msr);
die("Weird page fault", regs, SIGSEGV);
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index e7450bdbe83a..e292c8a60952 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
unsigned long vaddr;
int idx, type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr)
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}
@@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr)
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ce968b00b7c..3385e3d0506e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
- pte_t *ptep;
- struct page *page;
+ pte_t *ptep, pte;
unsigned shift;
unsigned long mask, flags;
+ struct page *page = ERR_PTR(-EINVAL);
+
+ local_irq_save(flags);
+ ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ if (!ptep)
+ goto no_page;
+ pte = READ_ONCE(*ptep);
/*
+ * Verify it is a huge page else bail.
* Transparent hugepages are handled by generic code. We can skip them
* here.
*/
- local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ if (!shift || pmd_trans_huge(__pmd(pte_val(pte))))
+ goto no_page;
- /* Verify it is a huge page else bail. */
- if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
- local_irq_restore(flags);
- return ERR_PTR(-EINVAL);
+ if (!pte_present(pte)) {
+ page = NULL;
+ goto no_page;
}
mask = (1UL << shift) - 1;
- page = pte_page(*ptep);
+ page = pte_page(pte);
if (page)
page += (address & mask) / PAGE_SIZE;
+no_page:
local_irq_restore(flags);
return page;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 59daa5eeec25..6bfadf1aa5cb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
* hash fault look at them.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
+ /*
+ * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since the format of pte_t differs from
+ * pmd_t, we want to prevent a transition from a pmd pointing to a page
+ * table to a pmd pointing to a huge page (and back) while interrupts
+ * are disabled. We clear the pmd to possibly replace it with a page
+ * table pointer in different code paths, so make sure we wait for the
+ * parallel find_linux_pte_or_hugepte to finish.
+ */
+ kick_all_cpus_sync();
return old_pmd;
}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index cbd3d069897f..723a099f6be3 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock);
static int mm_is_core_local(struct mm_struct *mm)
{
return cpumask_subset(mm_cpumask(mm),
- topology_thread_cpumask(smp_processor_id()));
+ topology_sibling_cpumask(smp_processor_id()));
}
struct tlb_flush_param {
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 7940dc90e80b..b258110da952 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -16,11 +16,12 @@
#define GHASH_DIGEST_SIZE 16
struct ghash_ctx {
- u8 icv[16];
- u8 key[16];
+ u8 key[GHASH_BLOCK_SIZE];
};
struct ghash_desc_ctx {
+ u8 icv[GHASH_BLOCK_SIZE];
+ u8 key[GHASH_BLOCK_SIZE];
u8 buffer[GHASH_BLOCK_SIZE];
u32 bytes;
};
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
memset(dctx, 0, sizeof(*dctx));
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
return 0;
}
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
}
memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
- memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
return 0;
}
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
unsigned int n;
u8 *buf = dctx->buffer;
int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
src += n;
if (!dctx->bytes) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
n = srclen & ~(GHASH_BLOCK_SIZE - 1);
if (n) {
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
if (ret != n)
return -EIO;
src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
return 0;
}
-static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static int ghash_flush(struct ghash_desc_ctx *dctx)
{
u8 *buf = dctx->buffer;
int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
memset(pos, 0, dctx->bytes);
- ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+ ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
+
+ dctx->bytes = 0;
}
- dctx->bytes = 0;
return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
int ret;
- ret = ghash_flush(ctx, dctx);
+ ret = ghash_flush(dctx);
if (!ret)
- memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return ret;
}
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 1f374b39a4ec..9d5192c94963 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
/* fill page with urandom bytes */
get_random_bytes(pg, PAGE_SIZE);
/* exor page with stckf values */
- for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) {
+ for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
u64 *p = ((u64 *)pg) + n;
*p ^= get_tod_clock_fast();
}
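A runnable illustration of the prng.c bound fix above: the old expression took sizeof() of a *value*, which is just the size of the expression's type, so only 8 of the 512 u64 words in a 4 KiB page were ever XORed with clock readings.

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		printf("buggy bound: %lu iterations\n",
		       (unsigned long)sizeof(PAGE_SIZE / sizeof(uint64_t))); /* 8 on LP64 */
		printf("fixed bound: %lu iterations\n",
		       PAGE_SIZE / sizeof(uint64_t));                        /* 512 */
		return 0;
	}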
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..e6f8615a11eb 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec466b8c..411464f4c97a 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
__old; \
})
-#define __HAVE_ARCH_CMPXCHG
-
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fc642399b489..ef24a212eeb7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long origin_mask;
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index b1453a2ae1ca..4990f6c66288 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_sibling_cpumask(cpu) \
+ (&per_cpu(cpu_topology, cpu).thread_mask)
#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d64a7a62164f..9dd4cc47ddc7 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from,
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn));
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space.
*
@@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct")
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space.
*
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n
* strlen_user: - Get the size of a string in user space.
* @str: The string to measure.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Get the size of a NUL-terminated string in user space.
*
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 76515bcea2f1..4c8f5d7f9c23 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index ba8593a515ba..de156ba3bd71 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* We get 160 bytes stack space from calling function, but only use
* 11 * 8 byte (old backchain + r15 - r6) for storing registers.
*/
-#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED (160 - 11 * 8)
+#define STK_OFF (STK_SPACE - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
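A runnable check of the STK_OFF rework above: the JIT reuses the unused tail of the caller's 160-byte register save area, so the frame grows only by the remainder. MAX_BPF_STACK of 512 matches kernels of this era but treat it as illustrative here.

	#include <stdio.h>

	#define MAX_BPF_STACK  512 /* illustrative; the real value comes from BPF */

	#define STK_SPACE      (MAX_BPF_STACK + 8 + 4 + 4 + 160)
	#define STK_160_UNUSED (160 - 11 * 8)
	#define STK_OFF        (STK_SPACE - STK_160_UNUSED)

	int main(void)
	{
		printf("STK_SPACE=%d STK_160_UNUSED=%d STK_OFF=%d\n",
		       STK_SPACE, STK_160_UNUSED, STK_OFF); /* 688, 72, 616 */
		return 0;
	}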
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7690dc8e1ab5..55423d8be580 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
}
/* Setup stack and backchain */
if (jit->seen & SEEN_STACK) {
- /* lgr %bfp,%r15 (BPF frame pointer) */
- EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+ if (jit->seen & SEEN_FUNC)
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
if (jit->seen & SEEN_FUNC)
- /* stg %bfp,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
/*
@@ -443,8 +446,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
/*
* Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
*/
-static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
@@ -588,8 +594,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
- case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -602,10 +608,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* llgfr %dst,%src (u32 cast) */
- EMIT4(0xb9160000, dst_reg, src_reg);
/* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, dst_reg);
+ EMIT4(0xb9870000, REG_W0, src_reg);
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -632,8 +636,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
- case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -649,7 +653,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
EMIT4(0xb9040000, REG_W1, dst_reg);
/* dlg %w0,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64((u32) imm));
+ EMIT_CONST_U64(imm));
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h
index f384839c3ee5..cc3f6420b71c 100644
--- a/arch/score/include/asm/cmpxchg.h
+++ b/arch/score/include/asm/cmpxchg.h
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
(unsigned long)(o), \
(unsigned long)(n)))
-#define __HAVE_ARCH_CMPXCHG 1
-
#include <asm-generic/cmpxchg-local.h>
#endif /* _ASM_SCORE_CMPXCHG_H */
diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h
index ab66ddde777b..20a3591225cc 100644
--- a/arch/score/include/asm/uaccess.h
+++ b/arch/score/include/asm/uaccess.h
@@ -36,7 +36,8 @@
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
@@ -61,7 +62,8 @@
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -79,7 +81,8 @@
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -98,7 +101,8 @@
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -119,7 +123,8 @@
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S
index 00b7d3a2fc60..16efa3ad037f 100644
--- a/arch/score/lib/string.S
+++ b/arch/score/lib/string.S
@@ -175,10 +175,10 @@ ENTRY(__clear_user)
br r3
.section .fixup, "ax"
+99:
br r3
.previous
.section __ex_table, "a"
.align 2
-99:
.word 0b, 99b
.previous
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
index 6860beb2a280..37a6c2e0e969 100644
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/uaccess.h>
/*
* This routine handles page faults. It determines the address,
@@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto bad_area_nosemaphore;
if (user_mode(regs))
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 43715308b068..bf91037db4e0 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index f6bd1406b897..85c97b188d71 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
* if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index a58fec9b55e0..79d8276377d1 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * with pagefaults disabled then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 76648941fea7..809941e33e12 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define dma_rmb() rmb()
#define dma_wmb() wmb()
-#define set_mb(__var, __value) \
- do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+#define smp_store_mb(__var, __value) \
+ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
#ifdef CONFIG_SMP
#define smp_mb() mb()
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index d38b52dca216..83ffb83c5397 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
*
* Cribbed from <asm-parisc/atomic.h>
*/
-#define __HAVE_ARCH_CMPXCHG 1
/* bug catcher for when unsupported size is used - won't link */
void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 0e1ed6cfbf68..faa2f61058c2 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
#include <asm-generic/cmpxchg-local.h>
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile int *m, int old, int new)
{
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6e424d185d0..a6cfdabb6054 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,7 +24,8 @@ typedef struct {
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
- int core_id;
+ unsigned short sock_id;
+ unsigned short core_id;
int proc_id;
} cpuinfo_sparc;
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index dc165ebdf05a..2a52c91d2c8a 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
" sllx %1, 32, %1\n"
" or %0, %1, %0\n"
" .previous\n"
+ " .section .sun_m7_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sethi %%hi(%4), %0\n"
+ " .word 662b\n"
+ " or %1, %%ulo(%4), %1\n"
+ " or %0, %%lo(%4), %0\n"
+ " .word 663b\n"
+ " sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
+ _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V),
+ "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+ _PAGE_CP_4V | _PAGE_E_4V |
_PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
@@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
" andn %0, %4, %0\n"
" or %0, %5, %0\n"
" .previous\n"
+ " .section .sun_m7_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " andn %0, %6, %0\n"
+ " or %0, %5, %0\n"
+ " .previous\n"
: "=r" (val)
: "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
- "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V));
+ "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V),
+ "i" (_PAGE_CP_4V));
return __pgprot(val);
}
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index ed8f071132e4..01d17046225a 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
-#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
+#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#endif /* CONFIG_SMP */
extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_map[NR_CPUS];
static inline const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_core_map[cpu];
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 6fd4436d32f0..ec9c04de3664 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry {
};
extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
__sun4v_2insn_patch_end;
+extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch,
+ __sun_m7_2insn_patch_end;
#endif /* !(__ASSEMBLY__) */
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 07cc49e541f4..0f679421b468 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
struct sun4v_1insn_patch_entry *);
void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
struct sun4v_2insn_patch_entry *);
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+ struct sun4v_2insn_patch_entry *);
extern unsigned int dcache_parity_tl1_occurred;
extern unsigned int icache_parity_tl1_occurred;
diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c
index 94e392bdee7d..814fb1729b12 100644
--- a/arch/sparc/kernel/leon_pci_grpci2.c
+++ b/arch/sparc/kernel/leon_pci_grpci2.c
@@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev)
err = -ENOMEM;
goto err1;
}
- memset(grpci2priv, 0, sizeof(*grpci2priv));
priv->regs = regs;
priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */
priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT;
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 26c80e18d7b1..6f80936e0eea 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp)
}
}
-static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
+static void find_back_node_value(struct mdesc_handle *hp, u64 node,
+ char *srch_val,
+ void (*func)(struct mdesc_handle *, u64, int),
+ u64 val, int depth)
{
- u64 a;
-
- mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
- u64 t = mdesc_arc_target(hp, a);
- const char *name;
- const u64 *id;
+ u64 arc;
- name = mdesc_node_name(hp, t);
- if (!strcmp(name, "cpu")) {
- id = mdesc_get_property(hp, t, "id", NULL);
- if (*id < NR_CPUS)
- cpu_data(*id).core_id = core_id;
- } else {
- u64 j;
+ /* Since we have an estimate of recursion depth, do a sanity check. */
+ if (depth == 0)
+ return;
- mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
- u64 n = mdesc_arc_target(hp, j);
- const char *n_name;
+ mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 n = mdesc_arc_target(hp, arc);
+ const char *name = mdesc_node_name(hp, n);
- n_name = mdesc_node_name(hp, n);
- if (strcmp(n_name, "cpu"))
- continue;
+ if (!strcmp(srch_val, name))
+ (*func)(hp, n, val);
- id = mdesc_get_property(hp, n, "id", NULL);
- if (*id < NR_CPUS)
- cpu_data(*id).core_id = core_id;
- }
- }
+ find_back_node_value(hp, n, srch_val, func, val, depth-1);
}
}
+static void __mark_core_id(struct mdesc_handle *hp, u64 node,
+ int core_id)
+{
+ const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+ if (*id < num_possible_cpus())
+ cpu_data(*id).core_id = core_id;
+}
+
+static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
+ int sock_id)
+{
+ const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+ if (*id < num_possible_cpus())
+ cpu_data(*id).sock_id = sock_id;
+}
+
+static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
+ int core_id)
+{
+ find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
+}
+
+static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
+ int sock_id)
+{
+ find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+}
+
static void set_core_ids(struct mdesc_handle *hp)
{
int idx;
u64 mp;
idx = 1;
+
+ /* Identify unique cores by looking for cpus backpointed to by
+ * level 1 instruction caches.
+ */
mdesc_for_each_node_by_name(hp, mp, "cache") {
const u64 *level;
const char *type;
@@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp)
continue;
mark_core_ids(hp, mp, idx);
+ idx++;
+ }
+}
+
+static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+{
+ u64 mp;
+ int idx = 1;
+ int fnd = 0;
+
+ /* Identify unique sockets by looking for cpus backpointed to by
+ * shared level n caches.
+ */
+ mdesc_for_each_node_by_name(hp, mp, "cache") {
+ const u64 *cur_lvl;
+
+ cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
+ if (*cur_lvl != level)
+ continue;
+
+ mark_sock_ids(hp, mp, idx);
+ idx++;
+ fnd = 1;
+ }
+ return fnd;
+}
+
+static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp)
+{
+ int idx = 1;
+ mdesc_for_each_node_by_name(hp, mp, "socket") {
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
+ const u64 *id;
+
+ name = mdesc_node_name(hp, t);
+ if (strcmp(name, "cpu"))
+ continue;
+
+ id = mdesc_get_property(hp, t, "id", NULL);
+ if (*id < num_possible_cpus())
+ cpu_data(*id).sock_id = idx;
+ }
idx++;
}
}
+static void set_sock_ids(struct mdesc_handle *hp)
+{
+ u64 mp;
+
+ /* If the machine description exposes socket data, use it.
+ * Otherwise fall back to shared L3 or L2 caches.
+ */
+ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
+ if (mp != MDESC_NODE_NULL)
+ return set_sock_ids_by_socket(hp, mp);
+
+ if (!set_sock_ids_by_cache(hp, 3))
+ set_sock_ids_by_cache(hp, 2);
+}
+
static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
{
u64 a;
@@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
continue;
mark_proc_ids(hp, mp, idx);
-
idx++;
}
}
@@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask)
set_core_ids(hp);
set_proc_ids(hp);
+ set_sock_ids(hp);
mdesc_release(hp);
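
The new find_back_node_value() collapses the two hand-unrolled levels of back-arc walking above into one bounded recursion. A minimal user-space sketch of the same shape, a depth-capped DFS over back-edges with a callback applied to name matches, where the node type, graph and main() are invented stand-ins for the mdesc accessors:

#include <stdio.h>
#include <string.h>

struct node {
    const char *name;
    int id;
    struct node *back[4];   /* back-arcs, NULL-terminated */
};

/* Walk back-arcs up to 'depth' levels, applying 'func' to every node
 * whose name matches 'srch_val'; same shape as the kernel helper,
 * minus the machine-description accessors. */
static void find_back_node_value(struct node *n, const char *srch_val,
                                 void (*func)(struct node *, int),
                                 int val, int depth)
{
    if (depth == 0)         /* sanity cap on recursion depth */
        return;

    for (int i = 0; n->back[i]; i++) {
        struct node *t = n->back[i];

        if (!strcmp(srch_val, t->name))
            func(t, val);

        find_back_node_value(t, srch_val, func, val, depth - 1);
    }
}

static void mark_core(struct node *cpu, int core_id)
{
    printf("cpu %d -> core %d\n", cpu->id, core_id);
}

int main(void)
{
    struct node cpu0 = { "cpu", 0, { NULL } };
    struct node cpu1 = { "cpu", 1, { NULL } };
    struct node l1   = { "cache", -1, { &cpu0, &cpu1, NULL } };

    find_back_node_value(&l1, "cpu", mark_core, 7, 10);
    return 0;
}

With this factoring, mark_core_ids() and mark_sock_ids() become one-liners that differ only in the callback, which is exactly what the diff above does.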
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 6f7251fd2eab..c928bc64b4ba 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -1002,6 +1002,38 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
#ifdef CONFIG_SYSFS
+
+#define SLOT_NAME_SIZE 11 /* max decimal digits in a u32 + NUL */
+
+static void pcie_bus_slot_names(struct pci_bus *pbus)
+{
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+
+ list_for_each_entry(pdev, &pbus->devices, bus_list) {
+ char name[SLOT_NAME_SIZE];
+ struct pci_slot *pci_slot;
+ const u32 *slot_num;
+ int len;
+
+ slot_num = of_get_property(pdev->dev.of_node,
+ "physical-slot#", &len);
+
+ if (slot_num == NULL || len != 4)
+ continue;
+
+ snprintf(name, sizeof(name), "%u", slot_num[0]);
+ pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL);
+
+ if (IS_ERR(pci_slot))
+ pr_err("PCI: pci_create_slot returned %ld.\n",
+ PTR_ERR(pci_slot));
+ }
+
+ list_for_each_entry(bus, &pbus->children, node)
+ pcie_bus_slot_names(bus);
+}
+
static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus)
{
const struct pci_slot_names {
@@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void)
while ((pbus = pci_find_next_bus(pbus)) != NULL) {
struct device_node *node;
+ struct pci_dev *pdev;
+
+ pdev = list_first_entry(&pbus->devices, struct pci_dev,
+ bus_list);
- if (pbus->self) {
- /* PCI->PCI bridge */
- node = pbus->self->dev.of_node;
+ if (pdev && pci_is_pcie(pdev)) {
+ pcie_bus_slot_names(pbus);
} else {
- struct pci_pbm_info *pbm = pbus->sysdata;
- /* Host PCI controller */
- node = pbm->op->dev.of_node;
- }
+ if (pbus->self) {
+
+ /* PCI->PCI bridge */
+ node = pbus->self->dev.of_node;
+
+ } else {
+ struct pci_pbm_info *pbm = pbus->sysdata;
- pci_bus_slot_names(node, pbus);
+ /* Host PCI controller */
+ node = pbm->op->dev.of_node;
+ }
+
+ pci_bus_slot_names(node, pbus);
+ }
}
return 0;
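
pcie_bus_slot_names() sizes its name buffer for the worst-case u32: ten decimal digits plus the terminating NUL, hence SLOT_NAME_SIZE of 11. A quick stand-alone check of that sizing (the variable names mirror the hunk above, the program itself is illustrative):

#include <stdio.h>
#include <inttypes.h>

#define SLOT_NAME_SIZE 11   /* 10 decimal digits of a u32 + NUL */

int main(void)
{
    char name[SLOT_NAME_SIZE];
    uint32_t slot_num = UINT32_MAX;   /* worst case: 4294967295 */

    /* snprintf returns the would-be length; it must fit the buffer */
    int len = snprintf(name, sizeof(name), "%" PRIu32, slot_num);
    printf("\"%s\" needs %d chars + NUL (buffer holds %zu)\n",
           name, len, sizeof(name));
    return 0;
}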
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index c38d19fc27ba..f7b261749383 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
}
}
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+ struct sun4v_2insn_patch_entry *end)
+{
+ while (start < end) {
+ unsigned long addr = start->addr;
+
+ *(unsigned int *) (addr + 0) = start->insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = start->insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ start++;
+ }
+}
+
static void __init sun4v_patch(void)
{
extern void sun4v_hvapi_init(void);
@@ -267,6 +285,9 @@ static void __init sun4v_patch(void)
sun4v_patch_2insn_range(&__sun4v_2insn_patch,
&__sun4v_2insn_patch_end);
+ if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7)
+ sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
+ &__sun_m7_2insn_patch_end);
sun4v_hvapi_init();
}
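
sun_m7_patch_2insn_range() follows the usual live-patching discipline: store one instruction word, order the store with wmb(), then flush that word toward the instruction-fetch path before touching the next one. The ordering can be modelled in portable C with C11 fences; the sparc "flush" instruction has no portable equivalent, so it is stubbed out here, and the instruction words are made up:

#include <stdatomic.h>
#include <stdint.h>

struct patch_entry {
    uint32_t *addr;      /* site to patch */
    uint32_t insns[2];   /* replacement instruction words */
};

/* Stand-in for the sparc "flush" instruction, which pushes the
 * freshly written word out to the instruction fetch path. */
static void flush_icache_word(void *addr) { (void)addr; }

static void patch_2insn_range(struct patch_entry *start,
                              struct patch_entry *end)
{
    while (start < end) {
        uint32_t *addr = start->addr;

        addr[0] = start->insns[0];
        atomic_thread_fence(memory_order_release); /* ~ wmb() */
        flush_icache_word(&addr[0]);

        addr[1] = start->insns[1];
        atomic_thread_fence(memory_order_release);
        flush_icache_word(&addr[1]);

        start++;
    }
}

int main(void)
{
    uint32_t site[2] = { 0, 0 };
    struct patch_entry p = { site, { 0x01000000u, 0x30680000u } };

    patch_2insn_range(&p, &p + 1);
    return site[0] == 0x01000000u ? 0 : 1;
}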
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 61139d9924ca..19cd08d18672 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL(cpu_core_sib_map);
static cpumask_t smp_commenced_mask;
@@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void)
}
}
+ for_each_present_cpu(i) {
+ unsigned int j;
+
+ for_each_present_cpu(j) {
+ if (cpu_data(i).sock_id == cpu_data(j).sock_id)
+ cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+ }
+ }
+
for_each_present_cpu(i) {
unsigned int j;
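
The added loop in smp_fill_in_sib_core_maps() is a plain pairwise pass: every present cpu j that shares i's sock_id lands in i's cpu_core_sib_map. A user-space sketch with bitmask cpumasks, where NR_CPUS and the sock_id layout are invented:

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 8

static int sock_id[NR_CPUS]  = { 0, 0, 0, 0, 1, 1, 1, 1 };
static uint64_t core_sib_map[NR_CPUS];   /* one bit per cpu */

int main(void)
{
    /* O(n^2) pass: cpus sharing a socket become siblings. */
    for (int i = 0; i < NR_CPUS; i++)
        for (int j = 0; j < NR_CPUS; j++)
            if (sock_id[i] == sock_id[j])
                core_sib_map[i] |= UINT64_C(1) << j;

    for (int i = 0; i < NR_CPUS; i++)
        printf("cpu%d sib mask: %#llx\n", i,
               (unsigned long long)core_sib_map[i]);
    return 0;
}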
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 09243057cb0b..f1a2f688b28a 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -138,6 +138,11 @@ SECTIONS
*(.pause_3insn_patch)
__pause_3insn_patch_end = .;
}
+ .sun_m7_2insn_patch : {
+ __sun_m7_2insn_patch = .;
+ *(.sun_m7_2insn_patch)
+ __sun_m7_2insn_patch_end = .;
+ }
PERCPU_SECTION(SMP_CACHE_BYTES)
. = ALIGN(PAGE_SIZE);
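
The new .sun_m7_2insn_patch output section gathers patch entries emitted from anywhere in the tree and brackets them with the __sun_m7_2insn_patch/__sun_m7_2insn_patch_end symbols, so C code (as in setup_64.c above) can iterate the table like an array. The idiom, with an ordinary array faking the linker-provided symbol pair:

#include <stdio.h>

struct patch_entry { unsigned long addr; unsigned int insns[2]; };

/* In the kernel these two boundary pointers come from vmlinux.lds.S;
 * here a plain array stands in for the collected input sections. */
static struct patch_entry table[] = {
    { 0x1000, { 1, 2 } },
    { 0x2000, { 3, 4 } },
};
static struct patch_entry *const patch_begin = table;
static struct patch_entry *const patch_end   = table + 2;

int main(void)
{
    for (struct patch_entry *p = patch_begin; p < patch_end; p++)
        printf("patch at %#lx\n", p->addr);
    return 0;
}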
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 70d817154fe8..c399e7b3b035 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -21,6 +21,7 @@
#include <linux/perf_event.h>
#include <linux/interrupt.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -29,7 +30,6 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>
#include "mm_32.h"
@@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto no_context;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 479823249429..e9268ea1a68d 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -22,12 +22,12 @@
#include <linux/kdebug.h>
#include <linux/percpu.h>
#include <linux/context_tracking.h>
+#include <linux/uaccess.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/openprom.h>
#include <asm/oplib.h>
-#include <asm/uaccess.h>
#include <asm/asi.h>
#include <asm/lsu.h>
#include <asm/sections.h>
@@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto intr_or_no_mm;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
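
The in_atomic() test was too blunt: it also fires inside spinlock-held sections that could legitimately fault. The replacement keys off an explicit per-thread pagefault-disable count. A toy model of that counter and of the fault-handler shape after the conversion (all names here are invented; the real kernel keeps the count in the preempt/task state):

#include <stdbool.h>
#include <stdio.h>

static _Thread_local int pagefault_disable_count;

static void pagefault_disable(void) { pagefault_disable_count++; }
static void pagefault_enable(void)  { pagefault_disable_count--; }
static bool pagefault_disabled(void)
{
    return pagefault_disable_count != 0;
}

/* Fault-handler shape after the conversion. */
static void handle_fault(bool have_mm)
{
    if (pagefault_disabled() || !have_mm) {
        puts("no_context: fix up via exception table");
        return;
    }
    puts("take the fault: look up the vma");
}

int main(void)
{
    handle_fault(true);          /* normal: fault is serviced   */
    pagefault_disable();
    handle_fault(true);          /* disabled: handler bails out */
    pagefault_enable();
    return 0;
}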
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 449f864f0cef..a454ec5ff07a 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
long idx, type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr)
if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}
@@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr)
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
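
kmap_atomic() used to rely on pagefault_disable() implying preempt-off; with the two decoupled, each mapping now takes both, and the pairs must unwind in LIFO order with preemption outermost. A compact model of the pairing, with counters standing in for the real preempt machinery:

#include <stdio.h>

static int preempt_count, pagefault_count;

static void preempt_disable(void)   { preempt_count++; }
static void preempt_enable(void)    { preempt_count--; }
static void pagefault_disable(void) { pagefault_count++; }
static void pagefault_enable(void)  { pagefault_count--; }

/* kmap_atomic/kunmap_atomic shape after the change: the two
 * disables nest LIFO, preempt outermost. */
static void *kmap_atomic_model(void)
{
    preempt_disable();
    pagefault_disable();
    return (void *)"mapping";
}

static void kunmap_atomic_model(void *kvaddr)
{
    (void)kvaddr;
    pagefault_enable();
    preempt_enable();
}

int main(void)
{
    void *p = kmap_atomic_model();
    printf("in map: preempt=%d pagefault=%d\n",
           preempt_count, pagefault_count);
    kunmap_atomic_model(p);
    printf("after:  preempt=%d pagefault=%d\n",
           preempt_count, pagefault_count);
    return 0;
}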
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4ca0d6ba5ec8..c5d08b89a96c 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -54,6 +54,7 @@
#include "init_64.h"
unsigned long kern_linear_pte_xor[4] __read_mostly;
+static unsigned long page_cache4v_flag;
/* A bitmap, two bits for every 256MB of physical memory. These two
* bits determine what page size we use for kernel linear
@@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void)
static void __init sun4v_linear_pte_xor_finalize(void)
{
+ unsigned long pagecv_flag;
+
+ /* Bit 9 of the TTE is no longer the CV bit on the M7 processor;
+ * it instead enables MCD errors. Do not set bit 9 on M7.
+ */
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_SPARC_M7:
+ pagecv_flag = 0x00;
+ break;
+ default:
+ pagecv_flag = _PAGE_CV_4V;
+ break;
+ }
#ifndef CONFIG_DEBUG_PAGEALLOC
if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
@@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
@@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
PAGE_OFFSET;
- kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
@@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void)
return available;
}
+#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
+#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
+#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
+#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
+#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
+#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
+
/* We need to exclude reserved regions. This exclusion will include
* vmlinux and initrd. To be more precise the initrd size could be used to
* compute a new lower limit because it is freed later during initialization.
@@ -2034,6 +2055,25 @@ void __init paging_init(void)
memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
#endif
+ /* The TTE.cv bit on sparc v9 occupies the same position as the
+ * TTE.mcde bit on the M7 processor, a conflicting use of the same
+ * bit. Enabling TTE.cv on M7 would turn on Memory Corruption
+ * Detection errors on all pages, which would cause problems later:
+ * the kernel does not run with MCD enabled, so the remaining steps
+ * needed to fully configure memory corruption detection are never
+ * taken. Ensure TTE.mcde is never set on M7 by computing the
+ * cacheability flag here, with this in mind, for later use.
+ */
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_SPARC_M7:
+ page_cache4v_flag = _PAGE_CP_4V;
+ break;
+ default:
+ page_cache4v_flag = _PAGE_CACHE_4V;
+ break;
+ }
+
if (tlb_type == hypervisor)
sun4v_pgprot_init();
else
@@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
-#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
-#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
-#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
-#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
-#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
-#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
-
pgprot_t PAGE_KERNEL __read_mostly;
EXPORT_SYMBOL(PAGE_KERNEL);
@@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
_PAGE_P_4U | _PAGE_W_4U);
if (tlb_type == hypervisor)
pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
- _PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
pte_base |= _PAGE_PMD_HUGE;
@@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void)
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
- _PAGE_CACHE_4V | _PAGE_P_4V |
+ page_cache4v_flag | _PAGE_P_4V |
__ACCESS_BITS_4V | __DIRTY_BITS_4V |
_PAGE_EXEC_4V);
PAGE_KERNEL_LOCKED = PAGE_KERNEL;
_PAGE_IE = _PAGE_IE_4V;
_PAGE_E = _PAGE_E_4V;
- _PAGE_CACHE = _PAGE_CACHE_4V;
+ _PAGE_CACHE = page_cache4v_flag;
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
@@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void)
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
PAGE_OFFSET;
#endif
- kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
+ _PAGE_W_4V);
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
@@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void)
_PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
_PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
- page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
- page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
+ page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
- page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_EXEC_4V);
- page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
__ACCESS_BITS_4V | _PAGE_EXEC_4V);
page_exec_bit = _PAGE_EXEC_4V;
@@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr)
_PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
if (tlb_type == hypervisor)
val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
- _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
+ page_cache4v_flag | _PAGE_P_4V |
_PAGE_EXEC_4V | _PAGE_W_4V);
return val | paddr;
@@ -2706,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct tsb_config *tp;
- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
const struct exception_table_entry *entry;
entry = search_exception_tables(regs->tpc);
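
Because bit 9 doubles as MCD-enable on the M7, every site that used to OR in _PAGE_CV_4V now uses the boot-time-computed page_cache4v_flag instead. The selection itself is a chip-type switch; a minimal sketch of choosing and composing the flag (the bit positions follow the 4V definitions but should be treated as illustrative):

#include <stdio.h>

#define _PAGE_CP_4V  (1UL << 10)   /* cacheable in physical cache */
#define _PAGE_CV_4V  (1UL << 9)    /* cacheable-virtual; MCD-enable on M7 */

enum chip { CHIP_GENERIC_SUN4V, CHIP_SPARC_M7 };

static unsigned long page_cache4v_flag(enum chip c)
{
    switch (c) {
    case CHIP_SPARC_M7:
        return _PAGE_CP_4V;              /* never set bit 9 on M7 */
    default:
        return _PAGE_CP_4V | _PAGE_CV_4V;
    }
}

int main(void)
{
    printf("generic: %#lx, M7: %#lx\n",
           page_cache4v_flag(CHIP_GENERIC_SUN4V),
           page_cache4v_flag(CHIP_SPARC_M7));
    return 0;
}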
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5fadd42..0496970cef82 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 938311844233..76b0d0ebb244 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define topology_physical_package_id(cpu) ((void)(cpu), 0)
#define topology_core_id(cpu) (cpu)
#define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask)
-#define topology_thread_cpumask(cpu) cpumask_of(cpu)
+#define topology_sibling_cpumask(cpu) cpumask_of(cpu)
#endif
#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index f41cb53cf645..a33276bf5ca1 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size);
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
@@ -192,7 +193,8 @@ extern int __get_user_bad(void)
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -274,7 +276,8 @@ extern int __put_user_bad(void)
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -330,7 +333,8 @@ extern int __put_user_bad(void)
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to,
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to user space. Caller must check
* the specified blocks with access_ok() before calling this function.
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index e83cc999da02..3f4f58d34a92 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs,
/*
* If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * region with pagefaults disabled then we must not take the fault.
*/
- if (in_atomic() || !mm) {
+ if (pagefault_disabled() || !mm) {
vma = NULL; /* happy compiler */
goto bad_area_nosemaphore;
}
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index 6aa2f2625447..fcd545014e79 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
int idx, type;
pte_t *pte;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
/* Avoid icache flushes by disallowing atomic executable mappings. */
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 8e4daf44e980..47ff9b7f3e5d 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -35,10 +36,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
*code_out = SEGV_MAPERR;
/*
- * If the fault was during atomic operation, don't take the fault, just
+ * If the fault was with pagefaults disabled, don't take the fault, just
* fail.
*/
- if (in_atomic())
+ if (faulthandler_disabled())
goto out_nosemaphore;
if (is_user)
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index 0dc922dba915..afccef5529cc 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 226d5696e1d1..4e986e809861 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,7 +127,8 @@ config X86
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_QUEUE_RWLOCK
+ select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_USE_QUEUED_RWLOCKS
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select OLD_SIGACTION if X86_32
select COMPAT_OLD_SIGACTION if IA32_EMULATION
@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks"
depends on PARAVIRT && SMP
- select UNINLINE_SPIN_UNLOCK
+ select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
---help---
Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 72484a645f05..2fd3ebbb4e33 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS
If unsure, say N.
+config X86_DEBUG_FPU
+ bool "Debug the x86 FPU code"
+ depends on DEBUG_KERNEL
+ default y
+ ---help---
+ If this option is enabled, extra sanity checks and (boot time)
+ debug printouts are added to the kernel. This debugging adds a
+ small amount of runtime overhead.
+
+ If unsure, say N.
+
endmenu
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ef17683484e9..48304b89b601 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c)
if (!cmdline_ptr)
goto fail;
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+ /* Fill in upper bits of command line address, NOP on 32 bit */
+ boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
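
ext_cmd_line_ptr carries the address bits that the 32-bit cmd_line_ptr field cannot. The split is one shift each way; a quick sketch of both directions (field names mirror the boot_params usage above, the address is made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t addr = 0x1234567890ULL;     /* e.g. a buffer above 4GiB */

    uint32_t cmd_line_ptr     = (uint32_t)addr;         /* low 32  */
    uint32_t ext_cmd_line_ptr = (uint32_t)(addr >> 32); /* high 32 */

    /* Reassemble on the consumer side. */
    uint64_t back = ((uint64_t)ext_cmd_line_ptr << 32) | cmd_line_ptr;
    printf("%#llx -> %#x/%#x -> %#llx\n",
           (unsigned long long)addr, cmd_line_ptr, ext_cmd_line_ptr,
           (unsigned long long)back);
    return 0;
}

On a 32-bit build the shift of a 32-bit-derived value yields zero, which is why the added comment calls it a NOP there.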
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 89dd0d78013a..805d25ca5f1d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -2,15 +2,14 @@
#define BOOT_COMPRESSED_MISC_H
/*
- * we have to be careful, because no indirections are allowed here, and
- * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
- * we just keep it from happening
+ * Special hack: we have to be careful, because no indirections are allowed here,
+ * and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening. (This list needs to be extended when new
+ * paravirt and debugging variants are added.)
*/
#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
-#ifdef CONFIG_X86_32
-#define _ASM_X86_DESC_H 1
-#endif
#include <linux/linkage.h>
#include <linux/screen_info.h>
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 112cefacf2af..b419f43ce0c5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -32,7 +32,7 @@
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/aes.h>
#include <crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index baf0ac21ace5..4c65c70e628b 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -19,8 +19,7 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
@@ -561,16 +560,15 @@ static struct crypto_alg cmll_algs[10] = { {
static int __init camellia_aesni_init(void)
{
- u64 xcr0;
+ const char *feature_name;
if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX2 detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
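
This glue module, and every crypto module converted below, now funnels through a single probe that both tests the xfeature mask and reports the first missing feature by name, replacing the open-coded xgetbv() checks. The shape of that helper, modelled in plain C (the feature table and enabled mask are invented; the real cpu_has_xfeatures() consults XCR0):

#include <stdbool.h>
#include <stdio.h>

#define XSTATE_SSE (1u << 1)
#define XSTATE_YMM (1u << 2)

static const struct { unsigned mask; const char *name; } xfeatures[] = {
    { XSTATE_SSE, "SSE registers" },
    { XSTATE_YMM, "AVX registers" },
};

static unsigned enabled_mask = XSTATE_SSE;   /* pretend no AVX state */

static bool cpu_has_xfeatures(unsigned mask, const char **feature_name)
{
    for (unsigned i = 0; i < sizeof(xfeatures)/sizeof(xfeatures[0]); i++) {
        if ((mask & xfeatures[i].mask) &&
            !(enabled_mask & xfeatures[i].mask)) {
            if (feature_name)
                *feature_name = xfeatures[i].name;
            return false;   /* report the first missing feature */
        }
    }
    return true;
}

int main(void)
{
    const char *feature_name;

    if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name))
        printf("CPU feature '%s' is not supported.\n", feature_name);
    return 0;
}

Passing NULL for feature_name, as the sha glue code does, skips the reporting and keeps only the boolean answer.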
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 78818a1e73e3..80a0e4389c9a 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -19,8 +19,7 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
@@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { {
static int __init camellia_aesni_init(void)
{
- u64 xcr0;
+ const char *feature_name;
- if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
- pr_info("AVX or AES-NI instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 236c80974457..be00aa48b2b5 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -31,8 +31,7 @@
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>
#define CAST5_PARALLEL_BLOCKS 16
@@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { {
static int __init cast5_init(void)
{
- u64 xcr0;
+ const char *feature_name;
- if (!cpu_has_avx || !cpu_has_osxsave) {
- pr_info("AVX instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index f448810ca4ac..5dbba7224221 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -36,8 +36,7 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>
#define CAST6_PARALLEL_BLOCKS 8
@@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { {
static int __init cast6_init(void)
{
- u64 xcr0;
+ const char *feature_name;
- if (!cpu_has_avx || !cpu_has_osxsave) {
- pr_info("AVX instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 1937fc1d8763..07d2c6c86a54 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -35,7 +35,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 28640c3d6af7..81a595d75cf5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -32,8 +32,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index b6c67bf30fdf..a3fcfc97a311 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -29,7 +29,7 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index f368ba261739..5a2f30f9f52d 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -18,7 +18,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/crypto.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
struct crypto_fpu_ctx {
struct crypto_blkcipher *child;
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 2079baf06bdd..64d7cf1b50e1 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,7 +19,7 @@
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 2f63dc89e7a9..7d838dc4d888 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -20,8 +20,7 @@
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <crypto/serpent.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/serpent-avx.h>
#include <asm/crypto/glue_helper.h>
@@ -537,16 +536,14 @@ static struct crypto_alg srp_algs[10] = { {
static int __init init(void)
{
- u64 xcr0;
+ const char *feature_name;
if (!cpu_has_avx2 || !cpu_has_osxsave) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index c8d478af8456..da7dafc9b16d 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -36,8 +36,7 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/serpent-avx.h>
#include <asm/crypto/glue_helper.h>
@@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { {
static int __init serpent_init(void)
{
- u64 xcr0;
+ const char *feature_name;
- if (!cpu_has_avx || !cpu_has_osxsave) {
- printk(KERN_INFO "AVX instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- printk(KERN_INFO "AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index e510b1c5d690..f53ed1dc88ea 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -65,11 +65,8 @@
#include <crypto/mcryptd.h>
#include <crypto/crypto_wq.h>
#include <asm/byteorder.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
#include <linux/hardirq.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
#include "sha_mb_ctx.h"
#define FLUSH_INTERVAL 1000 /* in usec */
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 33d1b9dc14cc..7c48e8b20848 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -29,9 +29,7 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
@@ -123,15 +121,9 @@ static struct shash_alg alg = {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
- u64 xcr0;
-
- if (!cpu_has_avx || !cpu_has_osxsave)
- return false;
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
-
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+ if (cpu_has_avx)
+ pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index ccc338881ee8..f8097fc0d1d1 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -37,9 +37,7 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <linux/string.h>
asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
@@ -132,15 +130,9 @@ static struct shash_alg algs[] = { {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
- u64 xcr0;
-
- if (!cpu_has_avx || !cpu_has_osxsave)
- return false;
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
-
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+ if (cpu_has_avx)
+ pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index d9fa4c1e063f..2edad7b81870 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -35,9 +35,7 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <linux/string.h>
@@ -131,15 +129,9 @@ static struct shash_alg algs[] = { {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
- u64 xcr0;
-
- if (!cpu_has_avx || !cpu_has_osxsave)
- return false;
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
-
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+ if (cpu_has_avx)
+ pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b5e2d5651851..c2bd0ce718ee 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -36,9 +36,7 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
#include <asm/crypto/twofish.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
@@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { {
static int __init twofish_init(void)
{
- u64 xcr0;
+ const char *feature_name;
- if (!cpu_has_avx || !cpu_has_osxsave) {
- printk(KERN_INFO "AVX instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- printk(KERN_INFO "AVX detected but unusable.\n");
+ if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index c81d35e6c7f1..ae3a29ae875b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -21,8 +21,8 @@
#include <linux/binfmts.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
@@ -198,7 +198,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
} get_user_catch(err);
- err |= restore_xstate_sig(buf, 1);
+ err |= fpu__restore_sig(buf, 1);
force_iret();
@@ -308,6 +308,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
size_t frame_size,
void __user **fpstate)
{
+ struct fpu *fpu = &current->thread.fpu;
unsigned long sp;
/* Default to using normal stack */
@@ -322,12 +323,12 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (used_math()) {
+ if (fpu->fpstate_active) {
unsigned long fx_aligned, math_size;
- sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+ sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
*fpstate = (struct _fpstate_ia32 __user *) sp;
- if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+ if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
math_size) < 0)
return (void __user *) -1L;
}
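
fpu__alloc_mathframe() carves the FPU area out of the signal stack: subtract the frame size, round down to 64 bytes for XSAVE, and for an ia32 frame prepend the legacy i387 fsave block below it. The arithmetic in isolation, mirroring the alloc_mathframe() body removed further down in this diff (the two sizes are placeholders):

#include <stdio.h>

#define FRAME_ALIGN 64UL   /* XSAVE area must be 64-byte aligned */

static unsigned long round_down_to(unsigned long x, unsigned long a)
{
    return x & ~(a - 1);
}

static unsigned long alloc_mathframe(unsigned long sp, int ia32_frame,
                                     unsigned long *buf_fx,
                                     unsigned long *size)
{
    unsigned long frame_size = 832;   /* placeholder xstate size   */
    unsigned long fsave_size = 112;   /* legacy i387 fsave frame   */

    *buf_fx = sp = round_down_to(sp - frame_size, FRAME_ALIGN);
    if (ia32_frame) {
        frame_size += fsave_size;     /* fsave block sits below fx */
        sp -= fsave_size;
    }
    *size = frame_size;
    return sp;
}

int main(void)
{
    unsigned long buf_fx, size;
    unsigned long sp = alloc_mathframe(0x7ffff000UL, 1, &buf_fx, &size);

    printf("sp=%#lx buf_fx=%#lx size=%lu\n", sp, buf_fx, size);
    return 0;
}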
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ba32af062f61..7bfc85bbb8ff 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -52,6 +52,12 @@ struct alt_instr {
u8 padlen; /* length of build-time padding */
} __packed;
+/*
+ * Debug flag that can be tested to see whether alternative
+ * instructions were patched in already:
+ */
+extern int alternatives_patched;
+
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index aaac3b2fb746..1a5da2e63aee 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -98,11 +98,22 @@ static inline u16 amd_get_node_id(struct pci_dev *pdev)
return 0;
}
+static inline bool amd_gart_present(void)
+{
+ /* GART present on Fam0Fh and Fam10h, and on Fam15h up to model 0Fh */
+ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+ (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
+ return true;
+
+ return false;
+}
+
#else
#define amd_nb_num(x) 0
#define amd_nb_has_feature(x) false
#define node_to_amd_nb(x) NULL
+#define amd_gart_present(x) false
#endif
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b81fe2..e51a8f803f55 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
#define smp_mb() mb()
#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */
#define read_barrier_depends() do { } while (0)
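
smp_store_mb() pairs a store with a full barrier; on SMP x86 it reuses xchg, whose implicit lock semantics supply the fence. C11's atomic_exchange with the default seq_cst ordering gives the same store-plus-full-barrier effect. A sketch of the classic sleeper pattern this primitive exists for (the variable names are invented):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int state;
static atomic_int cond;

/* smp_store_mb(var, value): store, then full barrier, so the
 * following load of 'cond' cannot be hoisted above the store. */
#define smp_store_mb(var, value) \
    (void)atomic_exchange(&(var), (value))

int main(void)
{
    smp_store_mb(state, 1);          /* mark ourselves sleeping    */
    if (atomic_load(&cond))          /* ...then re-check the cond  */
        atomic_store(&state, 0);     /* wakeup raced in: stay awake */
    printf("state=%d\n", atomic_load(&state));
    return 0;
}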
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99c105d78b7e..ad19841eddfe 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,8 +4,6 @@
#include <linux/compiler.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* Non-existent functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error()).
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 1eef55596e82..03bb1065c335 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <linux/crypto.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <crypto/b128ops.h>
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 808dae63eeea..1f5b7287d1ad 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -127,50 +127,14 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-static inline void *
+void *
dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
-
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
- return memory;
-
- if (!dev)
- dev = &x86_dma_fallback_dev;
-
- if (!is_device_dma_capable(dev))
- return NULL;
-
- if (!ops->alloc)
- return NULL;
-
- memory = ops->alloc(dev, size, dma_handle,
- dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
-
- return memory;
-}
+ gfp_t gfp, struct dma_attrs *attrs);
#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- WARN_ON(irqs_disabled()); /* for portability */
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- debug_dma_free_coherent(dev, size, vaddr, bus);
- if (ops->free)
- ops->free(dev, size, vaddr, bus, attrs);
-}
+void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t bus,
+ struct dma_attrs *attrs);
#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3738b138b843..155162ea0e00 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/pgtable.h>
/*
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
deleted file mode 100644
index da5e96756570..000000000000
--- a/arch/x86/include/asm/fpu-internal.h
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _FPU_INTERNAL_H
-#define _FPU_INTERNAL_H
-
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
-#include <linux/compat.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
-#include <asm/smap.h>
-
-#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
-# include <asm/user32.h>
-struct ksignal;
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-#else
-# define user_i387_ia32_struct user_i387_struct
-# define user32_fxsr_struct user_fxsr_struct
-# define ia32_setup_frame __setup_frame
-# define ia32_setup_rt_frame __setup_rt_frame
-#endif
-
-extern unsigned int mxcsr_feature_mask;
-extern void fpu_init(void);
-extern void eager_fpu_init(void);
-
-DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
-
-extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
- struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
- const struct user_i387_ia32_struct *env);
-
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
-
-/*
- * xstateregs_active == fpregs_active. Please refer to the comment
- * at the definition of fpregs_active.
- */
-#define xstateregs_active fpregs_active
-
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
- per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-/*
- * Used to indicate that the FPU state in memory is newer than the FPU
- * state in registers, and the FPU state should be reloaded next time the
- * task is run. Only safe on the current task, or non-running tasks.
- */
-static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
-{
- tsk->thread.fpu.last_cpu = ~0;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == this_cpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
-static inline int is_ia32_compat_frame(void)
-{
- return config_enabled(CONFIG_IA32_EMULATION) &&
- test_thread_flag(TIF_IA32);
-}
-
-static inline int is_ia32_frame(void)
-{
- return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
-}
-
-static inline int is_x32_frame(void)
-{
- return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
-}
-
-#define X87_FSW_ES (1 << 7) /* Exception Summary */
-
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
-}
-
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has_safe(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has_safe(X86_FEATURE_FXSR);
-}
-
-static inline void fx_finit(struct i387_fxsave_struct *fx)
-{
- fx->cwd = 0x37f;
- fx->mxcsr = MXCSR_DEFAULT;
-}
-
-extern void __sanitize_i387_state(struct task_struct *);
-
-static inline void sanitize_i387_state(struct task_struct *tsk)
-{
- if (!use_xsaveopt())
- return;
- __sanitize_i387_state(tsk);
-}
-
-#define user_insn(insn, output, input...) \
-({ \
- int err; \
- asm volatile(ASM_STAC "\n" \
- "1:" #insn "\n\t" \
- "2: " ASM_CLAC "\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-#define check_insn(insn, output, input...) \
-({ \
- int err; \
- asm volatile("1:" #insn "\n\t" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-static inline int fsave_user(struct i387_fsave_struct __user *fx)
-{
- return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
-}
-
-static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
-}
-
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
-}
-
-static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
-}
-
-static inline int frstor_checking(struct i387_fsave_struct *fx)
-{
- return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int frstor_user(struct i387_fsave_struct __user *fx)
-{
- return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- if (config_enabled(CONFIG_X86_32))
- asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
- else {
- /* Using "rex64; fxsave %0" is broken because, if the memory
- * operand uses any extended registers for addressing, a second
- * REX prefix will be generated (to the assembler, rex64
- * followed by semicolon is a separate instruction), and hence
- * the 64-bitness is lost.
- *
- * Using "fxsaveq %0" would be the ideal choice, but is only
- * supported starting with gas 2.16.
- *
- * Using, as a workaround, the properly prefixed form below
- * isn't accepted by any binutils version so far released,
- * complaining that the same type of prefix is used twice if
- * an extended register is needed for addressing (fix submitted
- * to mainline 2005-11-21).
- *
- * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
- *
- * This, however, we can work around by forcing the compiler to
- * select an addressing mode that doesn't require extended
- * registers.
- */
- asm volatile( "rex64/fxsave (%[fx])"
- : "=m" (fpu->state->fxsave)
- : [fx] "R" (&fpu->state->fxsave));
- }
-}
-
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact.
- */
-static inline int fpu_save_init(struct fpu *fpu)
-{
- if (use_xsave()) {
- fpu_xsave(fpu);
-
- /*
- * xsave header may indicate the init state of the FP.
- */
- if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return 1;
- } else if (use_fxsr()) {
- fpu_fxsave(fpu);
- } else {
- asm volatile("fnsave %[fx]; fwait"
- : [fx] "=m" (fpu->state->fsave));
- return 0;
- }
-
- /*
- * If exceptions are pending, we need to clear them so
- * that we don't randomly get exceptions later.
- *
- * FIXME! Is this perhaps only true for the old-style
- * irq13 case? Maybe we could leave the x87 state
- * intact otherwise?
- */
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
- asm volatile("fnclex");
- return 0;
- }
- return 1;
-}
-
-static inline int __save_init_fpu(struct task_struct *tsk)
-{
- return fpu_save_init(&tsk->thread.fpu);
-}
-
-static inline int fpu_restore_checking(struct fpu *fpu)
-{
- if (use_xsave())
- return fpu_xrstor_checking(&fpu->state->xsave);
- else if (use_fxsr())
- return fxrstor_checking(&fpu->state->fxsave);
- else
- return frstor_checking(&fpu->state->fsave);
-}
-
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
- /*
- * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
- * pending. Clear the x87 state here by setting it to fixed values.
- * "m" is a random variable that should be in L1.
- */
- if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
- asm volatile(
- "fnclex\n\t"
- "emms\n\t"
- "fildl %P[addr]" /* set F?P to defined value */
- : : [addr] "m" (tsk->thread.fpu.has_fpu));
- }
-
- return fpu_restore_checking(&tsk->thread.fpu);
-}
-
-/*
- * Software FPU state helpers. Careful: these need to
- * be preemption protection *and* they need to be
- * properly paired with the CR0.TS changes!
- */
-static inline int __thread_has_fpu(struct task_struct *tsk)
-{
- return tsk->thread.fpu.has_fpu;
-}
-
-/* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 0;
- this_cpu_write(fpu_owner_task, NULL);
-}
-
-/* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 1;
- this_cpu_write(fpu_owner_task, tsk);
-}
-
-/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void __thread_fpu_end(struct task_struct *tsk)
-{
- __thread_clear_has_fpu(tsk);
- if (!use_eager_fpu())
- stts();
-}
-
-static inline void __thread_fpu_begin(struct task_struct *tsk)
-{
- if (!use_eager_fpu())
- clts();
- __thread_set_has_fpu(tsk);
-}
-
-static inline void drop_fpu(struct task_struct *tsk)
-{
- /*
- * Forget coprocessor state..
- */
- preempt_disable();
- tsk->thread.fpu_counter = 0;
-
- if (__thread_has_fpu(tsk)) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(tsk);
- }
-
- clear_stopped_child_used_math(tsk);
- preempt_enable();
-}
-
-static inline void restore_init_xstate(void)
-{
- if (use_xsave())
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
-}
-
-/*
- * Reset the FPU state in the eager case and drop it in the lazy case (later use
- * will reinit it).
- */
-static inline void fpu_reset_state(struct task_struct *tsk)
-{
- if (!use_eager_fpu())
- drop_fpu(tsk);
- else
- restore_init_xstate();
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
- *
- * - switch_fpu_finish() restores the new state as
- * necessary.
- */
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
-{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = tsk_used_math(new) &&
- (use_eager_fpu() || new->thread.fpu_counter > 5);
-
- if (__thread_has_fpu(old)) {
- if (!__save_init_fpu(old))
- task_disable_lazy_fpu_restore(old);
- else
- old->thread.fpu.last_cpu = cpu;
-
- /* But leave fpu_owner_task! */
- old->thread.fpu.has_fpu = 0;
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new->thread.fpu_counter++;
- __thread_set_has_fpu(new);
- prefetch(new->thread.fpu.state);
- } else if (!use_eager_fpu())
- stts();
- } else {
- old->thread.fpu_counter = 0;
- task_disable_lazy_fpu_restore(old);
- if (fpu.preload) {
- new->thread.fpu_counter++;
- if (fpu_lazy_restore(new, cpu))
- fpu.preload = 0;
- else
- prefetch(new->thread.fpu.state);
- __thread_fpu_begin(new);
- }
- }
- return fpu;
-}
-
-/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
- */
-static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
-{
- if (fpu.preload) {
- if (unlikely(restore_fpu_checking(new)))
- fpu_reset_state(new);
- }
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
-extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
-
-static inline int xstate_sigframe_size(void)
-{
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
-}
-
-static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
-{
- void __user *buf_fx = buf;
- int size = xstate_sigframe_size();
-
- if (ia32_frame && use_fxsr()) {
- buf_fx = buf + sizeof(struct i387_fsave_struct);
- size += sizeof(struct i387_fsave_struct);
- }
-
- return __restore_xstate_sig(buf, buf_fx, size);
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
- preempt_disable();
- if (!user_has_fpu())
- __thread_fpu_begin(current);
- preempt_enable();
-}
-
-static inline void __save_fpu(struct task_struct *tsk)
-{
- if (use_xsave()) {
- if (unlikely(system_state == SYSTEM_BOOTING))
- xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
- else
- xsave_state(&tsk->thread.fpu.state->xsave, -1);
- } else
- fpu_fxsave(&tsk->thread.fpu);
-}
-
-/*
- * i387 state interaction
- */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
- if (cpu_has_xmm) {
- return tsk->thread.fpu.state->fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
-static bool fpu_allocated(struct fpu *fpu)
-{
- return fpu->state != NULL;
-}
-
-static inline int fpu_alloc(struct fpu *fpu)
-{
- if (fpu_allocated(fpu))
- return 0;
- fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
- if (!fpu->state)
- return -ENOMEM;
- WARN_ON((unsigned long)fpu->state & 15);
- return 0;
-}
-
-static inline void fpu_free(struct fpu *fpu)
-{
- if (fpu->state) {
- kmem_cache_free(task_xstate_cachep, fpu->state);
- fpu->state = NULL;
- }
-}
-
-static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
-{
- if (use_eager_fpu()) {
- memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
- __save_fpu(dst);
- } else {
- struct fpu *dfpu = &dst->thread.fpu;
- struct fpu *sfpu = &src->thread.fpu;
-
- unlazy_fpu(src);
- memcpy(dfpu->state, sfpu->state, xstate_size);
- }
-}
-
-static inline unsigned long
-alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
- unsigned long *size)
-{
- unsigned long frame_size = xstate_sigframe_size();
-
- *buf_fx = sp = round_down(sp - frame_size, 64);
- if (ia32_frame && use_fxsr()) {
- frame_size += sizeof(struct i387_fsave_struct);
- sp -= sizeof(struct i387_fsave_struct);
- }
-
- *size = frame_size;
- return sp;
-}
-
-#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
new file mode 100644
index 000000000000..1429a7c736db
--- /dev/null
+++ b/arch/x86/include/asm/fpu/api.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_API_H
+#define _ASM_X86_FPU_API_H
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), a preempt notifier
+ * should call __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM, for example, uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+extern bool irq_fpu_usable(void);
+
+/*
+ * Some instructions, such as VIA's PadLock instructions, generate a spurious
+ * DNA fault but don't modify the SSE registers, and they get used from
+ * interrupt context as well. To prevent such kernel instructions in interrupt
+ * context from interacting wrongly with other user/kernel FPU usage, they
+ * should be used only within irq_ts_save()/irq_ts_restore().
+ */
+extern int irq_ts_save(void);
+extern void irq_ts_restore(int TS_state);
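For context, a minimal usage sketch (not part of this patch; the function name and the SIMD payload are hypothetical) of how kernel code is expected to drive this API:

/*
 * Hypothetical sketch: guard kernel-mode SSE/AVX usage with
 * kernel_fpu_begin()/kernel_fpu_end(), falling back to scalar code
 * when the FPU is not usable in the current (e.g. interrupt) context.
 */
static void my_simd_memzero(void *dst, unsigned long len)
{
	if (!irq_fpu_usable()) {
		memset(dst, 0, len);	/* scalar fallback */
		return;
	}

	kernel_fpu_begin();		/* disables preemption */
	/* ... SSE/AVX instructions may be used on 'dst' here ... */
	kernel_fpu_end();		/* re-enables preemption */
}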
+
+/*
+ * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
+ *
+ * If 'feature_name' is set then put a human-readable description of
+ * the feature there as well - this can be used to print error (or success)
+ * messages.
+ */
+extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
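A brief illustration of the intended calling convention (assumed, not from this patch): callers pass a mask of the xstate bits they need and optionally get a printable feature name back on failure:

	const char *feature_name;

	/* Hypothetical check: is SSE+AVX state supported by this CPU? */
	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name))
		pr_err("CPU lacks required xfeature: '%s'\n", feature_name);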
+
+#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
new file mode 100644
index 000000000000..3c3550c3a4a3
--- /dev/null
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_INTERNAL_H
+#define _ASM_X86_FPU_INTERNAL_H
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/user.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/xstate.h>
+
+/*
+ * High level FPU state handling functions:
+ */
+extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__activate_fpstate_read(struct fpu *fpu);
+extern void fpu__activate_fpstate_write(struct fpu *fpu);
+extern void fpu__save(struct fpu *fpu);
+extern void fpu__restore(struct fpu *fpu);
+extern int fpu__restore_sig(void __user *buf, int ia32_frame);
+extern void fpu__drop(struct fpu *fpu);
+extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern void fpu__clear(struct fpu *fpu);
+extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
+extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
+
+/*
+ * Boot time FPU initialization functions:
+ */
+extern void fpu__init_cpu(void);
+extern void fpu__init_system_xstate(void);
+extern void fpu__init_cpu_xstate(void);
+extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_check_bugs(void);
+extern void fpu__resume_cpu(void);
+
+/*
+ * Debugging facility:
+ */
+#ifdef CONFIG_X86_DEBUG_FPU
+# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
+#else
+# define WARN_ON_FPU(x) ({ (void)(x); 0; })
+#endif
+
+/*
+ * FPU related CPU feature flag helper routines:
+ */
+static __always_inline __pure bool use_eager_fpu(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+}
+
+static __always_inline __pure bool use_xsaveopt(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+}
+
+static __always_inline __pure bool use_xsave(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_FXSR);
+}
+
+/*
+ * fpstate handling functions:
+ */
+
+extern union fpregs_state init_fpstate;
+
+extern void fpstate_init(union fpregs_state *state);
+#ifdef CONFIG_MATH_EMULATION
+extern void fpstate_init_soft(struct swregs_state *soft);
+#else
+static inline void fpstate_init_soft(struct swregs_state *soft) {}
+#endif
+static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+{
+ fx->cwd = 0x37f;
+ fx->mxcsr = MXCSR_DEFAULT;
+}
+extern void fpstate_sanitize_xstate(struct fpu *fpu);
+
+#define user_insn(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile(ASM_STAC "\n" \
+ "1:" #insn "\n\t" \
+ "2: " ASM_CLAC "\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+#define check_insn(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
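Both macros rely on the exception-table fixup mechanism: if the instruction at label 1 faults, the _ASM_EXTABLE entry redirects execution to label 3 in the .fixup section, which stores -1 in 'err' and jumps back to label 2, so the caller sees an error code instead of an oops. user_insn() additionally brackets the instruction with STAC/CLAC so that it may touch user memory when SMAP is enabled.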
+
+static inline int copy_fregs_to_user(struct fregs_state __user *fx)
+{
+ return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
+}
+
+static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
+{
+ if (config_enabled(CONFIG_X86_32))
+ return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
+
+ /* See comment in copy_fxregs_to_kernel() below. */
+ return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
+}
+
+static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
+{
+ int err;
+
+ if (config_enabled(CONFIG_X86_32)) {
+ err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ } else {
+ if (config_enabled(CONFIG_AS_FXSAVEQ)) {
+ err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+ } else {
+ /* See comment in copy_fxregs_to_kernel() below. */
+ err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+ }
+ }
+ /* Copying from a kernel buffer to FPU registers should never fail: */
+ WARN_ON_FPU(err);
+}
+
+static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
+{
+ if (config_enabled(CONFIG_X86_32))
+ return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+
+ /* See comment in copy_fxregs_to_kernel() below. */
+ return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+ "m" (*fx));
+}
+
+static inline void copy_kernel_to_fregs(struct fregs_state *fx)
+{
+ int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+
+ WARN_ON_FPU(err);
+}
+
+static inline int copy_user_to_fregs(struct fregs_state __user *fx)
+{
+ return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline void copy_fxregs_to_kernel(struct fpu *fpu)
+{
+ if (config_enabled(CONFIG_X86_32))
+ asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ else {
+ /* Using "rex64; fxsave %0" is broken because, if the memory
+ * operand uses any extended registers for addressing, a second
+ * REX prefix will be generated (to the assembler, rex64
+ * followed by semicolon is a separate instruction), and hence
+ * the 64-bitness is lost.
+ *
+ * Using "fxsaveq %0" would be the ideal choice, but is only
+ * supported starting with gas 2.16.
+ *
+ * Using, as a workaround, the properly prefixed form below
+ * isn't accepted by any binutils version so far released,
+ * complaining that the same type of prefix is used twice if
+ * an extended register is needed for addressing (fix submitted
+ * to mainline 2005-11-21).
+ *
+ * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
+ *
+ * This, however, we can work around by forcing the compiler to
+ * select an addressing mode that doesn't require extended
+ * registers.
+ */
+ asm volatile( "rex64/fxsave (%[fx])"
+ : "=m" (fpu->state.fxsave)
+ : [fx] "R" (&fpu->state.fxsave));
+ }
+}
+
+/* These macros all use (%edi)/(%rdi) as the single memory argument. */
+#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
+#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
+#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
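For reference, with REX_PREFIX defined as "0x48, " on 64-bit (see <asm/fpu/xstate.h> below), these sequences decode to the 64-bit instruction forms: e.g. 0x48 0x0f 0xae 0x27 is xsave64 (%rdi) and 0x48 0x0f 0xc7 0x1f is xrstors64 (%rdi). The instructions are emitted as raw bytes because older assemblers do not know them.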
+
+/* xstate instruction fault handler: */
+#define xstate_fault(__err) \
+ \
+ ".section .fixup,\"ax\"\n" \
+ \
+ "3: movl $-2,%[_err]\n" \
+ " jmp 2b\n" \
+ \
+ ".previous\n" \
+ \
+ _ASM_EXTABLE(1b, 3b) \
+ : [_err] "=r" (__err)
+
+/*
+ * This function is called only during boot, when the x86 capability bits
+ * are not yet set up and alternatives cannot be used.
+ */
+static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XSAVES"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+ else
+ asm volatile("1:"XSAVE"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+
+ /* We should never fault when copying to a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * This function is called only during boot, when the x86 capability bits
+ * are not yet set up and alternatives cannot be used.
+ */
+static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XRSTORS"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+ else
+ asm volatile("1:"XRSTOR"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+
+ /* We should never fault when copying from a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Save processor xstate to xsave area.
+ */
+static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(!alternatives_patched);
+
+ /*
+ * If xsaves is enabled, xsaves replaces xsaveopt because it supports
+ * the compacted format and supervisor states in addition to
+ * xsaveopt's modified optimization.
+ *
+ * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave,
+ * because xsaveopt supports the modified optimization which
+ * xsave does not.
+ *
+ * If neither xsaves nor xsaveopt is enabled, use xsave.
+ */
+ alternative_input_2(
+ "1:"XSAVE,
+ XSAVEOPT,
+ X86_FEATURE_XSAVEOPT,
+ XSAVES,
+ X86_FEATURE_XSAVES,
+ [xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
+ "memory");
+ asm volatile("2:\n\t"
+ xstate_fault(err)
+ : "0" (err)
+ : "memory");
+
+ /* We should never fault when copying to a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Restore processor xstate from xsave area.
+ */
+static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ /*
+ * Use xrstors to restore context if it is enabled: xrstors supports
+ * the compacted xsave area format, which xrstor does not.
+ */
+ alternative_input(
+ "1: " XRSTOR,
+ XRSTORS,
+ X86_FEATURE_XSAVES,
+ "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
+ : "memory");
+
+ asm volatile("2:\n"
+ xstate_fault(err)
+ : "0" (err)
+ : "memory");
+
+ /* We should never fault when copying from a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Save xstate to the user space xsave area.
+ *
+ * We don't use the modified optimization because xrstor/xrstors might
+ * track a different application.
+ *
+ * We don't use the compacted format, for backward compatibility with
+ * old applications which don't understand the compacted xsave area.
+ */
+static inline int copy_xregs_to_user(struct xregs_state __user *buf)
+{
+ int err;
+
+ /*
+ * Clear the xsave header first, so that reserved fields are
+ * initialized to zero.
+ */
+ err = __clear_user(&buf->header, sizeof(buf->header));
+ if (unlikely(err))
+ return -EFAULT;
+
+ __asm__ __volatile__(ASM_STAC "\n"
+ "1:"XSAVE"\n"
+ "2: " ASM_CLAC "\n"
+ xstate_fault(err)
+ : "D" (buf), "a" (-1), "d" (-1), "0" (err)
+ : "memory");
+ return err;
+}
+
+/*
+ * Restore xstate from user space xsave area.
+ */
+static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
+{
+ struct xregs_state *xstate = ((__force struct xregs_state *)buf);
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ __asm__ __volatile__(ASM_STAC "\n"
+ "1:"XRSTOR"\n"
+ "2: " ASM_CLAC "\n"
+ xstate_fault(err)
+ : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory"); /* memory required? */
+ return err;
+}
+
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction clears all FPU state
+ * unconditionally, so the registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
+{
+ if (likely(use_xsave())) {
+ copy_xregs_to_kernel(&fpu->state.xsave);
+ return 1;
+ }
+
+ if (likely(use_fxsr())) {
+ copy_fxregs_to_kernel(fpu);
+ return 1;
+ }
+
+ /*
+ * Legacy FPU register saving: FNSAVE always clears the FPU registers,
+ * so we have to mark them inactive:
+ */
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+ return 0;
+}
+
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+{
+ if (use_xsave()) {
+ copy_kernel_to_xregs(&fpstate->xsave, -1);
+ } else {
+ if (use_fxsr())
+ copy_kernel_to_fxregs(&fpstate->fxsave);
+ else
+ copy_kernel_to_fregs(&fpstate->fsave);
+ }
+}
+
+static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
+{
+ /*
+ * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+ * pending. Clear the x87 state here by setting it to fixed values.
+ * "m" is a random variable that should be in L1.
+ */
+ if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+ asm volatile(
+ "fnclex\n\t"
+ "emms\n\t"
+ "fildl %P[addr]" /* set F?P to defined value */
+ : : [addr] "m" (fpstate));
+ }
+
+ __copy_kernel_to_fpregs(fpstate);
+}
+
+extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+
+/*
+ * FPU context switch related helper methods:
+ */
+
+DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+/*
+ * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved on the
+ * next context switch.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+}
+
+static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
+{
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+}
+
+
+/*
+ * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
+ * idiom, which is then paired with the sw-flag (fpregs_active) later on:
+ */
+
+static inline void __fpregs_activate_hw(void)
+{
+ if (!use_eager_fpu())
+ clts();
+}
+
+static inline void __fpregs_deactivate_hw(void)
+{
+ if (!use_eager_fpu())
+ stts();
+}
+
+/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
+static inline void __fpregs_deactivate(struct fpu *fpu)
+{
+ WARN_ON_FPU(!fpu->fpregs_active);
+
+ fpu->fpregs_active = 0;
+ this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+}
+
+/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
+static inline void __fpregs_activate(struct fpu *fpu)
+{
+ WARN_ON_FPU(fpu->fpregs_active);
+
+ fpu->fpregs_active = 1;
+ this_cpu_write(fpu_fpregs_owner_ctx, fpu);
+}
+
+/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ */
+static inline int fpregs_active(void)
+{
+ return current->thread.fpu.fpregs_active;
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work;
+ * try to avoid using them on their own.
+ */
+static inline void fpregs_activate(struct fpu *fpu)
+{
+ __fpregs_activate_hw();
+ __fpregs_activate(fpu);
+}
+
+static inline void fpregs_deactivate(struct fpu *fpu)
+{
+ __fpregs_deactivate(fpu);
+ __fpregs_deactivate_hw();
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+static inline fpu_switch_t
+switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+{
+ fpu_switch_t fpu;
+
+ /*
+ * If the new task has used the FPU, pre-load its state: always on
+ * eager-FPU (xsave) processors, or if the past 5 consecutive
+ * context switches also used the FPU.
+ */
+ fpu.preload = new_fpu->fpstate_active &&
+ (use_eager_fpu() || new_fpu->counter > 5);
+
+ if (old_fpu->fpregs_active) {
+ if (!copy_fpregs_to_fpstate(old_fpu))
+ old_fpu->last_cpu = -1;
+ else
+ old_fpu->last_cpu = cpu;
+
+ /* But leave fpu_fpregs_owner_ctx! */
+ old_fpu->fpregs_active = 0;
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ new_fpu->counter++;
+ __fpregs_activate(new_fpu);
+ prefetch(&new_fpu->state);
+ } else {
+ __fpregs_deactivate_hw();
+ }
+ } else {
+ old_fpu->counter = 0;
+ old_fpu->last_cpu = -1;
+ if (fpu.preload) {
+ new_fpu->counter++;
+ if (fpu_want_lazy_restore(new_fpu, cpu))
+ fpu.preload = 0;
+ else
+ prefetch(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * Misc helper functions:
+ */
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+{
+ if (fpu_switch.preload)
+ copy_kernel_to_fpregs(&new_fpu->state);
+}
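To make the two-stage protocol concrete, here is a hedged sketch of how an architecture context-switch routine (such as __switch_to()) is expected to pair the two calls; the wrapper function itself is hypothetical:

/* Hypothetical glue: 'prev' is being switched out, 'next' switched in. */
static void example_fpu_context_switch(struct task_struct *prev,
				       struct task_struct *next, int cpu)
{
	fpu_switch_t fpu_switch;

	/* Stage 1, still executing in the context of 'prev': */
	fpu_switch = switch_fpu_prepare(&prev->thread.fpu, &next->thread.fpu, cpu);

	/* ... switch stacks, segment registers, TLS, etc. ... */

	/* Stage 2: conditionally restore the incoming task's register state: */
	switch_fpu_finish(&next->thread.fpu, fpu_switch);
}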
+
+/*
+ * Needs to be preemption-safe.
+ *
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * the saved state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception;
+ * the task can lose the FPU right after preempt_enable().
+ */
+static inline void user_fpu_begin(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ preempt_disable();
+ if (!fpregs_active())
+ fpregs_activate(fpu);
+ preempt_enable();
+}
+
+/*
+ * MXCSR and XCR definitions:
+ */
+
+extern unsigned int mxcsr_feature_mask;
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
+ : : "a" (eax), "d" (edx), "c" (index));
+}
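As a usage note, XCR0 (the xfeature enabled mask register) is read by passing XCR_XFEATURE_ENABLED_MASK as the index; for example:

	u64 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);	/* which xstate components are enabled */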
+
+#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
new file mode 100644
index 000000000000..39d3107ac6c7
--- /dev/null
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -0,0 +1,21 @@
+/*
+ * FPU regset handling methods:
+ */
+#ifndef _ASM_X86_FPU_REGSET_H
+#define _ASM_X86_FPU_REGSET_H
+
+#include <linux/regset.h>
+
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+ xstateregs_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
+ xstateregs_set;
+
+/*
+ * xstateregs_active == regset_fpregs_active. Please refer to the comment
+ * at the definition of regset_fpregs_active.
+ */
+#define xstateregs_active regset_fpregs_active
+
+#endif /* _ASM_X86_FPU_REGSET_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
new file mode 100644
index 000000000000..7358e9d61f1e
--- /dev/null
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -0,0 +1,33 @@
+/*
+ * x86 FPU signal frame handling methods:
+ */
+#ifndef _ASM_X86_FPU_SIGNAL_H
+#define _ASM_X86_FPU_SIGNAL_H
+
+#ifdef CONFIG_X86_64
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
+struct ksignal;
+int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
+ compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct ksignal *ksig,
+ compat_sigset_t *set, struct pt_regs *regs);
+#else
+# define user_i387_ia32_struct user_i387_struct
+# define user32_fxsr_struct user_fxsr_struct
+# define ia32_setup_frame __setup_frame
+# define ia32_setup_rt_frame __setup_rt_frame
+#endif
+
+extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
+ struct task_struct *tsk);
+extern void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env);
+
+unsigned long
+fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
+ unsigned long *buf_fx, unsigned long *size);
+
+extern void fpu__init_prepare_fx_sw_frame(void);
+
+#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
new file mode 100644
index 000000000000..0637826292de
--- /dev/null
+++ b/arch/x86/include/asm/fpu/types.h
@@ -0,0 +1,293 @@
+/*
+ * FPU data structures:
+ */
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+/*
+ * The legacy x87 FPU state format, as saved by FSAVE and
+ * restored by the FRSTOR instructions:
+ */
+struct fregs_state {
+ u32 cwd; /* FPU Control Word */
+ u32 swd; /* FPU Status Word */
+ u32 twd; /* FPU Tag Word */
+ u32 fip; /* FPU IP Offset */
+ u32 fcs; /* FPU IP Selector */
+ u32 foo; /* FPU Operand Pointer Offset */
+ u32 fos; /* FPU Operand Pointer Selector */
+
+ /* 8*10 bytes for each FP-reg = 80 bytes: */
+ u32 st_space[20];
+
+ /* Software status information [not touched by FSAVE]: */
+ u32 status;
+};
+
+/*
+ * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and
+ * restored by the FXRSTOR instructions. It's similar to the FSAVE
+ * format, but differs in some areas, plus has extensions at
+ * the end for the XMM registers.
+ */
+struct fxregs_state {
+ u16 cwd; /* Control Word */
+ u16 swd; /* Status Word */
+ u16 twd; /* Tag Word */
+ u16 fop; /* Last Instruction Opcode */
+ union {
+ struct {
+ u64 rip; /* Instruction Pointer */
+ u64 rdp; /* Data Pointer */
+ };
+ struct {
+ u32 fip; /* FPU IP Offset */
+ u32 fcs; /* FPU IP Selector */
+ u32 foo; /* FPU Operand Offset */
+ u32 fos; /* FPU Operand Selector */
+ };
+ };
+ u32 mxcsr; /* MXCSR Register State */
+ u32 mxcsr_mask; /* MXCSR Mask */
+
+ /* 8*16 bytes for each FP-reg = 128 bytes: */
+ u32 st_space[32];
+
+ /* 16*16 bytes for each XMM-reg = 256 bytes: */
+ u32 xmm_space[64];
+
+ u32 padding[12];
+
+ union {
+ u32 padding1[12];
+ u32 sw_reserved[12];
+ };
+
+} __attribute__((aligned(16)));
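Summing the fields (32 bytes of control/pointer words, 128 bytes of st_space, 256 bytes of xmm_space and 96 bytes of padding/reserved space) gives exactly the architectural 512-byte FXSAVE area; a build-time assertion, not part of this patch, could verify this:

	BUILD_BUG_ON(sizeof(struct fxregs_state) != 512);	/* FXSAVE_SIZE */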
+
+/* Default value for fxregs_state.mxcsr: */
+#define MXCSR_DEFAULT 0x1f80
+
+/*
+ * Software based FPU emulation state. The layout is largely arbitrary;
+ * it matches the x87 format to make it easier to understand:
+ */
+struct swregs_state {
+ u32 cwd;
+ u32 swd;
+ u32 twd;
+ u32 fip;
+ u32 fcs;
+ u32 foo;
+ u32 fos;
+ /* 8*10 bytes for each FP-reg = 80 bytes: */
+ u32 st_space[20];
+ u8 ftop;
+ u8 changed;
+ u8 lookahead;
+ u8 no_update;
+ u8 rm;
+ u8 alimit;
+ struct math_emu_info *info;
+ u32 entry_eip;
+};
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+ XSTATE_BIT_FP,
+ XSTATE_BIT_SSE,
+ XSTATE_BIT_YMM,
+ XSTATE_BIT_BNDREGS,
+ XSTATE_BIT_BNDCSR,
+ XSTATE_BIT_OPMASK,
+ XSTATE_BIT_ZMM_Hi256,
+ XSTATE_BIT_Hi16_ZMM,
+
+ XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
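As a worked example of the encoding: XSTATE_FPSSE expands to bits 0-1, i.e. 0x3, and XSTATE_AVX512 to bits 5-7, i.e. (1 << 5) | (1 << 6) | (1 << 7) == 0xe0.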
+
+/*
+ * There are 16x 256-bit AVX registers named YMM0-YMM15.
+ * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
+ * and are stored in 'struct fxregs_state::xmm_space[]'.
+ *
+ * The high 128 bits are stored here:
+ * 16x 128 bits == 256 bytes.
+ */
+struct ymmh_struct {
+ u8 ymmh_space[256];
+};
+
+/* We don't support LWP yet: */
+struct lwp_struct {
+ u8 reserved[128];
+};
+
+/* Intel MPX support: */
+struct bndreg {
+ u64 lower_bound;
+ u64 upper_bound;
+} __packed;
+
+struct bndcsr {
+ u64 bndcfgu;
+ u64 bndstatus;
+} __packed;
+
+struct mpx_struct {
+ struct bndreg bndreg[4];
+ struct bndcsr bndcsr;
+};
+
+struct xstate_header {
+ u64 xfeatures;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+/* New processor state extensions should be added here: */
+#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
+ sizeof(struct lwp_struct) + \
+ sizeof(struct mpx_struct) )
+/*
+ * This is our most modern FPU state format, as saved by the XSAVE
+ * and restored by the XRSTOR instructions.
+ *
+ * It consists of a legacy fxregs portion, an xstate header and
+ * subsequent fixed size areas as defined by the xstate header.
+ * Not all CPUs support all the extensions.
+ */
+struct xregs_state {
+ struct fxregs_state i387;
+ struct xstate_header header;
+ u8 __reserved[XSTATE_RESERVE];
+} __attribute__ ((packed, aligned (64)));
+
+/*
+ * This is a union of all the possible FPU state formats
+ * put together, so that we can pick the right one at runtime.
+ *
+ * The size of the structure is determined by the largest
+ * member - which is the xsave area:
+ */
+union fpregs_state {
+ struct fregs_state fsave;
+ struct fxregs_state fxsave;
+ struct swregs_state soft;
+ struct xregs_state xsave;
+};
+
+/*
+ * Highest level per task FPU state data structure that
+ * contains the FPU register state plus various FPU
+ * state fields:
+ */
+struct fpu {
+ /*
+ * @state:
+ *
+ * In-memory copy of all FPU registers that we save/restore
+ * over context switches. If the task is using the FPU then
+ * the registers in the FPU are more recent than this state
+ * copy. If the task context-switches away then they get
+ * saved here and represent the FPU state.
+ *
+ * After context switches there may be a (short) time period
+ * during which the in-FPU hardware registers are unchanged
+ * and still perfectly match this state, if the tasks
+ * scheduled afterwards are not using the FPU.
+ *
+ * This is the 'lazy restore' window of optimization, which
+ * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+ *
+ * We detect whether a subsequent task uses the FPU via setting
+ * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+ *
+ * During this window, if the task gets scheduled again, we
+ * might be able to skip having to do a restore from this
+ * memory buffer to the hardware registers - at the cost of
+ * incurring the overhead of #NM fault traps.
+ *
+ * Note that on modern CPUs that support XSAVEOPT (or other
+ * optimized XSAVE instructions), we don't use #NM traps anymore,
+ * as the hardware can track whether FPU registers need saving
+ * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+ * logic, which unconditionally saves/restores all FPU state
+ * across context switches (if FPU state exists).
+ */
+ union fpregs_state state;
+
+ /*
+ * @last_cpu:
+ *
+ * Records the last CPU on which this context was loaded into
+ * FPU registers. (In the lazy-restore case we might be
+ * able to reuse FPU registers across multiple context switches
+ * this way, if no intermediate task used the FPU.)
+ *
+ * A value of -1 is used to indicate that the FPU state in context
+ * memory is newer than the FPU state in registers, and that the
+ * FPU state should be reloaded next time the task is run.
+ */
+ unsigned int last_cpu;
+
+ /*
+ * @fpstate_active:
+ *
+ * This flag indicates whether this context is active: if the task
+ * is not running then we can restore from this context, if the task
+ * is running then we should save into this context.
+ */
+ unsigned char fpstate_active;
+
+ /*
+ * @fpregs_active:
+ *
+ * This flag determines whether a given context is actively
+ * loaded into the FPU's registers and that those registers
+ * represent the task's current FPU state.
+ *
+ * Note the interaction with fpstate_active:
+ *
+ * # task does not use the FPU:
+ * fpstate_active == 0
+ *
+ * # task uses the FPU and regs are active:
+ * fpstate_active == 1 && fpregs_active == 1
+ *
+ * # the regs are inactive but still match fpstate:
+ * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
+ *
+ * The third state is what we use for the lazy restore optimization
+ * on lazy-switching CPUs.
+ */
+ unsigned char fpregs_active;
+
+ /*
+ * @counter:
+ *
+ * This counter contains the number of consecutive context switches
+ * during which the FPU stays used. If this is over a threshold, the
+ * lazy FPU restore logic becomes eager, to save the trap overhead.
+ * This is an unsigned char so that after 256 iterations the counter
+ * wraps and the context switch behavior turns lazy again; this is to
+ * deal with bursty apps that only use the FPU for a short time:
+ */
+ unsigned char counter;
+};
+
+#endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
new file mode 100644
index 000000000000..4656b25bb9a7
--- /dev/null
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -0,0 +1,46 @@
+#ifndef __ASM_X86_XSAVE_H
+#define __ASM_X86_XSAVE_H
+
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+
+/* Bit 63 of XCR0 is reserved for future expansion */
+#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
+
+#define XSTATE_CPUID 0x0000000d
+
+#define FXSAVE_SIZE 512
+
+#define XSAVE_HDR_SIZE 64
+#define XSAVE_HDR_OFFSET FXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE 256
+#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
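In other words, an XSAVE image starts with the 512-byte legacy FXSAVE area, the 64-byte xstate header follows at offset 512 (XSAVE_HDR_OFFSET), and the YMM state starts at offset XSAVE_YMM_OFFSET == 64 + 512 == 576.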
+
+/* Supported features which support lazy state saving */
+#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
+ | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+
+/* Supported features which require eager state saving */
+#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
+
+/* All currently supported features */
+#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+extern unsigned int xstate_size;
+extern u64 xfeatures_mask;
+extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+
+extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+
+void *get_xsave_addr(struct xregs_state *xsave, int xstate);
+const void *get_xsave_field_ptr(int xstate_field);
+
+#endif
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index e42f758a0fbd..055ea9941dd5 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper;
/* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_xen;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
deleted file mode 100644
index 6eb6fcb83f63..000000000000
--- a/arch/x86/include/asm/i387.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/sched.h>
-#include <linux/hardirq.h>
-
-struct pt_regs;
-struct user_i387_struct;
-
-extern int init_fpu(struct task_struct *child);
-extern void fpu_finit(struct fpu *fpu);
-extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
-extern void math_state_restore(void);
-
-extern bool irq_fpu_usable(void);
-
-/*
- * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
- * and they don't touch the preempt state on their own.
- * If you enable preemption after __kernel_fpu_begin(), preempt notifier
- * should call the __kernel_fpu_end() to prevent the kernel/user FPU
- * state from getting corrupted. KVM for example uses this model.
- *
- * All other cases use kernel_fpu_begin/end() which disable preemption
- * during kernel FPU usage.
- */
-extern void __kernel_fpu_begin(void);
-extern void __kernel_fpu_end(void);
-
-static inline void kernel_fpu_begin(void)
-{
- preempt_disable();
- WARN_ON_ONCE(!irq_fpu_usable());
- __kernel_fpu_begin();
-}
-
-static inline void kernel_fpu_end(void)
-{
- __kernel_fpu_end();
- preempt_enable();
-}
-
-/* Must be called with preempt disabled */
-extern void kernel_fpu_disable(void);
-extern void kernel_fpu_enable(void);
-
-/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-static inline int irq_ts_save(void)
-{
- /*
- * If in process context and not atomic, we can take a spurious DNA fault.
- * Otherwise, doing clts() in process context requires disabling preemption
- * or some heavy lifting like kernel_fpu_begin()
- */
- if (!in_atomic())
- return 0;
-
- if (read_cr0() & X86_CR0_TS) {
- clts();
- return 1;
- }
-
- return 0;
-}
-
-static inline void irq_ts_restore(int TS_state)
-{
- if (TS_state)
- stts();
-}
-
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int user_has_fpu(void)
-{
- return current->thread.fpu.has_fpu;
-}
-
-extern void unlazy_fpu(struct task_struct *tsk);
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e962e3..f8c0ec3a4a97 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
unsigned nxe:1;
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
+ unsigned smap_andnot_wp:1;
};
};
@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
+ bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -743,6 +745,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -999,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
-int fx_init(struct kvm_vcpu *vcpu);
-
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 883f6b933fa4..5e8daee7c5c9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
paravirt_arch_exit_mmap(mm);
}
+#ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+ return !config_enabled(CONFIG_IA32_EMULATION) ||
+ !(mm->context.ia32_compat == TIF_IA32);
+}
+#else
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+ return false;
+}
+#endif
+
static inline void arch_bprm_mm_init(struct mm_struct *mm,
struct vm_area_struct *vma)
{
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index a952a13d59a7..7a35495275a9 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -13,55 +13,50 @@
#define MPX_BNDCFG_ENABLE_FLAG 0x1
#define MPX_BD_ENTRY_VALID_FLAG 0x1
-#ifdef CONFIG_X86_64
-
-/* upper 28 bits [47:20] of the virtual address in 64-bit used to
- * index into bounds directory (BD).
- */
-#define MPX_BD_ENTRY_OFFSET 28
-#define MPX_BD_ENTRY_SHIFT 3
-/* bits [19:3] of the virtual address in 64-bit used to index into
- * bounds table (BT).
+/*
+ * The upper 28 bits [47:20] of the virtual address in 64-bit mode
+ * are used to index into the bounds directory (BD).
+ *
+ * The directory is 2G (2^31) in size, and with 8-byte entries
+ * it has 2^28 entries.
*/
-#define MPX_BT_ENTRY_OFFSET 17
-#define MPX_BT_ENTRY_SHIFT 5
-#define MPX_IGN_BITS 3
-#define MPX_BD_ENTRY_TAIL 3
+#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
+#define MPX_BD_ENTRY_BYTES_64 8
+#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-#else
-
-#define MPX_BD_ENTRY_OFFSET 20
-#define MPX_BD_ENTRY_SHIFT 2
-#define MPX_BT_ENTRY_OFFSET 10
-#define MPX_BT_ENTRY_SHIFT 4
-#define MPX_IGN_BITS 2
-#define MPX_BD_ENTRY_TAIL 2
+/*
+ * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
+ * entries it has 2^20 entries.
+ */
+#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
+#define MPX_BD_ENTRY_BYTES_32 4
+#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-#endif
+/*
+ * A 64-bit table is 4MB total in size, and an entry is
+ * 4 64-bit pointers in size.
+ */
+#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
+#define MPX_BT_ENTRY_BYTES_64 32
+#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
-#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
+/*
+ * A 32-bit table is 16kB total in size, and an entry is
+ * 4 32-bit pointers in size.
+ */
+#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
+#define MPX_BT_ENTRY_BYTES_32 16
+#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
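As an illustration of how these constants compose (the helper below is hypothetical, not part of this patch), bits [47:20] of a 64-bit address select the bounds-directory entry:

/*
 * Hypothetical illustration: byte offset of the 64-bit bounds-directory
 * entry covering 'addr'. Bits [47:20] select one of the 2^28 entries.
 */
static inline unsigned long mpx_bd_entry_offset_64(unsigned long addr)
{
	return ((addr >> 20) & (MPX_BD_NR_ENTRIES_64 - 1)) * MPX_BD_ENTRY_BYTES_64;
}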
#define MPX_BNDSTA_TAIL 2
#define MPX_BNDCFG_TAIL 12
#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1))
-
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
#define MPX_BNDSTA_ERROR_CODE 0x3
-#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
-#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
-#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
- MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
-#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
- MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
-
#ifdef CONFIG_X86_INTEL_MPX
-siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
- struct xsave_struct *xsave_buf);
-int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
+siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
+int mpx_handle_bd_fault(void);
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
@@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm)
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#else
-static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
- struct xsave_struct *xsave_buf)
+static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
{
return NULL;
}
-static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
+static inline int mpx_handle_bd_fault(void)
{
return -EINVAL;
}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8957810ad7d1..d143bfad45d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+ u32 val)
+{
+ PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+ PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+ PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
__ticket_t ticket)
{
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
}
-#endif
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f7b0b5c112f2..8766c7c395c2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -333,9 +333,19 @@ struct arch_spinlock;
typedef u16 __ticket_t;
#endif
+struct qspinlock;
+
struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+ struct paravirt_callee_save queued_spin_unlock;
+
+ void (*wait)(u8 *ptr, u8 val);
+ void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCKS */
struct paravirt_callee_save lock_spinning;
void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
};
/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 8f3271842533..dca71714f860 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -99,11 +99,9 @@ static __always_inline bool should_resched(void)
extern asmlinkage void ___preempt_schedule(void);
# define __preempt_schedule() asm ("call ___preempt_schedule")
extern asmlinkage void preempt_schedule(void);
-# ifdef CONFIG_CONTEXT_TRACKING
- extern asmlinkage void ___preempt_schedule_context(void);
-# define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
- extern asmlinkage void preempt_schedule_context(void);
-# endif
+ extern asmlinkage void ___preempt_schedule_notrace(void);
+# define __preempt_schedule_notrace() asm ("call ___preempt_schedule_notrace")
+ extern asmlinkage void preempt_schedule_notrace(void);
#endif
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 23ba6765b718..43e6519df0d5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,6 +21,7 @@ struct mm_struct;
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
+#include <asm/fpu/types.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
@@ -52,11 +53,16 @@ static inline void *current_text_addr(void)
return pc;
}
+/*
+ * These alignment constraints are for performance in the vSMP case,
+ * but in the task_struct case we must also meet hardware imposed
+ * alignment requirements of the FPU state:
+ */
#ifdef CONFIG_X86_VSMP
# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
#else
-# define ARCH_MIN_TASKALIGN 16
+# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state)
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
@@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op;
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
extern void cpu_detect(struct cpuinfo_x86 *c);
-extern void fpu_detect(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
@@ -313,128 +318,6 @@ struct orig_ist {
unsigned long ist[7];
};
-#define MXCSR_DEFAULT 0x1f80
-
-struct i387_fsave_struct {
- u32 cwd; /* FPU Control Word */
- u32 swd; /* FPU Status Word */
- u32 twd; /* FPU Tag Word */
- u32 fip; /* FPU IP Offset */
- u32 fcs; /* FPU IP Selector */
- u32 foo; /* FPU Operand Pointer Offset */
- u32 fos; /* FPU Operand Pointer Selector */
-
- /* 8*10 bytes for each FP-reg = 80 bytes: */
- u32 st_space[20];
-
- /* Software status information [not touched by FSAVE ]: */
- u32 status;
-};
-
-struct i387_fxsave_struct {
- u16 cwd; /* Control Word */
- u16 swd; /* Status Word */
- u16 twd; /* Tag Word */
- u16 fop; /* Last Instruction Opcode */
- union {
- struct {
- u64 rip; /* Instruction Pointer */
- u64 rdp; /* Data Pointer */
- };
- struct {
- u32 fip; /* FPU IP Offset */
- u32 fcs; /* FPU IP Selector */
- u32 foo; /* FPU Operand Offset */
- u32 fos; /* FPU Operand Selector */
- };
- };
- u32 mxcsr; /* MXCSR Register State */
- u32 mxcsr_mask; /* MXCSR Mask */
-
- /* 8*16 bytes for each FP-reg = 128 bytes: */
- u32 st_space[32];
-
- /* 16*16 bytes for each XMM-reg = 256 bytes: */
- u32 xmm_space[64];
-
- u32 padding[12];
-
- union {
- u32 padding1[12];
- u32 sw_reserved[12];
- };
-
-} __attribute__((aligned(16)));
-
-struct i387_soft_struct {
- u32 cwd;
- u32 swd;
- u32 twd;
- u32 fip;
- u32 fcs;
- u32 foo;
- u32 fos;
- /* 8*10 bytes for each FP-reg = 80 bytes: */
- u32 st_space[20];
- u8 ftop;
- u8 changed;
- u8 lookahead;
- u8 no_update;
- u8 rm;
- u8 alimit;
- struct math_emu_info *info;
- u32 entry_eip;
-};
-
-struct ymmh_struct {
- /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
- u32 ymmh_space[64];
-};
-
-/* We don't support LWP yet: */
-struct lwp_struct {
- u8 reserved[128];
-};
-
-struct bndreg {
- u64 lower_bound;
- u64 upper_bound;
-} __packed;
-
-struct bndcsr {
- u64 bndcfgu;
- u64 bndstatus;
-} __packed;
-
-struct xsave_hdr_struct {
- u64 xstate_bv;
- u64 xcomp_bv;
- u64 reserved[6];
-} __attribute__((packed));
-
-struct xsave_struct {
- struct i387_fxsave_struct i387;
- struct xsave_hdr_struct xsave_hdr;
- struct ymmh_struct ymmh;
- struct lwp_struct lwp;
- struct bndreg bndreg[4];
- struct bndcsr bndcsr;
- /* new processor state extensions will go here */
-} __attribute__ ((packed, aligned (64)));
-
-union thread_xstate {
- struct i387_fsave_struct fsave;
- struct i387_fxsave_struct fxsave;
- struct i387_soft_struct soft;
- struct xsave_struct xsave;
-};
-
-struct fpu {
- unsigned int last_cpu;
- unsigned int has_fpu;
- union thread_xstate *state;
-};
-
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
@@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */
extern unsigned int xstate_size;
-extern void free_thread_xstate(struct task_struct *);
-extern struct kmem_cache *task_xstate_cachep;
struct perf_event;
@@ -508,6 +389,10 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
+
+ /* Floating point and extended processor state */
+ struct fpu fpu;
+
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
@@ -518,8 +403,6 @@ struct thread_struct {
unsigned long cr2;
unsigned long trap_nr;
unsigned long error_code;
- /* floating point and extended processor state */
- struct fpu fpu;
#ifdef CONFIG_X86_32
/* Virtual 86 mode info */
struct vm86_struct __user *vm86_info;
@@ -535,15 +418,6 @@ struct thread_struct {
unsigned long iopl;
/* Max allowed port in the bitmap, in bytes: */
unsigned io_bitmap_max;
- /*
- * fpu_counter contains the number of consecutive context switches
- * that the FPU is used. If this is over a threshold, the lazy fpu
- * saving becomes unlazy to save the trap. This is an unsigned char
- * so that after 256 times the counter wraps and the behavior turns
- * lazy again; this to deal with bursty apps that only use FPU for
- * a short time
- */
- unsigned char fpu_counter;
};
/*
@@ -928,24 +802,25 @@ extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk))
-#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk))
+#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
+#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(struct task_struct *tsk);
-extern int mpx_disable_management(struct task_struct *tsk);
+extern int mpx_enable_management(void);
+extern int mpx_disable_management(void);
#else
-static inline int mpx_enable_management(struct task_struct *tsk)
+static inline int mpx_enable_management(void)
{
return -EINVAL;
}
-static inline int mpx_disable_management(struct task_struct *tsk)
+static inline int mpx_disable_management(void)
{
return -EINVAL;
}
#endif /* CONFIG_X86_INTEL_MPX */
extern u16 amd_get_nb_id(int cpu);
+extern u32 amd_get_nodes_per_socket(void);
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19507ffa5d28..5fabf1362942 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -107,7 +107,7 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
static inline int user_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
- return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+ return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL;
#else
return !!(regs->cs & 3);
#endif
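The rewritten 32-bit check folds VM86 mode in: X86_VM_MASK is the EFLAGS VM bit, so when it is set the OR makes the result exceed USER_RPL regardless of the CS RPL, and VM86 tasks are correctly reported as running in user mode without a separate branch.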
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644
index 000000000000..9d51fae1cba3
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_QSPINLOCK_H
+#define _ASM_X86_QSPINLOCK_H
+
+#include <asm/cpufeature.h>
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+ smp_store_release((u8 *)lock, 0);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+ pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+#endif
+
+#define virt_queued_spin_lock virt_queued_spin_lock
+
+static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return false;
+
+ while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+ cpu_relax();
+
+ return true;
+}
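The hypervisor special case reflects a fairness/preemption trade-off: queued spinlocks assume the lock holder and queued waiters keep making progress, but a preempted vCPU can stall the whole queue, so without paravirtual spinlock support the code degrades to a simple cmpxchg-based test-and-set lock when X86_FEATURE_HYPERVISOR is set.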
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_X86_QSPINLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000000..b002e711ba88
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5a9856eb12ba..7d5a1929d76b 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -231,11 +231,21 @@
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__
+
+/*
+ * early_idt_handler_array is an array of entry points referenced in the
+ * early IDT. For simplicity, it's a real array with one entry point
+ * every nine bytes. That leaves room for an optional 'push $0' if the
+ * vector has no error code (two bytes), a 'push $vector_number' (two
+ * bytes), and a jump to the common entry code (up to five bytes).
+ */
+#define EARLY_IDT_HANDLER_SIZE 9
+
#ifndef __ASSEMBLY__
-extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
#ifdef CONFIG_TRACING
-# define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handler_array early_idt_handler_array
#endif
/*
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index ee80b92f0096..6c8a7ed13365 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,5 +1,5 @@
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 17a8dced12da..222a6a3ca2b5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -37,16 +37,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
-static inline struct cpumask *cpu_sibling_mask(int cpu)
-{
- return per_cpu(cpu_sibling_map, cpu);
-}
-
-static inline struct cpumask *cpu_core_mask(int cpu)
-{
- return per_cpu(cpu_core_map, cpu);
-}
-
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return per_cpu(cpu_llc_shared_map, cpu);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index cf87de3fc390..be0a05913b91 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -42,6 +42,10 @@
extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key);
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -169,7 +173,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
tmp.head &= ~TICKET_SLOWPATH_FLAG;
- return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
+ return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
}
#define arch_spin_is_contended arch_spin_is_contended
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
cpu_relax();
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d7572d82b..65c3e37f879a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
typedef struct arch_spinlock {
union {
__ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
#include <asm-generic/qrwlock_types.h>
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 6a998598f172..c2e00bb2a136 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -39,7 +39,9 @@
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/desc.h>
+
#include <linux/random.h>
+#include <linux/sched.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 552d6c90a6d4..d1793f06854d 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -7,7 +7,7 @@
#define _ASM_X86_SUSPEND_32_H
#include <asm/desc.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/* image of the saved processor state */
struct saved_context {
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index bc6232834bab..7ebf0ebe4e68 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -7,7 +7,7 @@
#define _ASM_X86_SUSPEND_64_H
#include <asm/desc.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/*
* Image of the saved processor state, used by the low level ACPI suspend to
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0e8f04f2c26f..5a77593fdace 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -124,7 +124,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#ifdef ENABLE_TOPO_DEFINES
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
-#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
new file mode 100644
index 000000000000..173dd3ba108c
--- /dev/null
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -0,0 +1,132 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mpx
+
+#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MPX_H
+
+#include <linux/tracepoint.h>
+
+#ifdef CONFIG_X86_INTEL_MPX
+
+TRACE_EVENT(mpx_bounds_register_exception,
+
+ TP_PROTO(void *addr_referenced,
+ const struct bndreg *bndreg),
+ TP_ARGS(addr_referenced, bndreg),
+
+ TP_STRUCT__entry(
+ __field(void *, addr_referenced)
+ __field(u64, lower_bound)
+ __field(u64, upper_bound)
+ ),
+
+ TP_fast_assign(
+ __entry->addr_referenced = addr_referenced;
+ __entry->lower_bound = bndreg->lower_bound;
+ __entry->upper_bound = bndreg->upper_bound;
+ ),
+ /*
+ * Note that we print the '~' of the upper bounds
+ * register here. It is actually stored in one's
+ * complement form so that its 'init' state
+ * corresponds to all 0's. But that looks like
+ * gibberish when printed out, so print the one's
+ * complement of the stored value instead. Note,
+ * though, that filters must still be specified
+ * against the actual stored value, not the displayed one.
+ */
+ TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
+ __entry->addr_referenced,
+ __entry->lower_bound,
+ ~__entry->upper_bound
+ )
+);
+
+TRACE_EVENT(bounds_exception_mpx,
+
+ TP_PROTO(const struct bndcsr *bndcsr),
+ TP_ARGS(bndcsr),
+
+ TP_STRUCT__entry(
+ __field(u64, bndcfgu)
+ __field(u64, bndstatus)
+ ),
+
+ TP_fast_assign(
+ /* need to get rid of the 'const' on bndcsr */
+ __entry->bndcfgu = (u64)bndcsr->bndcfgu;
+ __entry->bndstatus = (u64)bndcsr->bndstatus;
+ ),
+
+ TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
+ __entry->bndcfgu,
+ __entry->bndstatus)
+);
+
+DECLARE_EVENT_CLASS(mpx_range_trace,
+
+ TP_PROTO(unsigned long start,
+ unsigned long end),
+ TP_ARGS(start, end),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->end = end;
+ ),
+
+ TP_printk("[0x%p:0x%p]",
+ (void *)__entry->start,
+ (void *)__entry->end
+ )
+);
+
+DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end)
+);
+
+DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end)
+);
+
+TRACE_EVENT(mpx_new_bounds_table,
+
+ TP_PROTO(unsigned long table_vaddr),
+ TP_ARGS(table_vaddr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, table_vaddr)
+ ),
+
+ TP_fast_assign(
+ __entry->table_vaddr = table_vaddr;
+ ),
+
+ TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
+);
+
+#else
+
+/*
+ * This gets used outside of MPX-specific code, so we need a stub.
+ */
+static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
+{
+}
+
+#endif /* CONFIG_X86_INTEL_MPX */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mpx
+#endif /* _TRACE_MPX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
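
Editor's note: a standalone illustration (not kernel code) of why the tracepoint above prints ~upper_bound: the hardware stores the upper bound in one's complement so that an all-zero (init) register means "no bound", i.e. lower = 0 and effective upper = ~0:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t stored = 0;			/* 'init' state: all zeros */
	printf("effective upper bound: 0x%llx\n",
	       (unsigned long long)~stored);	/* 0xffffffffffffffff */

	stored = ~0x7fffULL;			/* a real bound of 0x7fff */
	printf("effective upper bound: 0x%llx\n",
	       (unsigned long long)~stored);	/* 0x7fff */
	return 0;
}
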
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ace9dec050b1..a8df874f3e88 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
@@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -240,7 +242,8 @@ extern void __put_user_8(void);
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
@@ -455,7 +458,8 @@ struct __large_struct { unsigned long buf[100]; };
* @x: Variable to store result.
* @ptr: Source address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple variable from user space to kernel
* space. It supports simple types like char and int, but not larger
@@ -479,7 +483,8 @@ struct __large_struct { unsigned long buf[100]; };
* @x: Value to copy to user space.
* @ptr: Destination address, in user space.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* This macro copies a single simple value from kernel space to user
* space. It supports simple types like char and int, but not larger
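
Editor's note: the recurring kerneldoc change above ("may sleep if pagefaults are enabled") tracks the pagefault_disable() semantics: an accessor that may sleep in ordinary user context instead fails fast with -EFAULT inside a pagefault-disabled section. A kernel-style sketch of the distinction (our illustration, not part of this patch):

#include <linux/uaccess.h>

static int peek_user_word(const int __user *uptr, int *out)
{
	int ret;

	pagefault_disable();
	ret = __get_user(*out, uptr);	/* cannot sleep in this window */
	pagefault_enable();

	return ret;	/* 0 on success, -EFAULT if it would have faulted */
}
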
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 3c03a5de64d3..7c8ad3451988 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
@@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space. Caller must check
* the specified block with access_ok() before calling this function.
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index ccab4af1646d..59a54e869f15 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -14,8 +14,8 @@ struct user_ymmh_regs {
__u32 ymmh_space[64];
};
-struct user_xsave_hdr {
- __u64 xstate_bv;
+struct user_xstate_header {
+ __u64 xfeatures;
__u64 reserved1[2];
__u64 reserved2[5];
};
@@ -41,11 +41,11 @@ struct user_xsave_hdr {
* particular process/thread.
*
* Also when the user modifies certain state FP/SSE/etc through the
- * ptrace interface, they must ensure that the xsave_hdr.xstate_bv
+ * ptrace interface, they must ensure that the header.xfeatures
* bytes[512..519] of the memory layout are updated correspondingly.
* i.e., for example when FP state is modified to a non-init state,
- * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to
- * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc.
+ * header.xfeatures's bit 0 must be set to '1', when SSE is modified to
+ * non-init state, header.xfeatures's bit 1 must to be set to '1', etc.
*/
#define USER_XSTATE_FX_SW_WORDS 6
#define USER_XSTATE_XCR0_WORD 0
@@ -55,7 +55,7 @@ struct user_xstateregs {
__u64 fpx_space[58];
__u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
} i387;
- struct user_xsave_hdr xsave_hdr;
+ struct user_xstate_header header;
struct user_ymmh_regs ymmh;
/* further processor state extensions go here */
};
diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h
deleted file mode 100644
index f2cba4e79a23..000000000000
--- a/arch/x86/include/asm/xcr.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * Copyright 2008 rPath, Inc. - All Rights Reserved
- *
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2 or (at your
- * option) any later version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * asm-x86/xcr.h
- *
- * Definitions for the eXtended Control Register instructions
- */
-
-#ifndef _ASM_X86_XCR_H
-#define _ASM_X86_XCR_H
-
-#define XCR_XFEATURE_ENABLED_MASK 0x00000000
-
-#ifdef __KERNEL__
-# ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
-}
-
-# endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_XCR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 358dcd338915..c44a5d53e464 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
return false;
}
+static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+ return __get_free_pages(__GFP_NOWARN, order);
+}
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index d8829751b3f8..1f5c5161ead6 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -36,7 +36,7 @@
* no advantages to be gotten from x86-64 here anyways.
*/
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#ifdef CONFIG_X86_32
/* reduce register pressure */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index ce05722e3c68..5a08bc8bff33 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -26,7 +26,7 @@
#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
static void
xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 492b29802f57..7c0a517ec751 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -18,7 +18,7 @@
#ifdef CONFIG_AS_AVX
#include <linux/compiler.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#define BLOCK4(i) \
BLOCK(32 * i, 0) \
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
deleted file mode 100644
index c9a6d68b8d62..000000000000
--- a/arch/x86/include/asm/xsave.h
+++ /dev/null
@@ -1,257 +0,0 @@
-#ifndef __ASM_X86_XSAVE_H
-#define __ASM_X86_XSAVE_H
-
-#include <linux/types.h>
-#include <asm/processor.h>
-
-#define XSTATE_CPUID 0x0000000d
-
-#define XSTATE_FP 0x1
-#define XSTATE_SSE 0x2
-#define XSTATE_YMM 0x4
-#define XSTATE_BNDREGS 0x8
-#define XSTATE_BNDCSR 0x10
-#define XSTATE_OPMASK 0x20
-#define XSTATE_ZMM_Hi256 0x40
-#define XSTATE_Hi16_ZMM 0x80
-
-#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
-#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
-/* Bit 63 of XCR0 is reserved for future expansion */
-#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
-
-#define FXSAVE_SIZE 512
-
-#define XSAVE_HDR_SIZE 64
-#define XSAVE_HDR_OFFSET FXSAVE_SIZE
-
-#define XSAVE_YMM_SIZE 256
-#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-
-/* Supported features which support lazy state saving */
-#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
- | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
-
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-extern unsigned int xstate_size;
-extern u64 pcntxt_mask;
-extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
-extern struct xsave_struct *init_xstate_buf;
-
-extern void xsave_init(void);
-extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
-extern int init_fpu(struct task_struct *child);
-
-/* These macros all use (%edi)/(%rdi) as the single memory argument. */
-#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
-#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
-#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
-#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-
-#define xstate_fault ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err)
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- return err;
-}
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- return err;
-}
-
-/*
- * Save processor xstate to xsave area.
- */
-static inline int xsave_state(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- /*
- * If xsaves is enabled, xsaves replaces xsaveopt because
- * it supports compact format and supervisor states in addition to
- * modified optimization in xsaveopt.
- *
- * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
- * because xsaveopt supports modified optimization which is not
- * supported by xsave.
- *
- * If none of xsaves and xsaveopt is enabled, use xsave.
- */
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [fx] "D" (fx), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault
- : "0" (0)
- : "memory");
-
- return err;
-}
-
-/*
- * Restore processor xstate from xsave area.
- */
-static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
-{
- int err = 0;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- /*
- * Use xrstors to restore context if it is enabled. xrstors supports
- * compacted format of xsave area which is not supported by xrstor.
- */
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault
- : "0" (0)
- : "memory");
-
- return err;
-}
-
-/*
- * Save xstate context for old process during context switch.
- */
-static inline void fpu_xsave(struct fpu *fpu)
-{
- xsave_state(&fpu->state->xsave, -1);
-}
-
-/*
- * Restore xstate context for new process during context switch.
- */
-static inline int fpu_xrstor_checking(struct xsave_struct *fx)
-{
- return xrstor_state(fx, -1);
-}
-
-/*
- * Save xstate to user space xsave area.
- *
- * We don't use modified optimization because xrstor/xrstors might track
- * a different application.
- *
- * We don't use compacted format xsave area for
- * backward compatibility for old applications which don't understand
- * compacted format of xsave area.
- */
-static inline int xsave_user(struct xsave_struct __user *buf)
-{
- int err;
-
- /*
- * Clear the xsave header first, so that reserved fields are
- * initialized to zero.
- */
- err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
- if (unlikely(err))
- return -EFAULT;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XSAVE"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault
- : "D" (buf), "a" (-1), "d" (-1), "0" (0)
- : "memory");
- return err;
-}
-
-/*
- * Restore xstate from user space xsave area.
- */
-static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
-{
- int err = 0;
- struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XRSTOR"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault
- : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
- : "memory"); /* memory required? */
- return err;
-}
-
-void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
-void setup_xstate_comp(void);
-
-#endif
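
Editor's note: every helper deleted above splits the 64-bit xstate feature mask into the EDX:EAX register pair that the XSAVE-family instructions expect. A tiny standalone illustration of that split (ours, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = 0x1 | 0x2 | 0x4;	 /* FP | SSE | YMM */
	uint32_t lmask = (uint32_t)mask;	 /* loaded into EAX */
	uint32_t hmask = (uint32_t)(mask >> 32); /* loaded into EDX */

	printf("eax=0x%08x edx=0x%08x\n", lmask, hmask);
	return 0;
}
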
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c469490db4a8..3c6bb342a48f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -140,6 +140,7 @@
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 16dc4e8a2cd3..0e8a973de9ee 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -25,7 +25,7 @@ struct _fpx_sw_bytes {
__u32 extended_size; /* total size of the layout referred by
* fpstate pointer in the sigcontext.
*/
- __u64 xstate_bv;
+ __u64 xfeatures;
/* feature bit mask (including fp/sse/extended
* state) that is present in the memory
* layout.
@@ -209,8 +209,8 @@ struct sigcontext {
#endif /* !__i386__ */
-struct _xsave_hdr {
- __u64 xstate_bv;
+struct _header {
+ __u64 xfeatures;
__u64 reserved1[2];
__u64 reserved2[5];
};
@@ -228,7 +228,7 @@ struct _ymmh_state {
*/
struct _xstate {
struct _fpstate fpstate;
- struct _xsave_hdr xstate_hdr;
+ struct _header xstate_hdr;
struct _ymmh_state ymmh;
/* new processor state extensions go here */
};
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bcd0b56ca17..febaf180621b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o
obj-y += resource.o
obj-y += process.o
-obj-y += i387.o xsave.o
+obj-y += fpu/
obj-y += ptrace.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index aef653193160..7fe097235376 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -21,6 +21,10 @@
#include <asm/io.h>
#include <asm/fixmap.h>
+int __read_mostly alternatives_patched;
+
+EXPORT_SYMBOL_GPL(alternatives_patched);
+
#define MAX_PATCH_LEN (255-1)
static int __initdata_or_module debug_alternative;
@@ -627,6 +631,7 @@ void __init alternative_instructions(void)
apply_paravirt(__parainstructions, __parainstructions_end);
restart_nmi();
+ alternatives_patched = 1;
}
/**
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 5caed1dd7ccf..29fa475ec518 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -89,9 +89,7 @@ int amd_cache_northbridges(void)
next_northbridge(link, amd_nb_link_ids);
}
- /* GART present only on Fam15h upto model 0fh */
- if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
+ if (amd_gart_present())
amd_northbridges.flags |= AMD_NB_GART;
/*
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 76164e173a24..6e85f713641d 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -262,6 +262,9 @@ void __init early_gart_iommu_check(void)
u64 aper_base = 0, last_aper_base = 0;
int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0;
+ if (!amd_gart_present())
+ return;
+
if (!early_pci_allowed())
return;
@@ -355,6 +358,9 @@ int __init gart_iommu_hole_init(void)
int fix, slot, valid_agp = 0;
int i, node;
+ if (!amd_gart_present())
+ return -ENODEV;
+
if (gart_iommu_aperture_disabled || !fix_aperture ||
!early_pci_allowed())
return -ENODEV;
@@ -452,7 +458,7 @@ out:
force_iommu ||
valid_agp ||
fallback_aper_force) {
- pr_info("Your BIOS doesn't leave a aperture memory hole\n");
+ pr_info("Your BIOS doesn't leave an aperture memory hole\n");
pr_info("Please enable the IOMMU option in the BIOS setup\n");
pr_info("This costs you %dMB of RAM\n",
32 << fallback_aper_order);
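
Editor's note: the amd_gart_present() helper introduced by these hunks is defined elsewhere in the series; judging from the open-coded condition removed in amd_nb.c above, it presumably reduces to something like the following sketch (reconstructed under that assumption, not quoted from the patch):

static bool amd_gart_present(void)
{
	/* GART on K8 (Fam 0fh), Fam10h, and Fam15h up to model 0fh */
	return boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
	       (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10);
}
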
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 9f6b9341950f..b27f6ec90caa 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -41,6 +41,25 @@ void common(void) {
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+ BLANK();
+ OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax);
+ OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx);
+ OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx);
+ OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx);
+ OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si);
+ OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di);
+ OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp);
+ OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp);
+ OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
+
+ BLANK();
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+
+ BLANK();
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
+#endif
+
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 47703aed74cf..6ce39025f467 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -17,17 +17,6 @@ void foo(void);
void foo(void)
{
- OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
- OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
- OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
- OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
- OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
- OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
- OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
- OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
- OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
- BLANK();
-
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
@@ -37,10 +26,6 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_sysenter_return, thread_info, sysenter_return);
- OFFSET(TI_cpu, thread_info, cpu);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -60,9 +45,6 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
- BLANK();
-
OFFSET(saved_context_gdt_desc, saved_context, gdt_desc);
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 5ce6f2da8763..dcaab87da629 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -29,27 +29,6 @@ int main(void)
BLANK();
#endif
-#ifdef CONFIG_IA32_EMULATION
- OFFSET(TI_sysenter_return, thread_info, sysenter_return);
- BLANK();
-
-#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
- ENTRY(ax);
- ENTRY(bx);
- ENTRY(cx);
- ENTRY(dx);
- ENTRY(si);
- ENTRY(di);
- ENTRY(bp);
- ENTRY(sp);
- ENTRY(ip);
- BLANK();
-#undef ENTRY
-
- OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
- BLANK();
-#endif
-
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(cx);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e4cf63301ff4..56cae1964a81 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -19,6 +19,13 @@
#include "cpu.h"
+/*
+ * nodes_per_socket: Stores the number of nodes per socket.
+ * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
+ * Node Identifiers[10:8]
+ */
+static u32 nodes_per_socket = 1;
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -291,7 +298,7 @@ static int nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes, cores_per_cu = 1;
+ u32 cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -300,7 +307,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
- nodes = ((ecx >> 8) & 7) + 1;
+ nodes_per_socket = ((ecx >> 8) & 7) + 1;
node_id = ecx & 7;
/* get compute unit information */
@@ -311,18 +318,18 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes = ((value >> 3) & 7) + 1;
+ nodes_per_socket = ((value >> 3) & 7) + 1;
node_id = value & 7;
} else
return;
/* fixup multi-node processor information */
- if (nodes > 1) {
+ if (nodes_per_socket > 1) {
u32 cores_per_node;
u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
- cores_per_node = c->x86_max_cores / nodes;
+ cores_per_node = c->x86_max_cores / nodes_per_socket;
cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
@@ -366,6 +373,12 @@ u16 amd_get_nb_id(int cpu)
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
+u32 amd_get_nodes_per_socket(void)
+{
+ return nodes_per_socket;
+}
+EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket);
+
static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
@@ -520,8 +533,16 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
- /* check CPU config space for extended APIC ID */
- if (cpu_has_apic && c->x86 >= 0xf) {
+ /*
+ * ApicID can always be treated as an 8-bit value for AMD APIC versions
+ * >= 0x10, but even old K8s came out of reset with version 0x10. So, we
+ * can safely set X86_FEATURE_EXTD_APICID unconditionally for families
+ * after 16h.
+ */
+ if (cpu_has_apic && c->x86 > 0x16) {
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ } else if (cpu_has_apic && c->x86 >= 0xf) {
+ /* check CPU config space for extended APIC ID */
unsigned int val;
val = read_pci_config(0, 24, 0, 0x68);
if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
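
Editor's note: a worked example (ours) of the CPUID decode in the amd_get_topology() hunks above — nodes_per_socket is bits [10:8] of CPUID Fn8000_001E_ECX, plus one:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ecx = 0x00000300;	/* Fn8000_001E ECX, bits [10:8] = 3 */
	uint32_t nodes_per_socket = ((ecx >> 8) & 7) + 1;

	printf("nodes_per_socket = %u\n", nodes_per_socket);	/* 4 */
	return 0;
}
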
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03445346ee0a..bd17db15a2c1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,57 +12,11 @@
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
-#include <asm/i387.h>
+#include <asm/fpu/internal.h>
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
-static double __initdata x = 4195835.0;
-static double __initdata y = 3145727.0;
-
-/*
- * This used to check for exceptions..
- * However, it turns out that to support that,
- * the XMM trap handlers basically had to
- * be buggy. So let's have a correct XMM trap
- * handler, and forget about printing out
- * some status at boot.
- *
- * We should really only care about bugs here
- * anyway. Not features.
- */
-static void __init check_fpu(void)
-{
- s32 fdiv_bug;
-
- kernel_fpu_begin();
-
- /*
- * trap_init() enabled FXSR and company _before_ testing for FP
- * problems here.
- *
- * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug
- */
- __asm__("fninit\n\t"
- "fldl %1\n\t"
- "fdivl %2\n\t"
- "fmull %2\n\t"
- "fldl %1\n\t"
- "fsubp %%st,%%st(1)\n\t"
- "fistpl %0\n\t"
- "fwait\n\t"
- "fninit"
- : "=m" (*&fdiv_bug)
- : "m" (*&x), "m" (*&y));
-
- kernel_fpu_end();
-
- if (fdiv_bug) {
- set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
- pr_warn("Hmm, FPU with FDIV bug\n");
- }
-}
-
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -85,10 +39,5 @@ void __init check_bugs(void)
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
- /*
- * kernel_fpu_begin/end() in check_fpu() relies on the patched
- * alternative instructions.
- */
- if (cpu_has_fpu)
- check_fpu();
+ fpu__init_check_bugs();
}
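
Editor's note: the deleted check_fpu() test, restated in plain C as a sketch. The kernel version used raw x87 instructions so the compiler could not hide the hardware result; compiled with SSE math or heavy optimization this analogue may not exercise the same unit. On a correct FPU the canonical expression is exactly zero; on an FDIV-buggy Pentium it is not:

#include <stdio.h>

int main(void)
{
	volatile double x = 4195835.0;
	volatile double y = 3145727.0;

	/* Exactly 0.0 on a correct FPU; 256 on an FDIV-buggy Pentium. */
	double r = x - (x / y) * y;

	printf(r == 0.0 ? "FPU ok\n" : "FDIV bug suspected\n");
	return 0;
}
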
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a62cf04dac8a..b28e5262a0a5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/string.h>
+#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
@@ -31,8 +32,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
@@ -145,32 +145,21 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-static int __init x86_xsave_setup(char *s)
+static int __init x86_mpx_setup(char *s)
{
+ /* require an exact match without trailing characters */
if (strlen(s))
return 0;
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- return 1;
-}
-__setup("noxsave", x86_xsave_setup);
-static int __init x86_xsaveopt_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- return 1;
-}
-__setup("noxsaveopt", x86_xsaveopt_setup);
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_MPX))
+ return 1;
-static int __init x86_xsaves_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+ setup_clear_cpu_cap(X86_FEATURE_MPX);
+ pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
return 1;
}
-__setup("noxsaves", x86_xsaves_setup);
+__setup("nompx", x86_mpx_setup);
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
@@ -183,14 +172,6 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-static int __init x86_fxsr_setup(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- return 1;
-}
-__setup("nofxsr", x86_fxsr_setup);
-
static int __init x86_sep_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_SEP);
@@ -419,7 +400,7 @@ static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
static void get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
- char *p, *q;
+ char *p, *q, *s;
if (c->extended_cpuid_level < 0x80000004)
return;
@@ -430,19 +411,21 @@ static void get_model_name(struct cpuinfo_x86 *c)
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
c->x86_model_id[48] = 0;
- /*
- * Intel chips right-justify this string for some dumb reason;
- * undo that brain damage:
- */
- p = q = &c->x86_model_id[0];
+ /* Trim whitespace */
+ p = q = s = &c->x86_model_id[0];
+
while (*p == ' ')
p++;
- if (p != q) {
- while (*p)
- *q++ = *p++;
- while (q <= &c->x86_model_id[48])
- *q++ = '\0'; /* Zero-pad the rest */
+
+ while (*p) {
+ /* Note the last non-whitespace index */
+ if (!isspace(*p))
+ s = q;
+
+ *q++ = *p++;
}
+
+ *(s + 1) = '\0';
}
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
@@ -759,7 +742,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_detect(c);
get_cpu_vendor(c);
get_cpu_cap(c);
- fpu_detect(c);
+ fpu__init_system(c);
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -1122,7 +1105,7 @@ void print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT "%s ", vendor);
if (c->x86_model_id[0])
- printk(KERN_CONT "%s", strim(c->x86_model_id));
+ printk(KERN_CONT "%s", c->x86_model_id);
else
printk(KERN_CONT "%d86", c->x86);
@@ -1183,8 +1166,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
-
/*
* Special IST stacks which the CPU switches to when it calls
* an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1275,7 +1256,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
/*
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
@@ -1439,7 +1419,7 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu_init();
+ fpu__init_cpu();
if (is_uv_system())
uv_cpu_init();
@@ -1495,7 +1475,7 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- fpu_init();
+ fpu__init_cpu();
}
#endif
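
Editor's note: the new get_model_name() trimming loop above replaces the Intel-specific left-justification with a generic trim: skip leading spaces, copy, and remember the last non-whitespace position so the tail can be cut in one step. A standalone restatement (ours, with a made-up input string):

#include <ctype.h>
#include <stdio.h>

static void trim(char *id)
{
	char *p = id, *q = id, *s = id;

	while (*p == ' ')		/* skip leading spaces */
		p++;

	while (*p) {
		if (!isspace((unsigned char)*p))
			s = q;		/* last non-whitespace written */
		*q++ = *p++;
	}
	*(s + 1) = '\0';		/* cut the trailing whitespace */
}

int main(void)
{
	char id[] = "   Some CPU Model Name   ";

	trim(id);
	printf("[%s]\n", id);		/* [Some CPU Model Name] */
	return 0;
}
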
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 36ce402a3fa5..d820d8eae96b 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -27,8 +27,8 @@
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
-#ifdef CONFIG_XEN_PVHVM
- &x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+ &x86_hyper_xen,
#endif
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533d5ab8..95cf78d44ab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -53,9 +53,12 @@
static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define rcu_dereference_check_mce(p) \
- rcu_dereference_index_check((p), \
- rcu_read_lock_sched_held() || \
- lockdep_is_held(&mce_chrdev_read_mutex))
+({ \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&mce_chrdev_read_mutex), \
+ "suspicious rcu_dereference_check_mce() usage"); \
+ smp_load_acquire(&(p)); \
+})
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -708,6 +711,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
int i, ret = 0;
+ char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +720,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
- if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
- MCE_PANIC_SEVERITY)
+
+ if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ *msg = tmp;
ret = 1;
+ }
}
return ret;
}
@@ -1884,7 +1890,7 @@ out:
static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (rcu_access_index(mcelog.next))
+ if (READ_ONCE(mcelog.next))
return POLLIN | POLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return POLLIN | POLLRDNORM;
@@ -1929,8 +1935,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
}
EXPORT_SYMBOL_GPL(register_mce_write_callback);
-ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
- size_t usize, loff_t *off)
+static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
{
if (mce_write)
return mce_write(filp, ubuf, usize, off);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87848ebe2bb7..5801a14f7524 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
}
static atomic_t active_events;
+static atomic_t pmc_refcount;
static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC
@@ -190,6 +191,7 @@ static bool check_hw_exists(void)
u64 val, val_fail, val_new= ~0;
int i, reg, reg_fail, ret = 0;
int bios_fail = 0;
+ int reg_safe = -1;
/*
* Check to see if the BIOS enabled any of the counters, if so
@@ -204,6 +206,8 @@ static bool check_hw_exists(void)
bios_fail = 1;
val_fail = val;
reg_fail = reg;
+ } else {
+ reg_safe = i;
}
}
@@ -222,11 +226,22 @@ static bool check_hw_exists(void)
}
/*
+ * If the BIOS has enabled all of the counters, the read/write
+ * test below has no safe counter left to use and will always
+ * fail; the tools would also be useless in this scenario.
+ * Just fail and disable the hardware counters.
+ */
+
+ if (reg_safe == -1) {
+ reg = reg_safe;
+ goto msr_fail;
+ }
+
+ /*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- reg = x86_pmu_event_addr(0);
+ reg = x86_pmu_event_addr(reg_safe);
if (rdmsrl_safe(reg, &val))
goto msr_fail;
val ^= 0xffffUL;
@@ -256,11 +271,8 @@ msr_fail:
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
- release_pmc_hardware();
- release_ds_buffers();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ x86_release_hardware();
+ atomic_dec(&active_events);
}
void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -310,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
return x86_pmu_extra_regs(val, event);
}
+int x86_reserve_hardware(void)
+{
+ int err = 0;
+
+ if (!atomic_inc_not_zero(&pmc_refcount)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&pmc_refcount) == 0) {
+ if (!reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ reserve_ds_buffers();
+ }
+ if (!err)
+ atomic_inc(&pmc_refcount);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+
+ return err;
+}
+
+void x86_release_hardware(void)
+{
+ if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
+ release_ds_buffers();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
/*
* Check if we can create event of a certain type (that no conflicting events
* are present).
@@ -322,21 +363,34 @@ int x86_add_exclusive(unsigned int what)
return 0;
mutex_lock(&pmc_reserve_mutex);
- for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+ for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
goto out;
+ }
atomic_inc(&x86_pmu.lbr_exclusive[what]);
ret = 0;
out:
mutex_unlock(&pmc_reserve_mutex);
+
+ /*
+ * Assuming that all exclusive events will share the PMI handler
+ * (which checks active_events for whether there is work to do),
+ * we can bump active_events counter right here, except for
+ * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
+ * path, which already bumps active_events for them.
+ */
+ if (!ret && what != x86_lbr_exclusive_lbr)
+ atomic_inc(&active_events);
+
return ret;
}
void x86_del_exclusive(unsigned int what)
{
atomic_dec(&x86_pmu.lbr_exclusive[what]);
+ atomic_dec(&active_events);
}
int x86_setup_perfctr(struct perf_event *event)
@@ -513,22 +567,11 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (!x86_pmu_initialized())
return -ENODEV;
- err = 0;
- if (!atomic_inc_not_zero(&active_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&active_events) == 0) {
- if (!reserve_pmc_hardware())
- err = -EBUSY;
- else
- reserve_ds_buffers();
- }
- if (!err)
- atomic_inc(&active_events);
- mutex_unlock(&pmc_reserve_mutex);
- }
+ err = x86_reserve_hardware();
if (err)
return err;
+ atomic_inc(&active_events);
event->destroy = hw_perf_event_destroy;
event->hw.idx = -1;
@@ -611,6 +654,7 @@ struct sched_state {
int event; /* event index */
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
+ int nr_gp; /* number of GP counters used */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
@@ -620,27 +664,29 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct perf_event **events;
- struct sched_state state;
+ int max_gp;
int saved_states;
+ struct event_constraint **constraints;
+ struct sched_state state;
struct sched_state saved[SCHED_STATES_MAX];
};
/*
 * Initialize the iterator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
- int num, int wmin, int wmax)
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+ int num, int wmin, int wmax, int gpmax)
{
int idx;
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->events = events;
+ sched->max_gp = gpmax;
+ sched->constraints = constraints;
for (idx = 0; idx < num; idx++) {
- if (events[idx]->hw.constraint->weight == wmin)
+ if (constraints[idx]->weight == wmin)
break;
}
@@ -687,7 +733,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (sched->state.event >= sched->max_events)
return false;
- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -696,11 +742,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
goto done;
}
}
+
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
- if (!__test_and_set_bit(idx, sched->state.used))
+ if (!__test_and_set_bit(idx, sched->state.used)) {
+ if (sched->state.nr_gp++ >= sched->max_gp)
+ return false;
+
goto done;
+ }
}
return false;
@@ -745,7 +796,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -756,12 +807,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct perf_event **events, int n,
- int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, events, n, wmin, wmax);
+ perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -788,9 +839,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
x86_pmu.start_scheduling(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &cpuc->event_list[i]->hw;
+ cpuc->event_constraint[i] = NULL;
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- hwc->constraint = c;
+ cpuc->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -801,7 +852,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = hwc->constraint;
+ c = cpuc->event_constraint[i];
/* never assigned */
if (hwc->idx == -1)
@@ -821,9 +872,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
}
/* slow path */
- if (i != n)
- unsched = perf_assign_events(cpuc->event_list, n, wmin,
- wmax, assign);
+ if (i != n) {
+ int gpmax = x86_pmu.num_counters;
+
+ /*
+ * Do not allow scheduling of more than half the available
+ * generic counters.
+ *
+ * This helps avoid counter starvation of the sibling thread by
+ * ensuring that at most half the counters can be in exclusive
+ * mode. There are no designated counters for the limit; any
+ * N/2 counters can be used. This helps with events that have
+ * specific counter constraints.
+ */
+ if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+ READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+ gpmax /= 2;
+
+ unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+ wmax, gpmax, assign);
+ }
/*
* In case of success (unsched = 0), mark events as committed,
@@ -840,12 +908,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
e = cpuc->event_list[i];
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+ x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
- }
-
- if (!assign || unsched) {
-
+ } else {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
/*
@@ -1058,13 +1123,16 @@ int x86_perf_event_set_period(struct perf_event *event)
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
- /*
- * The hw event starts counting from this event offset,
- * mark it to be able to extra future deltas:
- */
- local64_set(&hwc->prev_count, (u64)-left);
+ if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+ local64_read(&hwc->prev_count) != (u64)-left) {
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extract future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ }
/*
 * Due to an erratum on certain CPUs we need
@@ -1292,8 +1360,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
x86_pmu.put_event_constraints(cpuc, event);
/* Delete the array entry. */
- while (++i < cpuc->n_events)
+ while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
+ cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ }
--cpuc->n_events;
perf_event_update_userpage(event);
@@ -1374,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
u64 finish_clock;
int ret;
+ /*
+ * All PMUs/events that share this PMI handler should make sure to
+ * increment active_events for their events.
+ */
if (!atomic_read(&active_events))
return NMI_DONE;
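
Editor's note: the x86_reserve_hardware()/x86_release_hardware() pair factored out above is a common "counted init" idiom — a lock-free fast path when the resource is already live, and a mutex-guarded slow path for the 0 -> 1 and 1 -> 0 transitions. A generic kernel-style sketch (simplified; do_expensive_init/do_expensive_teardown are hypothetical helpers):

#include <linux/atomic.h>
#include <linux/mutex.h>

static atomic_t refcount = ATOMIC_INIT(0);
static DEFINE_MUTEX(init_mutex);

static int do_expensive_init(void);	/* hypothetical helpers */
static void do_expensive_teardown(void);

int resource_get(void)
{
	int err = 0;

	if (atomic_inc_not_zero(&refcount))
		return 0;		/* already live: lock-free fast path */

	mutex_lock(&init_mutex);
	if (atomic_read(&refcount) == 0)
		err = do_expensive_init();
	if (!err)
		atomic_inc(&refcount);
	mutex_unlock(&init_mutex);

	return err;
}

void resource_put(void)
{
	/* Tear down only on the 1 -> 0 transition, under the mutex. */
	if (atomic_dec_and_mutex_lock(&refcount, &init_mutex)) {
		do_expensive_teardown();
		mutex_unlock(&init_mutex);
	}
}
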
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6ac5cb7a9e14..3e7fd27dfe20 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -74,6 +74,9 @@ struct event_constraint {
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
struct amd_nb {
@@ -87,6 +90,18 @@ struct amd_nb {
#define MAX_PEBS_EVENTS 8
/*
+ * Flags PEBS can handle without a PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ */
+#define PEBS_FREERUNNING_FLAGS \
+ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+ PERF_SAMPLE_TRANSACTION)
+
+/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
@@ -132,10 +147,7 @@ enum intel_excl_state_type {
};
struct intel_excl_states {
- enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
- int num_alloc_cntrs;/* #counters allocated */
- int max_alloc_cntrs;/* max #counters allowed */
bool sched_started; /* true if scheduling has started */
};
@@ -144,6 +156,11 @@ struct intel_excl_cntrs {
struct intel_excl_states states[2];
+ union {
+ u16 has_exclusive[2];
+ u32 exclusive_present;
+ };
+
int refcnt; /* per-core: #HT threads */
unsigned core_id; /* per-core: core id */
};
@@ -172,7 +189,11 @@ struct cpu_hw_events {
added in the current transaction */
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
+
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+ int n_excl; /* the number of exclusive events */
unsigned int group_flag;
int is_fake;
@@ -519,12 +540,10 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
- void (*commit_scheduling)(struct cpu_hw_events *cpuc,
- struct perf_event *event,
- int cntr);
-
void (*start_scheduling)(struct cpu_hw_events *cpuc);
+ void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
void (*stop_scheduling)(struct cpu_hw_events *cpuc);
struct event_constraint *event_constraints;
@@ -697,6 +716,10 @@ int x86_add_exclusive(unsigned int what);
void x86_del_exclusive(unsigned int what);
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
void hw_perf_lbr_event_destroy(struct perf_event *event);
int x86_setup_perfctr(struct perf_event *event);
@@ -717,8 +740,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct perf_event **events, int n,
- int wmin, int wmax, int *assign);
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags);
@@ -860,6 +883,8 @@ void intel_pmu_pebs_enable_all(void);
void intel_pmu_pebs_disable_all(void);
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
@@ -929,4 +954,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
return NULL;
}
+static inline int is_ht_workaround_enabled(void)
+{
+ return 0;
+}
#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 219d3fb423a1..b9826a981fb2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
- [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
- /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
- [ C(RESULT_MISS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
- [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -1904,9 +1903,8 @@ static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
+ struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* sibling thread */
/*
* nothing needed if in group validation mode
@@ -1917,35 +1915,52 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];
xl->sched_started = true;
- xl->num_alloc_cntrs = 0;
/*
* lock shared state until we are done scheduling
* in stop_event_scheduling()
* makes scheduling appear as a transaction
*/
- WARN_ON_ONCE(!irqs_disabled());
raw_spin_lock(&excl_cntrs->lock);
+}
- /*
- * save initial state of sibling thread
- */
- memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct event_constraint *c = cpuc->event_constraint[idx];
+ struct intel_excl_states *xl;
+ int tid = cpuc->excl_thread_id;
+
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return;
+
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+ return;
+
+ xl = &excl_cntrs->states[tid];
+
+ lockdep_assert_held(&excl_cntrs->lock);
+
+ if (c->flags & PERF_X86_EVENT_EXCL)
+ xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+ else
+ xl->state[cntr] = INTEL_EXCL_SHARED;
}
static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
+ struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* sibling thread */
/*
* nothing needed if in group validation mode
@@ -1955,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];
- /*
- * make new sibling thread state visible
- */
- memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
-
xl->sched_started = false;
/*
* release shared state lock (acquired in intel_start_scheduling())
@@ -1977,12 +1986,10 @@ static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
- struct event_constraint *cx;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
- int is_excl, i;
+ struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* alternate */
+ int is_excl, i;
/*
* validating a group does not require
@@ -1994,34 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return c;
- /*
- * event requires exclusive counter access
- * across HT threads
- */
- is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
- /*
- * xl = state of current HT
- * xlo = state of sibling HT
- */
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
-
- /*
- * do not allow scheduling of more than max_alloc_cntrs
- * which is set to half the available generic counters.
- * this helps avoid counter starvation of sibling thread
- * by ensuring at most half the counters cannot be in
- * exclusive mode. There is not designated counters for the
- * limits. Any N/2 counters can be used. This helps with
- * events with specifix counter constraints
- */
- if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
- return &emptyconstraint;
-
- cx = c;
/*
* because we modify the constraint, we need
@@ -2032,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* been cloned (marked dynamic)
*/
if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-
- /* sanity check */
- if (idx < 0)
- return &emptyconstraint;
+ struct event_constraint *cx;
/*
* grab pre-allocated constraint entry
@@ -2046,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* initialize dynamic constraint
* with static constraint
*/
- memcpy(cx, c, sizeof(*cx));
+ *cx = *c;
/*
* mark constraint as dynamic, so we
* can free it later on
*/
cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
}
/*
@@ -2063,6 +2042,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
*/
/*
+ * state of sibling HT
+ */
+ xlo = &excl_cntrs->states[tid ^ 1];
+
+ /*
+ * event requires exclusive counter access
+ * across HT threads
+ */
+ is_excl = c->flags & PERF_X86_EVENT_EXCL;
+ if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+ event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+ if (!cpuc->n_excl++)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+ }
+
+ /*
* Modify static constraint with current dynamic
* state of thread
*
@@ -2070,44 +2065,44 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
- for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+ for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
- if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
- __clear_bit(i, cx->idxmsk);
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ __clear_bit(i, c->idxmsk);
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
- if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
- __clear_bit(i, cx->idxmsk);
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ __clear_bit(i, c->idxmsk);
}
/*
* recompute actual bit weight for scheduling algorithm
*/
- cx->weight = hweight64(cx->idxmsk64);
+ c->weight = hweight64(c->idxmsk64);
/*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
- if (cx->weight == 0)
- cx = &emptyconstraint;
+ if (c->weight == 0)
+ c = &emptyconstraint;
- return cx;
+ return c;
}
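
The masking above is the heart of the HT workaround: a counter drops out of
the candidate mask when the sibling thread holds it exclusively, or when we
need it exclusively and the sibling merely shares it. A minimal stand-alone
sketch of that rule (the enum and helper below are illustrative stand-ins,
not the kernel's types):

	enum excl_state { EXCL_UNUSED, EXCL_SHARED, EXCL_EXCLUSIVE };

	/* Reduce a counter candidate mask against the sibling's state. */
	static unsigned long mask_vs_sibling(unsigned long idxmsk,
					     const enum excl_state *xlo,
					     int nr_cntrs, int is_excl)
	{
		int i;

		for (i = 0; i < nr_cntrs; i++) {
			if (!(idxmsk & (1UL << i)))
				continue;
			/* sibling owns the counter exclusively: never usable */
			if (xlo[i] == EXCL_EXCLUSIVE)
				idxmsk &= ~(1UL << i);
			/* we need it exclusively, but the sibling shares it */
			else if (is_excl && xlo[i] == EXCL_SHARED)
				idxmsk &= ~(1UL << i);
		}
		return idxmsk;
	}
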
static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = event->hw.constraint;
+ struct event_constraint *c1 = cpuc->event_constraint[idx];
struct event_constraint *c2;
/*
@@ -2133,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
{
struct hw_perf_event *hwc = &event->hw;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xlo, *xl;
- unsigned long flags = 0; /* keep compiler happy */
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid;
+ struct intel_excl_states *xl;
/*
* nothing needed if in group validation mode
@@ -2144,31 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
if (cpuc->is_fake)
return;
- WARN_ON_ONCE(!excl_cntrs);
-
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
+ if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+ hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+ if (!--cpuc->n_excl)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+ }
/*
- * put_constraint may be called from x86_schedule_events()
- * which already has the lock held so here make locking
- * conditional
+ * If the event was actually assigned, then mark the counter state as
+ * unused now.
*/
- if (!xl->sched_started)
- raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+ if (hwc->idx >= 0) {
+ xl = &excl_cntrs->states[tid];
- /*
- * if event was actually assigned, then mark the
- * counter state as unused now
- */
- if (hwc->idx >= 0)
- xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+ /*
+ * put_constraint may be called from x86_schedule_events()
+ * which already has the lock held, so make the locking
+ * conditional here.
+ */
+ if (!xl->sched_started)
+ raw_spin_lock(&excl_cntrs->lock);
+
+ xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
- if (!xl->sched_started)
- raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+ if (!xl->sched_started)
+ raw_spin_unlock(&excl_cntrs->lock);
+ }
}
static void
@@ -2189,8 +2186,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = event->hw.constraint;
-
intel_put_shared_regs_event_constraints(cpuc, event);
/*
@@ -2198,48 +2193,8 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
* all events are subject to and must call the
* put_excl_constraints() routine
*/
- if (c && cpuc->excl_cntrs)
+ if (cpuc->excl_cntrs)
intel_put_excl_constraints(cpuc, event);
-
- /* cleanup dynamic constraint */
- if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
- event->hw.constraint = NULL;
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
- struct perf_event *event, int cntr)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct event_constraint *c = event->hw.constraint;
- struct intel_excl_states *xlo, *xl;
- int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid;
- int is_excl;
-
- if (cpuc->is_fake || !c)
- return;
-
- is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
- return;
-
- WARN_ON_ONCE(!excl_cntrs);
-
- if (!excl_cntrs)
- return;
-
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
-
- WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
-
- if (cntr >= 0) {
- if (is_excl)
- xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
- else
- xlo->init_state[cntr] = INTEL_EXCL_SHARED;
- }
}
static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -2305,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
- if (event->attr.precise_ip && x86_pmu.pebs_aliases)
- x86_pmu.pebs_aliases(event);
+ if (event->attr.precise_ip) {
+ if (!event->attr.freq) {
+ event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+ if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+ event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ }
+ if (x86_pmu.pebs_aliases)
+ x86_pmu.pebs_aliases(event);
+ }
if (needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
@@ -2533,34 +2495,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
-static __initconst const struct x86_pmu core_pmu = {
- .name = "core",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
- .enable_all = core_pmu_enable_all,
- .enable = core_pmu_enable_event,
- .disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .event_map = intel_pmu_event_map,
- .max_events = ARRAY_SIZE(intel_perfmon_event_map),
- .apic = 1,
- /*
- * Intel PMCs cannot be accessed sanely above 32 bit width,
- * so we install an artificial 1<<31 period regardless of
- * the generic event period:
- */
- .max_period = (1ULL << 31) - 1,
- .get_event_constraints = intel_get_event_constraints,
- .put_event_constraints = intel_put_event_constraints,
- .event_constraints = intel_core_event_constraints,
- .guest_get_msrs = core_guest_get_msrs,
- .format_attrs = intel_arch_formats_attr,
- .events_sysfs_show = intel_event_sysfs_show,
-};
-
struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
@@ -2583,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
struct intel_excl_cntrs *c;
- int i;
c = kzalloc_node(sizeof(struct intel_excl_cntrs),
GFP_KERNEL, cpu_to_node(cpu));
if (c) {
raw_spin_lock_init(&c->lock);
- for (i = 0; i < X86_PMC_IDX_MAX; i++) {
- c->states[0].state[i] = INTEL_EXCL_UNUSED;
- c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
-
- c->states[1].state[i] = INTEL_EXCL_UNUSED;
- c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
- }
c->core_id = -1;
}
return c;
@@ -2650,7 +2576,7 @@ static void intel_pmu_cpu_starting(int cpu)
if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ for_each_cpu(i, topology_sibling_cpumask(cpu)) {
struct intel_shared_regs *pc;
pc = per_cpu(cpu_hw_events, i).shared_regs;
@@ -2668,9 +2594,7 @@ static void intel_pmu_cpu_starting(int cpu)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
- int h = x86_pmu.num_counters >> 1;
-
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ for_each_cpu(i, topology_sibling_cpumask(cpu)) {
struct intel_excl_cntrs *c;
c = per_cpu(cpu_hw_events, i).excl_cntrs;
@@ -2683,11 +2607,6 @@ static void intel_pmu_cpu_starting(int cpu)
}
cpuc->excl_cntrs->core_id = core_id;
cpuc->excl_cntrs->refcnt++;
- /*
- * set hard limit to half the number of generic counters
- */
- cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
- cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
}
}
@@ -2723,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+ bool sched_in)
+{
+ if (x86_pmu.pebs_active)
+ intel_pmu_pebs_sched_task(ctx, sched_in);
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2743,6 +2671,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};
+static __initconst const struct x86_pmu core_pmu = {
+ .name = "core",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = intel_pmu_event_map,
+ .max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
+ /*
+ * Intel PMCs cannot be accessed sanely above 32-bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic event period:
+ */
+ .max_period = (1ULL<<31) - 1,
+ .get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .event_constraints = intel_core_event_constraints,
+ .guest_get_msrs = core_guest_get_msrs,
+ .format_attrs = intel_arch_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
+ /*
+ * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
+ * together with PMU version 1 and thus use core_pmu with
+ * shared_regs. We need the following callbacks here to allocate
+ * it properly.
+ */
+ .cpu_prepare = intel_pmu_cpu_prepare,
+ .cpu_starting = intel_pmu_cpu_starting,
+ .cpu_dying = intel_pmu_cpu_dying,
+};
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -2774,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
- .sched_task = intel_pmu_lbr_sched_task,
+ .sched_task = intel_pmu_sched_task,
};
static __init void intel_clovertown_quirk(void)
@@ -2947,8 +2913,8 @@ static __init void intel_ht_bug(void)
{
x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
- x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.start_scheduling = intel_start_scheduling;
+ x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.stop_scheduling = intel_stop_scheduling;
}
@@ -3261,6 +3227,8 @@ __init int intel_pmu_init(void)
case 61: /* 14nm Broadwell Core-M */
case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3330,13 +3298,13 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != FIXED_EVENT_FLAGS
- || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
- continue;
+ if (c->cmask == FIXED_EVENT_FLAGS
+ && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
}
-
- c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
- c->weight += x86_pmu.num_counters;
+ c->idxmsk64 &=
+ ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+ c->weight = hweight64(c->idxmsk64);
}
}
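
To see what the rewritten loop computes, consider a CPU with 4 generic and
3 fixed counters: a plain fixed-event constraint gains generic bits 0-3,
keeps only the implemented fixed bits 32-34, and gets its weight recomputed
as the popcount of the result. A hedged stand-alone sketch (constants and
names are illustrative):

	#include <stdint.h>

	#define INTEL_PMC_IDX_FIXED 32

	static uint64_t fixup_fixed_constraint(uint64_t idxmsk64,
					       int num_counters,
					       int num_counters_fixed,
					       int extend_to_generic)
	{
		if (extend_to_generic)	/* fixed event other than REF_CYCLES */
			idxmsk64 |= (1ULL << num_counters) - 1;

		/* drop fixed-counter bits this CPU does not implement */
		idxmsk64 &= ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));

		return idxmsk64;	/* weight = __builtin_popcountll(idxmsk64) */
	}
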
@@ -3394,7 +3362,7 @@ static __init int fixup_ht_bug(void)
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_thread_cpumask(cpu));
+ w = cpumask_weight(topology_sibling_cpumask(cpu));
if (w > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
@@ -3404,8 +3372,8 @@ static __init int fixup_ht_bug(void)
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
- x86_pmu.commit_scheduling = NULL;
x86_pmu.start_scheduling = NULL;
+ x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
watchdog_nmi_enable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index ac1f0c55f379..7795f3f8b1d5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode)
static void bts_event_destroy(struct perf_event *event)
{
+ x86_release_hardware();
x86_del_exclusive(x86_lbr_exclusive_bts);
}
static int bts_event_init(struct perf_event *event)
{
+ int ret;
+
if (event->attr.type != bts_pmu.type)
return -ENOENT;
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
+ ret = x86_reserve_hardware();
+ if (ret) {
+ x86_del_exclusive(x86_lbr_exclusive_bts);
+ return ret;
+ }
+
event->destroy = bts_event_destroy;
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b738fa..188076161c1b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -13,16 +13,35 @@
#define MSR_IA32_QM_CTR 0x0c8e
#define MSR_IA32_QM_EVTSEL 0x0c8d
-static unsigned int cqm_max_rmid = -1;
+static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-struct intel_cqm_state {
- raw_spinlock_t lock;
- int rmid;
- int cnt;
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid: The cached Resource Monitoring ID
+ * @closid: The cached Class Of Service ID
+ * @rmid_usecnt: The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain the closid and the
+ * lower 10 bits the rmid. An update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 rmid;
+ u32 closid;
+ int rmid_usecnt;
};
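
As a sketch of the layout the kernel-doc above describes, the value that a
wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid) call ends up writing can be composed
like this (the helper name is illustrative):

	#include <stdint.h>

	/* bits 0-9 hold the RMID, bits 32-63 the CLOSID */
	static inline uint64_t pqr_assoc_val(uint32_t rmid, uint32_t closid)
	{
		return ((uint64_t)closid << 32) | (rmid & 0x3ff);
	}
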
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
/*
* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask;
* near-zero occupancy value, i.e. no cachelines are tagged with this
* RMID, once __intel_cqm_rmid_rotate() returns.
*/
-static unsigned int intel_cqm_rotation_rmid;
+static u32 intel_cqm_rotation_rmid;
#define INVALID_RMID (-1)
@@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
* Likewise, an rmid value of -1 is used to indicate "no rmid currently
* assigned" and is used as part of the rotation code.
*/
-static inline bool __rmid_valid(unsigned int rmid)
+static inline bool __rmid_valid(u32 rmid)
{
if (!rmid || rmid == INVALID_RMID)
return false;
@@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
return true;
}
-static u64 __rmid_read(unsigned int rmid)
+static u64 __rmid_read(u32 rmid)
{
u64 val;
@@ -102,7 +121,7 @@ enum rmid_recycle_state {
};
struct cqm_rmid_entry {
- unsigned int rmid;
+ u32 rmid;
enum rmid_recycle_state state;
struct list_head list;
unsigned long queue_time;
@@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
*/
static struct cqm_rmid_entry **cqm_rmid_ptrs;
-static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
{
struct cqm_rmid_entry *entry;
@@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
*
* We expect to be called with cache_mutex held.
*/
-static int __get_rmid(void)
+static u32 __get_rmid(void)
{
struct cqm_rmid_entry *entry;
@@ -177,7 +196,7 @@ static int __get_rmid(void)
return entry->rmid;
}
-static void __put_rmid(unsigned int rmid)
+static void __put_rmid(u32 rmid)
{
struct cqm_rmid_entry *entry;
@@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
}
struct rmid_read {
- unsigned int rmid;
+ u32 rmid;
atomic64_t value;
};
@@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
/*
* Exchange the RMID of a group of events.
*/
-static unsigned int
-intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
{
struct perf_event *event;
- unsigned int old_rmid = group->hw.cqm_rmid;
struct list_head *head = &group->hw.cqm_group_entry;
+ u32 old_rmid = group->hw.cqm_rmid;
lockdep_assert_held(&cache_mutex);
@@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg)
* If we have group events waiting for an RMID that don't conflict with
* events already running, assign @rmid.
*/
-static bool intel_cqm_sched_in_event(unsigned int rmid)
+static bool intel_cqm_sched_in_event(u32 rmid)
{
struct perf_event *leader, *event;
@@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
static void __intel_cqm_pick_and_rotate(struct perf_event *next)
{
struct perf_event *rotor;
- unsigned int rmid;
+ u32 rmid;
lockdep_assert_held(&cache_mutex);
@@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
{
struct perf_event *group, *g;
- unsigned int rmid;
+ u32 rmid;
lockdep_assert_held(&cache_mutex);
@@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
struct perf_event **group)
{
struct perf_event *iter;
- unsigned int rmid;
bool conflict = false;
+ u32 rmid;
list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
rmid = iter->hw.cqm_rmid;
@@ -860,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
static void intel_cqm_event_read(struct perf_event *event)
{
unsigned long flags;
- unsigned int rmid;
+ u32 rmid;
u64 val;
/*
@@ -961,55 +979,48 @@ out:
static void intel_cqm_event_start(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
- unsigned int rmid = event->hw.cqm_rmid;
- unsigned long flags;
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 rmid = event->hw.cqm_rmid;
if (!(event->hw.cqm_state & PERF_HES_STOPPED))
return;
event->hw.cqm_state &= ~PERF_HES_STOPPED;
- raw_spin_lock_irqsave(&state->lock, flags);
-
- if (state->cnt++)
- WARN_ON_ONCE(state->rmid != rmid);
- else
+ if (state->rmid_usecnt++) {
+ if (!WARN_ON_ONCE(state->rmid != rmid))
+ return;
+ } else {
WARN_ON_ONCE(state->rmid);
+ }
state->rmid = rmid;
- wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
-
- raw_spin_unlock_irqrestore(&state->lock, flags);
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
}
static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
- unsigned long flags;
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
if (event->hw.cqm_state & PERF_HES_STOPPED)
return;
event->hw.cqm_state |= PERF_HES_STOPPED;
- raw_spin_lock_irqsave(&state->lock, flags);
intel_cqm_event_read(event);
- if (!--state->cnt) {
+ if (!--state->rmid_usecnt) {
state->rmid = 0;
- wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
} else {
WARN_ON_ONCE(!state->rmid);
}
-
- raw_spin_unlock_irqrestore(&state->lock, flags);
}
static int intel_cqm_event_add(struct perf_event *event, int mode)
{
unsigned long flags;
- unsigned int rmid;
+ u32 rmid;
raw_spin_lock_irqsave(&cache_lock, flags);
@@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
return 0;
}
-static void intel_cqm_event_del(struct perf_event *event, int mode)
-{
- intel_cqm_event_stop(event, mode);
-}
-
static void intel_cqm_event_destroy(struct perf_event *event)
{
struct perf_event *group_other = NULL;
@@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
list_replace(&event->hw.cqm_groups_entry,
&group_other->hw.cqm_groups_entry);
} else {
- unsigned int rmid = event->hw.cqm_rmid;
+ u32 rmid = event->hw.cqm_rmid;
if (__rmid_valid(rmid))
__put_rmid(rmid);
@@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = {
.task_ctx_nr = perf_sw_context,
.event_init = intel_cqm_event_init,
.add = intel_cqm_event_add,
- .del = intel_cqm_event_del,
+ .del = intel_cqm_event_stop,
.start = intel_cqm_event_start,
.stop = intel_cqm_event_stop,
.read = intel_cqm_event_read,
@@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu)
static void intel_cqm_cpu_prepare(unsigned int cpu)
{
- struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+ struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
- raw_spin_lock_init(&state->lock);
state->rmid = 0;
- state->cnt = 0;
+ state->closid = 0;
+ state->rmid_usecnt = 0;
WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 813f75d71175..71fc40238843 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -11,7 +11,7 @@
#define BTS_RECORD_SIZE 24
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE PAGE_SIZE
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
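
Growing the PEBS buffer from one page to sixteen is what makes the
multi-record draining below worthwhile. A rough capacity figure, assuming 4K
pages and the 192-byte Haswell record size (both values illustrative):

	/* 16 * 4096 / 192 ~= 341 records per buffer fill */
	enum { PEBS_BUF = 4096 << 4, PEBS_REC = 192, PEBS_CAP = PEBS_BUF / PEBS_REC };
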
/*
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
- int max, thresh = 1; /* always use a single PEBS record */
+ int max;
void *buffer, *ibuffer;
if (!x86_pmu.pebs)
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
- ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- thresh * x86_pmu.pebs_record_size;
-
return 0;
}
@@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
return 1;
}
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+ struct pt_regs regs;
+
+ x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
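+ /*
+ * Drain on schedule-out so that no stale records linger in the
+ * buffer while this context is off the CPU.
+ */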
+ if (!sched_in)
+ intel_pmu_drain_pebs_buffer();
+}
+
/*
* PEBS
*/
@@ -684,33 +694,81 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
return &emptyconstraint;
}
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+ return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+ bool first_pebs;
+ u64 threshold;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+ first_pebs = !pebs_is_enabled(cpuc);
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
+
+ /*
+ * When the event is constrained enough, we can use a larger
+ * threshold and run the event with less frequent PMIs.
+ */
+ if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+ threshold = ds->pebs_absolute_maximum -
+ x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+ if (first_pebs)
+ perf_sched_cb_inc(event->ctx->pmu);
+ } else {
+ threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+ /*
+ * If not all events can use the larger buffer,
+ * roll back to threshold = 1.
+ */
+ if (!first_pebs &&
+ (ds->pebs_interrupt_threshold > threshold))
+ perf_sched_cb_dec(event->ctx->pmu);
+ }
+
+ /* Use auto-reload if possible to save an MSR write in the PMI */
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ ds->pebs_event_reset[hwc->idx] =
+ (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+ }
+
+ if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+ ds->pebs_interrupt_threshold = threshold;
}
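
The threshold selection above can be read in isolation: with free-running
PEBS the PMI fires only when the buffer is nearly full, leaving room for one
record per possible event; otherwise the PMI fires after every record. A
sketch with illustrative sizes (64K buffer, 192-byte records, 8 counters):

	static unsigned long long pick_pebs_threshold(unsigned long long base,
						      int freerunning)
	{
		const unsigned long long max = base + 65536;
		const int record = 192, max_events = 8;

		if (freerunning)	/* interrupt only when nearly full */
			return max - (unsigned long long)max_events * record;

		return base + record;	/* interrupt after every record */
	}
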
void intel_pmu_pebs_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
- if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
- else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
+ if (ds->pebs_interrupt_threshold >
+ ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
+ intel_pmu_drain_pebs_buffer();
+ if (!pebs_is_enabled(cpuc))
+ perf_sched_cb_dec(event->ctx->pmu);
+ }
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
return txn;
}
-static void __intel_pmu_pebs_event(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs)
+static void setup_pebs_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
(PERF_X86_EVENT_PEBS_ST_HSW | \
@@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
*/
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_hsw *pebs = __pebs;
- struct perf_sample_data data;
- struct pt_regs regs;
u64 sample_type;
int fll, fst, dsrc;
int fl = event->hw.flags;
- if (!intel_pmu_save_and_restart(event))
+ if (pebs == NULL)
return;
sample_type = event->attr.sample_type;
@@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(data, 0, event->hw.last_period);
- data.period = event->hw.last_period;
+ data->period = event->hw.last_period;
/*
* Use latency for weight (only avail with PEBS-LL)
*/
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
- data.weight = pebs->lat;
+ data->weight = pebs->lat;
/*
* data.data_src encodes the data source
@@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
val = precise_datala_hsw(event, pebs->dse);
else if (fst)
val = precise_store_data(pebs->dse);
- data.data_src.val = val;
+ data->data_src.val = val;
}
/*
@@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
*/
- regs = *iregs;
- regs.flags = pebs->flags;
- set_linear_ip(&regs, pebs->ip);
- regs.bp = pebs->bp;
- regs.sp = pebs->sp;
+ *regs = *iregs;
+ regs->flags = pebs->flags;
+ set_linear_ip(regs, pebs->ip);
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
- regs.ax = pebs->ax;
- regs.bx = pebs->bx;
- regs.cx = pebs->cx;
- regs.dx = pebs->dx;
- regs.si = pebs->si;
- regs.di = pebs->di;
- regs.bp = pebs->bp;
- regs.sp = pebs->sp;
-
- regs.flags = pebs->flags;
+ regs->ax = pebs->ax;
+ regs->bx = pebs->bx;
+ regs->cx = pebs->cx;
+ regs->dx = pebs->dx;
+ regs->si = pebs->si;
+ regs->di = pebs->di;
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
+
+ regs->flags = pebs->flags;
#ifndef CONFIG_X86_32
- regs.r8 = pebs->r8;
- regs.r9 = pebs->r9;
- regs.r10 = pebs->r10;
- regs.r11 = pebs->r11;
- regs.r12 = pebs->r12;
- regs.r13 = pebs->r13;
- regs.r14 = pebs->r14;
- regs.r15 = pebs->r15;
+ regs->r8 = pebs->r8;
+ regs->r9 = pebs->r9;
+ regs->r10 = pebs->r10;
+ regs->r11 = pebs->r11;
+ regs->r12 = pebs->r12;
+ regs->r13 = pebs->r13;
+ regs->r14 = pebs->r14;
+ regs->r15 = pebs->r15;
#endif
}
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs.ip = pebs->real_ip;
- regs.flags |= PERF_EFLAGS_EXACT;
- } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
- regs.flags |= PERF_EFLAGS_EXACT;
+ regs->ip = pebs->real_ip;
+ regs->flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+ regs->flags |= PERF_EFLAGS_EXACT;
else
- regs.flags &= ~PERF_EFLAGS_EXACT;
+ regs->flags &= ~PERF_EFLAGS_EXACT;
if ((sample_type & PERF_SAMPLE_ADDR) &&
x86_pmu.intel_cap.pebs_format >= 1)
- data.addr = pebs->dla;
+ data->addr = pebs->dla;
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data.weight = intel_hsw_weight(pebs);
+ data->weight = intel_hsw_weight(pebs);
if (sample_type & PERF_SAMPLE_TRANSACTION)
- data.txn = intel_hsw_transaction(pebs);
+ data->txn = intel_hsw_transaction(pebs);
}
if (has_branch_stack(event))
- data.br_stack = &cpuc->lbr_stack;
+ data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ void *at;
+ u64 pebs_status;
+
+ if (base == NULL)
+ return NULL;
+
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
- if (perf_event_overflow(event, &data, &regs))
+ if (test_bit(bit, (unsigned long *)&p->status)) {
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3)
+ return at;
+
+ if (p->status == (1 << bit))
+ return at;
+
+ /* clear non-PEBS bit and re-check */
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+ if (pebs_status == (1 << bit))
+ return at;
+ }
+ }
+ return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ void *base, void *top,
+ int bit, int count)
+{
+ struct perf_sample_data data;
+ struct pt_regs regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+ if (!intel_pmu_save_and_restart(event) &&
+ !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+ return;
+
+ while (count > 1) {
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+ perf_event_output(event, &data, &regs);
+ at += x86_pmu.pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ count--;
+ }
+
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+ /*
+ * All but the last record have been processed above.
+ * The last one is left so that the overflow handler can be called.
+ */
+ if (perf_event_overflow(event, &data, &regs)) {
x86_pmu_stop(event, 0);
+ return;
+ }
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
@@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
if (!event->attr.precise_ip)
return;
- n = top - at;
+ n = (top - at) / x86_pmu.pebs_record_size;
if (n <= 0)
return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
- at += n - 1;
-
- __intel_pmu_pebs_event(event, iregs, at);
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct perf_event *event = NULL;
- void *at, *top;
- u64 status = 0;
- int bit;
+ struct perf_event *event;
+ void *base, *at, *top;
+ short counts[MAX_PEBS_EVENTS] = {};
+ short error[MAX_PEBS_EVENTS] = {};
+ int bit, i;
if (!x86_pmu.pebs_active)
return;
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
ds->pebs_index = ds->pebs_buffer_base;
- if (unlikely(at > top))
+ if (unlikely(base >= top))
return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
- "Unexpected number of pebs records %ld\n",
- (long)(top - at) / x86_pmu.pebs_record_size);
-
- for (; at < top; at += x86_pmu.pebs_record_size) {
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
- for_each_set_bit(bit, (unsigned long *)&p->status,
- x86_pmu.max_pebs_events) {
- event = cpuc->events[bit];
- if (!test_bit(bit, cpuc->active_mask))
- continue;
-
- WARN_ON_ONCE(!event);
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ MAX_PEBS_EVENTS)
+ counts[bit]++;
- if (!event->attr.precise_ip)
- continue;
+ continue;
+ }
- if (__test_and_set_bit(bit, (unsigned long *)&status))
+ bit = find_first_bit((unsigned long *)&p->status,
+ x86_pmu.max_pebs_events);
+ if (bit >= x86_pmu.max_pebs_events)
+ continue;
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+ /*
+ * The PEBS hardware does not deal well with events that happen
+ * close to each other, leaving multiple status bits set; this
+ * should be rare.
+ *
+ * If these events include one PEBS and multiple non-PEBS events,
+ * the PEBS record is unaffected and is handled normally (the
+ * slow path below).
+ *
+ * If they include two or more PEBS events, their records can be
+ * collapsed into a single one, and it is then impossible to
+ * reconstruct which events caused the record. This is called a
+ * collision; when it happens, the record is dropped.
+ */
+ if (p->status != (1 << bit)) {
+ u64 pebs_status;
+
+ /* slow path */
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+ if (pebs_status != (1 << bit)) {
+ for_each_set_bit(i, (unsigned long *)&pebs_status,
+ MAX_PEBS_EVENTS)
+ error[i]++;
continue;
-
- break;
+ }
}
+ counts[bit]++;
+ }
- if (!event || bit >= x86_pmu.max_pebs_events)
+ for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+ if ((counts[bit] == 0) && (error[bit] == 0))
continue;
+ event = cpuc->events[bit];
+ WARN_ON_ONCE(!event);
+ WARN_ON_ONCE(!event->attr.precise_ip);
- __intel_pmu_pebs_event(event, iregs, at);
+ /* log dropped samples number */
+ if (error[bit])
+ perf_log_lost_samples(event, error[bit]);
+
+ if (counts[bit]) {
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit]);
+ }
}
}
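
The per-record decision in the pre-v3 loop above boils down to: attribute
the record when exactly one enabled PEBS bit survives the masking, count it
as a dropped collision otherwise. A stand-alone sketch (NR_PEBS and the
helper are illustrative):

	#include <stdint.h>

	#define NR_PEBS 8	/* MAX_PEBS_EVENTS on these parts */

	static void account_record(uint64_t status, uint64_t pebs_enabled,
				   int bit, short counts[], short error[])
	{
		uint64_t pebs_status = status & pebs_enabled &
				       ((1ULL << NR_PEBS) - 1);
		int i;

		if (pebs_status == (1ULL << bit)) {
			counts[bit]++;		/* unambiguous: attribute it */
			return;
		}
		for (i = 0; i < NR_PEBS; i++)	/* collision: log and drop */
			if (pebs_status & (1ULL << i))
				error[i]++;
	}
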
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 94e5b506caa6..452a7bd2dedb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -96,6 +96,7 @@ enum {
X86_BR_NO_TX = 1 << 14,/* not in transaction */
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
+ X86_BR_IND_JMP = 1 << 17,/* indirect jump */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -113,6 +114,7 @@ enum {
X86_BR_IRQ |\
X86_BR_ABORT |\
X86_BR_IND_CALL |\
+ X86_BR_IND_JMP |\
X86_BR_ZERO_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
- if (!x86_pmu.lbr_nr)
- return;
-
/*
* If LBR callstack feature is enabled and the stack was saved when
* the task was scheduled out, restore the stack. Otherwise flush
@@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
X86_BR_CALL_STACK;
}
+ if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ mask |= X86_BR_IND_JMP;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
@@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
break;
case 4:
case 5:
- ret = X86_BR_JMP;
+ ret = X86_BR_IND_JMP;
break;
}
break;
@@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
*/
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
| LBR_FAR,
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
| LBR_RETURN | LBR_CALL_STACK,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
/* core */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index ffe666c2c6b5..159887c3a89d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)
de_attr->attr.attr.name = pt_caps[i].name;
- sysfs_attr_init(&de_attrs->attr.attr);
+ sysfs_attr_init(&de_attr->attr.attr);
de_attr->attr.attr.mode = S_IRUGO;
de_attr->attr.show = pt_cap_show;
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
-static bool pt_is_running(void)
-{
- u64 ctl;
-
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-
- return !!(ctl & RTIT_CTL_TRACEEN);
-}
-
static void pt_config(struct perf_event *event)
{
u64 reg;
@@ -609,16 +600,19 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
* @handle: Current output handle.
*
* Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected.
+ * hasn't yet collected, and to wake the consumer up after a certain fraction
+ * of the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
*/
static int pt_buffer_reset_markers(struct pt_buffer *buf,
struct perf_output_handle *handle)
{
- unsigned long idx, npages, end;
-
- if (buf->snapshot)
- return 0;
+ unsigned long head = local64_read(&buf->head);
+ unsigned long idx, npages, wakeup;
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 <
@@ -634,17 +628,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
buf->topa_index[buf->stop_pos]->stop = 0;
buf->topa_index[buf->intr_pos]->intr = 0;
- if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
- npages = (handle->size + 1) >> PAGE_SHIFT;
- end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
- /*if (end > handle->wakeup >> PAGE_SHIFT)
- end = handle->wakeup >> PAGE_SHIFT;*/
- idx = end & (buf->nr_pages - 1);
- buf->stop_pos = idx;
- idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
- idx &= buf->nr_pages - 1;
- buf->intr_pos = idx;
- }
+ /* how many pages till the STOP marker */
+ npages = handle->size >> PAGE_SHIFT;
+
+ /* if it's on a page boundary, fill up one more page */
+ if (!offset_in_page(head + handle->size + 1))
+ npages++;
+
+ idx = (head >> PAGE_SHIFT) + npages;
+ idx &= buf->nr_pages - 1;
+ buf->stop_pos = idx;
+
+ wakeup = handle->wakeup >> PAGE_SHIFT;
+
+ /* in the worst case, wake up the consumer one page before hard stop */
+ idx = (head >> PAGE_SHIFT) + npages - 1;
+ if (idx > wakeup)
+ idx = wakeup;
+
+ idx &= buf->nr_pages - 1;
+ buf->intr_pos = idx;
buf->topa_index[buf->stop_pos]->stop = 1;
buf->topa_index[buf->intr_pos]->intr = 1;
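
The marker arithmetic above is easier to follow in isolation. Assuming 4K
pages (PAGE_SHIFT = 12) and a power-of-two buffer, a hedged sketch of the
same computation:

	static void place_markers(unsigned long head, unsigned long size,
				  unsigned long wakeup, unsigned long nr_pages,
				  unsigned long *stop_pos, unsigned long *intr_pos)
	{
		unsigned long npages = size >> 12;
		unsigned long idx;

		/* if the stop point sits on a page boundary, take one more page */
		if (!((head + size + 1) & 4095))
			npages++;

		*stop_pos = ((head >> 12) + npages) & (nr_pages - 1);

		/* wake the consumer at most one page before the hard stop */
		idx = (head >> 12) + npages - 1;
		if (idx > (wakeup >> 12))
			idx = wakeup >> 12;

		*intr_pos = idx & (nr_pages - 1);
	}
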
@@ -664,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
struct topa *cur = buf->first, *prev = buf->last;
struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
*te_prev = TOPA_ENTRY(prev, prev->last - 1);
- int pg = 0, idx = 0, ntopa = 0;
+ int pg = 0, idx = 0;
while (pg < buf->nr_pages) {
int tidx;
@@ -679,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
/* advance to next topa table */
idx = 0;
cur = list_entry(cur->list.next, struct topa, list);
- ntopa++;
- } else
+ } else {
idx++;
+ }
te_cur = TOPA_ENTRY(cur, idx);
}
@@ -693,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
* @head: Write pointer (aux_head) from AUX buffer.
*
* Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly.
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off}, which will be
+ * programmed into the PT MSRs when tracing is enabled, as well as buf::head
+ * and buf::data_size, which are used to determine the INT and STOP markers'
+ * locations by a subsequent call to pt_buffer_reset_markers().
*/
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
@@ -891,6 +901,7 @@ void intel_pt_interrupt(void)
}
pt_buffer_reset_offsets(buf, pt->handle.head);
+ /* snapshot counters don't use PMI, so it's safe */
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0, true);
@@ -913,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
- if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+ if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
}
@@ -944,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
event->hw.state = PERF_HES_STOPPED;
if (mode & PERF_EF_UPDATE) {
- struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
if (!buf)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 999289b94025..358c54ad20d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
+ case 61: /* Broadwell */
rapl_cntr_mask = RAPL_IDX_HSW;
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c635b8b49e93..7c1de1610178 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- hwc->constraint = c;
+ box->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = hwc->constraint;
+ c = box->event_constraint[i];
/* never assigned */
if (hwc->idx == -1)
@@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
}
/* slow path */
if (i != n)
- ret = perf_assign_events(box->event_list, n,
- wmin, wmax, assign);
+ ret = perf_assign_events(box->event_constraint, n,
+ wmin, wmax, n, assign);
if (!assign || ret) {
for (i = 0; i < n; i++)
@@ -840,6 +839,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
box->phys_id = phys_id;
box->pci_dev = pdev;
box->pmu = pmu;
+ uncore_box_init(box);
pci_set_drvdata(pdev, box);
raw_spin_lock(&uncore_box_lock);
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
case 69: /* Haswell Celeron */
ret = hsw_uncore_pci_init();
break;
+ case 61: /* Broadwell */
+ ret = bdw_uncore_pci_init();
+ break;
default:
return 0;
}
@@ -1003,8 +1006,10 @@ static int uncore_cpu_starting(int cpu)
pmu = &type->pmus[j];
box = *per_cpu_ptr(pmu->box, cpu);
/* called by uncore_cpu_init? */
- if (box && box->phys_id >= 0)
+ if (box && box->phys_id >= 0) {
+ uncore_box_init(box);
continue;
+ }
for_each_online_cpu(k) {
exist = *per_cpu_ptr(pmu->box, k);
@@ -1020,8 +1025,10 @@ static int uncore_cpu_starting(int cpu)
}
}
- if (box)
+ if (box) {
box->phys_id = phys_id;
+ uncore_box_init(box);
+ }
}
}
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 6c8c1e7e69d8..0f77f0a196e4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,7 @@ struct intel_uncore_box {
atomic_t refcnt;
struct perf_event *events[UNCORE_PMC_IDX_MAX];
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
u64 tags[UNCORE_PMC_IDX_MAX];
struct pci_dev *pci_dev;
@@ -257,14 +258,6 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)
return box->pmu->type->num_counters;
}
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
- if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
- if (box->pmu->type->ops->init_box)
- box->pmu->type->ops->init_box(box);
- }
-}
-
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -273,8 +266,6 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)
static inline void uncore_enable_box(struct intel_uncore_box *box)
{
- uncore_box_init(box);
-
if (box->pmu->type->ops->enable_box)
box->pmu->type->ops->enable_box(box);
}
@@ -297,6 +288,14 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,
return box->pmu->type->ops->read_counter(box, event);
}
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+ if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->init_box)
+ box->pmu->type->ops->init_box(box);
+ }
+}
+
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
return (box->phys_id < 0);
@@ -326,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty;
int snb_uncore_pci_init(void);
int ivb_uncore_pci_init(void);
int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 3001015b755c..b005a78c7012 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -1,6 +1,14 @@
/* Nehalem/SandyBridge/Haswell uncore support */
#include "perf_event_intel_uncore.h"
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
+
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
@@ -472,6 +480,18 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -490,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
.id_table = hsw_uncore_pci_ids,
};
+static struct pci_driver bdw_uncore_pci_driver = {
+ .name = "bdw_uncore",
+ .id_table = bdw_uncore_pci_ids,
+};
+
struct imc_uncore_pci_dev {
__u32 pci_id;
struct pci_driver *driver;
@@ -502,6 +527,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
+ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
+ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
{ /* end marker */ }
};
@@ -549,6 +576,11 @@ int hsw_uncore_pci_init(void)
return imc_uncore_pci_init();
}
+int bdw_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
/* end of Sandy Bridge uncore support */
/* Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 12d9548457e7..6d6e85dd5849 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -164,8 +164,8 @@
((1ULL << (n)) - 1)))
/* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0 0x705
-#define HSWEP_U_MSR_PMON_CTL0 0x709
+#define HSWEP_U_MSR_PMON_CTR0 0x709
+#define HSWEP_U_MSR_PMON_CTL0 0x705
#define HSWEP_U_MSR_PMON_FILTER 0x707
#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL 0x703
@@ -1914,7 +1914,7 @@ static struct intel_uncore_type hswep_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
.num_boxes = 18,
- .perf_ctr_bits = 44,
+ .perf_ctr_bits = 48,
.event_ctl = HSWEP_C0_MSR_PMON_CTL0,
.perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
.event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index e7d8c7608471..18ca99f2798b 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -12,7 +12,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
{
#ifdef CONFIG_SMP
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu)));
+ seq_printf(m, "siblings\t: %d\n",
+ cpumask_weight(topology_core_cpumask(cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile
new file mode 100644
index 000000000000..68279efb811a
--- /dev/null
+++ b/arch/x86/kernel/fpu/Makefile
@@ -0,0 +1,5 @@
+#
+# Build rules for the FPU support code:
+#
+
+obj-y += init.o bugs.o core.o regset.o signal.o xstate.o
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
new file mode 100644
index 000000000000..dd9ca9b60ff3
--- /dev/null
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -0,0 +1,71 @@
+/*
+ * x86 FPU bug checks:
+ */
+#include <asm/fpu/internal.h>
+
+/*
+ * Boot time CPU/FPU FDIV bug detection code:
+ */
+
+static double __initdata x = 4195835.0;
+static double __initdata y = 3145727.0;
+
+/*
+ * This used to check for exceptions.
+ * However, it turns out that to support that,
+ * the XMM trap handlers basically had to
+ * be buggy. So let's have a correct XMM trap
+ * handler, and forget about printing out
+ * some status at boot.
+ *
+ * We should really only care about bugs here
+ * anyway. Not features.
+ */
+static void __init check_fpu(void)
+{
+ u32 cr0_saved;
+ s32 fdiv_bug;
+
+ /* We might have CR0::TS set already, clear it: */
+ cr0_saved = read_cr0();
+ write_cr0(cr0_saved & ~X86_CR0_TS);
+
+ kernel_fpu_begin();
+
+ /*
+ * trap_init() enabled FXSR and company _before_ testing for FP
+ * problems here.
+ *
+ * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug
+ */
+ __asm__("fninit\n\t"
+ "fldl %1\n\t"
+ "fdivl %2\n\t"
+ "fmull %2\n\t"
+ "fldl %1\n\t"
+ "fsubp %%st,%%st(1)\n\t"
+ "fistpl %0\n\t"
+ "fwait\n\t"
+ "fninit"
+ : "=m" (*&fdiv_bug)
+ : "m" (*&x), "m" (*&y));
+
+ kernel_fpu_end();
+
+ write_cr0(cr0_saved);
+
+ if (fdiv_bug) {
+ set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
+ pr_warn("Hmm, FPU with FDIV bug\n");
+ }
+}
+
+void __init fpu__init_check_bugs(void)
+{
+ /*
+ * kernel_fpu_begin/end() in check_fpu() relies on the patched
+ * alternative instructions.
+ */
+ if (cpu_has_fpu)
+ check_fpu();
+}
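
The check relies on the classic operand pair for the Pentium FDIV erratum:
on a correct FPU, x - (x/y)*y is exactly zero for these values, while the
flawed part returns 256. The same probe, as a stand-alone user-space
program:

	#include <stdio.h>

	int main(void)
	{
		/* volatile keeps the compiler from folding the math away */
		volatile double x = 4195835.0, y = 3145727.0;
		double r = x - (x / y) * y;

		printf("fdiv residue: %f (%s)\n", r,
		       r == 0.0 ? "ok" : "FDIV bug");
		return 0;
	}
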
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
new file mode 100644
index 000000000000..79de954626fd
--- /dev/null
+++ b/arch/x86/kernel/fpu/core.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+#include <asm/fpu/internal.h>
+#include <asm/fpu/regset.h>
+#include <asm/fpu/signal.h>
+#include <asm/traps.h>
+
+#include <linux/hardirq.h>
+
+/*
+ * Represents the initial FPU state. It's mostly (but not completely) zeroes,
+ * depending on the FPU hardware format:
+ */
+union fpregs_state init_fpstate __read_mostly;
+
+/*
+ * Track whether the kernel is using the FPU state
+ * currently.
+ *
+ * This flag is used:
+ *
+ * - by IRQ context code to potentially use the FPU
+ * if it's unused.
+ *
+ * - to debug kernel_fpu_begin()/end() correctness
+ */
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+/*
+ * Track which context is using the FPU on the CPU:
+ */
+DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+static void kernel_fpu_disable(void)
+{
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
+ this_cpu_write(in_kernel_fpu, true);
+}
+
+static void kernel_fpu_enable(void)
+{
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+ this_cpu_write(in_kernel_fpu, false);
+}
+
+static bool kernel_fpu_disabled(void)
+{
+ return this_cpu_read(in_kernel_fpu);
+}
+
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have the FPU
+ * active (so that we don't try to save the FPU state), and TS
+ * must be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ *
+ * The exception is the eagerfpu case, where we return true: in
+ * that (likely) case the thread has FPU state active, but we are
+ * not going to set/clear TS anyway.
+ */
+static bool interrupted_kernel_fpu_idle(void)
+{
+ if (kernel_fpu_disabled())
+ return false;
+
+ if (use_eager_fpu())
+ return true;
+
+ return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+EXPORT_SYMBOL(irq_fpu_usable);
+
+void __kernel_fpu_begin(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ WARN_ON_FPU(!irq_fpu_usable());
+
+ kernel_fpu_disable();
+
+ if (fpu->fpregs_active) {
+ copy_fpregs_to_fpstate(fpu);
+ } else {
+ this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+ __fpregs_activate_hw();
+ }
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ if (fpu->fpregs_active)
+ copy_kernel_to_fpregs(&fpu->state);
+ else
+ __fpregs_deactivate_hw();
+
+ kernel_fpu_enable();
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ __kernel_fpu_begin();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ __kernel_fpu_end();
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
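
A hypothetical caller shows the intended shape of the API pair above; checksum_block_generic() is a made-up scalar fallback, not a kernel function:

```c
static void checksum_block(const void *data, unsigned int len)
{
	if (!irq_fpu_usable()) {
		checksum_block_generic(data, len);	/* scalar fallback */
		return;
	}

	kernel_fpu_begin();	/* disables preemption, saves user FPU state */
	/* ... SSE/AVX instructions are safe to execute here ... */
	kernel_fpu_end();	/* user state will be restored as needed */
}
```
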
+
+/*
+ * CR0::TS save/restore functions:
+ */
+int irq_ts_save(void)
+{
+ /*
+	 * If we are in process context and not atomic, a spurious DNA fault
+	 * is harmless, so leave TS alone. Doing clts() in process context
+	 * would otherwise require disabling preemption or some heavy
+	 * lifting like kernel_fpu_begin().
+ */
+ if (!in_atomic())
+ return 0;
+
+ if (read_cr0() & X86_CR0_TS) {
+ clts();
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_ts_save);
+
+void irq_ts_restore(int TS_state)
+{
+ if (TS_state)
+ stts();
+}
+EXPORT_SYMBOL_GPL(irq_ts_restore);
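
A sketch of how a driver (in the style of the VIA PadLock code) might bracket TS-sensitive work when running in atomic context; do_hw_op_atomic() is illustrative:

```c
static void do_hw_op_atomic(void)
{
	int ts_state;

	ts_state = irq_ts_save();	/* clears CR0::TS only if in_atomic() */
	/* ... execute instructions that would otherwise raise #NM ... */
	irq_ts_restore(ts_state);	/* re-arms TS only if we cleared it */
}
```
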
+
+/*
+ * Save the FPU state (mark it for reload if necessary):
+ *
+ * This only ever gets called for the current task.
+ */
+void fpu__save(struct fpu *fpu)
+{
+ WARN_ON_FPU(fpu != &current->thread.fpu);
+
+ preempt_disable();
+ if (fpu->fpregs_active) {
+ if (!copy_fpregs_to_fpstate(fpu))
+ fpregs_deactivate(fpu);
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(fpu__save);
+
+/*
+ * Legacy x87 fpstate state init:
+ */
+static inline void fpstate_init_fstate(struct fregs_state *fp)
+{
+ fp->cwd = 0xffff037fu;
+ fp->swd = 0xffff0000u;
+ fp->twd = 0xffffffffu;
+ fp->fos = 0xffff0000u;
+}
+
+void fpstate_init(union fpregs_state *state)
+{
+ if (!cpu_has_fpu) {
+ fpstate_init_soft(&state->soft);
+ return;
+ }
+
+ memset(state, 0, xstate_size);
+
+ if (cpu_has_fxsr)
+ fpstate_init_fxstate(&state->fxsave);
+ else
+ fpstate_init_fstate(&state->fsave);
+}
+EXPORT_SYMBOL_GPL(fpstate_init);
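
For reference, the legacy defaults written by fpstate_init_fstate() decode as follows; the macro names below are invented for illustration, not kernel definitions:

```c
#define X87_CW_EXCEPTION_MASK	0x003f	/* IM|DM|ZM|OM|UM|PM: all masked */
#define X87_CW_PRECISION_EXT	0x0300	/* PC=11b: 64-bit significand */
#define X87_CW_ROUND_NEAREST	0x0000	/* RC=00b: round to nearest */
/*
 * Hence cwd == 0x037f; twd == 0xffff tags all eight registers 'empty';
 * the 0xffff0000u upper halves mirror the unused high bits that FNSAVE
 * leaves in each 32-bit field.
 */
```
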
+
+/*
+ * Copy the current task's FPU state to a new task's FPU context.
+ *
+ * In both the 'eager' and the 'lazy' case we save hardware registers
+ * directly to the destination buffer.
+ */
+static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+{
+ WARN_ON_FPU(src_fpu != &current->thread.fpu);
+
+ /*
+ * Don't let 'init optimized' areas of the XSAVE area
+ * leak into the child task:
+ */
+ if (use_eager_fpu())
+ memset(&dst_fpu->state.xsave, 0, xstate_size);
+
+ /*
+ * Save current FPU registers directly into the child
+ * FPU context, without any memory-to-memory copying.
+ *
+ * If the FPU context got destroyed in the process (FNSAVE
+ * done on old CPUs) then copy it back into the source
+ * context and mark the current task for lazy restore.
+ *
+ * We have to do all this with preemption disabled,
+ * mostly because of the FNSAVE case, because in that
+ * case we must not allow preemption in the window
+ * between the FNSAVE and us marking the context lazy.
+ *
+ * It shouldn't be an issue as even FNSAVE is plenty
+ * fast in terms of critical section length.
+ */
+ preempt_disable();
+ if (!copy_fpregs_to_fpstate(dst_fpu)) {
+ memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+ fpregs_deactivate(src_fpu);
+ }
+ preempt_enable();
+}
+
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+{
+ dst_fpu->counter = 0;
+ dst_fpu->fpregs_active = 0;
+ dst_fpu->last_cpu = -1;
+
+ if (src_fpu->fpstate_active)
+ fpu_copy(dst_fpu, src_fpu);
+
+ return 0;
+}
+
+/*
+ * Activate the current task's in-memory FPU context,
+ * if it has not been used before:
+ */
+void fpu__activate_curr(struct fpu *fpu)
+{
+ WARN_ON_FPU(fpu != &current->thread.fpu);
+
+ if (!fpu->fpstate_active) {
+ fpstate_init(&fpu->state);
+
+ /* Safe to do for the current task: */
+ fpu->fpstate_active = 1;
+ }
+}
+EXPORT_SYMBOL_GPL(fpu__activate_curr);
+
+/*
+ * This function must be called before we read a task's fpstate.
+ *
+ * If the task has not used the FPU before then initialize its
+ * fpstate.
+ *
+ * If the task has used the FPU before then save it.
+ */
+void fpu__activate_fpstate_read(struct fpu *fpu)
+{
+ /*
+ * If fpregs are active (in the current CPU), then
+ * copy them to the fpstate:
+ */
+ if (fpu->fpregs_active) {
+ fpu__save(fpu);
+ } else {
+ if (!fpu->fpstate_active) {
+ fpstate_init(&fpu->state);
+
+ /* Safe to do for current and for stopped child tasks: */
+ fpu->fpstate_active = 1;
+ }
+ }
+}
+
+/*
+ * This function must be called before we write a task's fpstate.
+ *
+ * If the task has used the FPU before then unlazy it.
+ * If the task has not used the FPU before then initialize its fpstate.
+ *
+ * After this function call, after registers in the fpstate are
+ * modified and the child task has woken up, the child task will
+ * restore the modified FPU state from the modified context. If we
+ * didn't clear its lazy status here then the lazy in-registers
+ * state pending on its former CPU could be restored, corrupting
+ * the modifications.
+ */
+void fpu__activate_fpstate_write(struct fpu *fpu)
+{
+ /*
+ * Only stopped child tasks can be used to modify the FPU
+ * state in the fpstate buffer:
+ */
+ WARN_ON_FPU(fpu == &current->thread.fpu);
+
+ if (fpu->fpstate_active) {
+ /* Invalidate any lazy state: */
+ fpu->last_cpu = -1;
+ } else {
+ fpstate_init(&fpu->state);
+
+ /* Safe to do for stopped child tasks: */
+ fpu->fpstate_active = 1;
+ }
+}
+
+/*
+ * 'fpu__restore()' is called to copy FPU registers from
+ * the FPU fpstate to the live hw registers and to activate
+ * access to the hardware registers, so that FPU instructions
+ * can be used afterwards.
+ *
+ * Must be called with kernel preemption disabled (for example
+ * with local interrupts disabled, as it is in the case of
+ * do_device_not_available()).
+ */
+void fpu__restore(struct fpu *fpu)
+{
+ fpu__activate_curr(fpu);
+
+ /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
+ kernel_fpu_disable();
+ fpregs_activate(fpu);
+ copy_kernel_to_fpregs(&fpu->state);
+ fpu->counter++;
+ kernel_fpu_enable();
+}
+EXPORT_SYMBOL_GPL(fpu__restore);
+
+/*
+ * Drops current FPU state: deactivates the fpregs and
+ * the fpstate. NOTE: it still leaves previous contents
+ * in the fpregs in the eager-FPU case.
+ *
+ * This function can be used in cases where we know that
+ * a state-restore is coming: either an explicit one,
+ * or a reschedule.
+ */
+void fpu__drop(struct fpu *fpu)
+{
+ preempt_disable();
+ fpu->counter = 0;
+
+ if (fpu->fpregs_active) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
+ }
+
+ fpu->fpstate_active = 0;
+
+ preempt_enable();
+}
+
+/*
+ * Clear FPU registers by setting them up from
+ * the init fpstate:
+ */
+static inline void copy_init_fpstate_to_fpregs(void)
+{
+ if (use_xsave())
+ copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+ else
+ copy_kernel_to_fxregs(&init_fpstate.fxsave);
+}
+
+/*
+ * Clear the FPU state back to init state.
+ *
+ * Called by sys_execve(), by the signal handler code and by various
+ * error paths.
+ */
+void fpu__clear(struct fpu *fpu)
+{
+ WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+
+ if (!use_eager_fpu()) {
+ /* FPU state will be reallocated lazily at the first use. */
+ fpu__drop(fpu);
+ } else {
+ if (!fpu->fpstate_active) {
+ fpu__activate_curr(fpu);
+ user_fpu_begin();
+ }
+ copy_init_fpstate_to_fpregs();
+ }
+}
+
+/*
+ * x87 math exception handling:
+ */
+
+static inline unsigned short get_fpu_cwd(struct fpu *fpu)
+{
+ if (cpu_has_fxsr) {
+ return fpu->state.fxsave.cwd;
+ } else {
+ return (unsigned short)fpu->state.fsave.cwd;
+ }
+}
+
+static inline unsigned short get_fpu_swd(struct fpu *fpu)
+{
+ if (cpu_has_fxsr) {
+ return fpu->state.fxsave.swd;
+ } else {
+ return (unsigned short)fpu->state.fsave.swd;
+ }
+}
+
+static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
+{
+ if (cpu_has_xmm) {
+ return fpu->state.fxsave.mxcsr;
+ } else {
+ return MXCSR_DEFAULT;
+ }
+}
+
+int fpu__exception_code(struct fpu *fpu, int trap_nr)
+{
+ int err;
+
+ if (trap_nr == X86_TRAP_MF) {
+ unsigned short cwd, swd;
+ /*
+ * (~cwd & swd) will mask out exceptions that are not set to unmasked
+ * status. 0x3f is the exception bits in these regs, 0x200 is the
+ * C1 reg you need in case of a stack fault, 0x040 is the stack
+ * fault bit. We should only be taking one exception at a time,
+ * so if this combination doesn't produce any single exception,
+ * then we have a bad program that isn't synchronizing its FPU usage
+ * and it will suffer the consequences since we won't be able to
+	 * fully reproduce the context of the exception.
+ */
+ cwd = get_fpu_cwd(fpu);
+ swd = get_fpu_swd(fpu);
+
+ err = swd & ~cwd;
+ } else {
+ /*
+ * The SIMD FPU exceptions are handled a little differently, as there
+ * is only a single status/control register. Thus, to determine which
+ * unmasked exception was caught we must mask the exception mask bits
+ * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ */
+ unsigned short mxcsr = get_fpu_mxcsr(fpu);
+ err = ~(mxcsr >> 7) & mxcsr;
+ }
+
+ if (err & 0x001) { /* Invalid op */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
+ return FPE_FLTINV;
+ } else if (err & 0x004) { /* Divide by Zero */
+ return FPE_FLTDIV;
+ } else if (err & 0x008) { /* Overflow */
+ return FPE_FLTOVF;
+ } else if (err & 0x012) { /* Denormal, Underflow */
+ return FPE_FLTUND;
+ } else if (err & 0x020) { /* Precision */
+ return FPE_FLTRES;
+ }
+
+ /*
+ * If we're using IRQ 13, or supposedly even some trap
+ * X86_TRAP_MF implementations, it's possible
+ * we get a spurious trap, which is not an error.
+ */
+ return 0;
+}
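
Seen from user space, the return value above becomes siginfo.si_code. A minimal sketch that unmasks divide-by-zero and observes FPE_FLTDIV (assumes glibc on x86; link with -lm):

```c
#define _GNU_SOURCE
#include <fenv.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
	/* si_code was derived in the kernel from the FPU status words */
	printf("SIGFPE si_code=%d (FPE_FLTDIV=%d)\n", si->si_code, FPE_FLTDIV);
	_exit(0);
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };
	volatile double zero = 0.0;

	sigaction(SIGFPE, &sa, NULL);
	feenableexcept(FE_DIVBYZERO);	/* clear the mask bit in MXCSR/CW */

	return (int)(1.0 / zero);	/* faults before the cast happens */
}
```
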
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
new file mode 100644
index 000000000000..fc878fee6a51
--- /dev/null
+++ b/arch/x86/kernel/fpu/init.c
@@ -0,0 +1,354 @@
+/*
+ * x86 FPU boot time init code:
+ */
+#include <asm/fpu/internal.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Initialize the TS bit in CR0 according to the style of context-switches
+ * we are using:
+ */
+static void fpu__init_cpu_ctx_switch(void)
+{
+ if (!cpu_has_eager_fpu)
+ stts();
+ else
+ clts();
+}
+
+/*
+ * Initialize the registers found in all CPUs, CR0 and CR4:
+ */
+static void fpu__init_cpu_generic(void)
+{
+ unsigned long cr0;
+ unsigned long cr4_mask = 0;
+
+ if (cpu_has_fxsr)
+ cr4_mask |= X86_CR4_OSFXSR;
+ if (cpu_has_xmm)
+ cr4_mask |= X86_CR4_OSXMMEXCPT;
+ if (cr4_mask)
+ cr4_set_bits(cr4_mask);
+
+ cr0 = read_cr0();
+ cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
+ if (!cpu_has_fpu)
+ cr0 |= X86_CR0_EM;
+ write_cr0(cr0);
+
+ /* Flush out any pending x87 state: */
+ asm volatile ("fninit");
+}
+
+/*
+ * Enable all supported FPU features. Called when a CPU is brought online:
+ */
+void fpu__init_cpu(void)
+{
+ fpu__init_cpu_generic();
+ fpu__init_cpu_xstate();
+ fpu__init_cpu_ctx_switch();
+}
+
+/*
+ * The earliest FPU detection code.
+ *
+ * Set the X86_FEATURE_FPU CPU-capability bit based on
+ * trying to execute an actual sequence of FPU instructions:
+ */
+static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+{
+ unsigned long cr0;
+ u16 fsw, fcw;
+
+ fsw = fcw = 0xffff;
+
+ cr0 = read_cr0();
+ cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
+ write_cr0(cr0);
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+ set_cpu_cap(c, X86_FEATURE_FPU);
+ else
+ clear_cpu_cap(c, X86_FEATURE_FPU);
+
+#ifndef CONFIG_MATH_EMULATION
+ if (!cpu_has_fpu) {
+ pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
+ for (;;)
+ asm volatile("hlt");
+ }
+#endif
+}
+
+/*
+ * Boot time FPU feature detection code:
+ */
+unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+
+static void __init fpu__init_system_mxcsr(void)
+{
+ unsigned int mask = 0;
+
+ if (cpu_has_fxsr) {
+ struct fxregs_state fx_tmp __aligned(32) = { };
+
+ asm volatile("fxsave %0" : "+m" (fx_tmp));
+
+ mask = fx_tmp.mxcsr_mask;
+
+ /*
+ * If zero then use the default features mask,
+ * which has all features set, except the
+ * denormals-are-zero feature bit:
+ */
+ if (mask == 0)
+ mask = 0x0000ffbf;
+ }
+ mxcsr_feature_mask &= mask;
+}
+
+/*
+ * Once per bootup FPU initialization sequences that will run on most x86 CPUs:
+ */
+static void __init fpu__init_system_generic(void)
+{
+ /*
+ * Set up the legacy init FPU context. (xstate init might overwrite this
+ * with a more modern format, if the CPU supports it.)
+ */
+ fpstate_init_fxstate(&init_fpstate.fxsave);
+
+ fpu__init_system_mxcsr();
+}
+
+/*
+ * Size of the FPU context state. All tasks in the system use the
+ * same context size, regardless of what portion they use.
+ * This is inherent to the XSAVE architecture which puts all state
+ * components into a single, continuous memory block:
+ */
+unsigned int xstate_size;
+EXPORT_SYMBOL_GPL(xstate_size);
+
+/*
+ * Set up the xstate_size based on the legacy FPU context size.
+ *
+ * We set this up first, and later it will be overwritten by
+ * fpu__init_system_xstate() if the CPU knows about xstates.
+ */
+static void __init fpu__init_system_xstate_size_legacy(void)
+{
+ static int on_boot_cpu = 1;
+
+ WARN_ON_FPU(!on_boot_cpu);
+ on_boot_cpu = 0;
+
+ /*
+	 * Note that xstate_size might be overwritten later during
+ * fpu__init_system_xstate().
+ */
+
+ if (!cpu_has_fpu) {
+ /*
+ * Disable xsave as we do not support it if i387
+ * emulation is enabled.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ xstate_size = sizeof(struct swregs_state);
+ } else {
+ if (cpu_has_fxsr)
+ xstate_size = sizeof(struct fxregs_state);
+ else
+ xstate_size = sizeof(struct fregs_state);
+ }
+ /*
+ * Quirk: we don't yet handle the XSAVES* instructions
+ * correctly, as we don't correctly convert between
+ * standard and compacted format when interfacing
+ * with user-space - so disable it for now.
+ *
+ * The difference is small: with recent CPUs the
+ * compacted format is only marginally smaller than
+ * the standard FPU state format.
+ *
+ * ( This is easy to backport while we are fixing
+ * XSAVES* support. )
+ */
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+}
+
+/*
+ * FPU context switching strategies:
+ *
+ * Contrary to popular belief, we don't do lazy FPU saves, due to the
+ * task migration complications they bring on SMP - we only do
+ * lazy FPU restores.
+ *
+ * 'lazy' is the traditional strategy, which is based on setting
+ * CR0::TS to 1 during context-switch (instead of doing a full
+ * restore of the FPU state), which causes the first FPU instruction
+ * after the context switch (whenever it is executed) to fault - at
+ * which point we lazily restore the FPU state into FPU registers.
+ *
+ * Tasks are of course under no obligation to execute FPU instructions,
+ * so it can easily happen that another context-switch occurs without
+ * a single FPU instruction being executed. If we eventually switch
+ * back to the original task (that still owns the FPU) then we have
+ * not only saved the restores along the way, but we also have the
+ * FPU ready to be used for the original task.
+ *
+ * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * state during every context switch, regardless of whether the task
+ * has used FPU instructions in that time slice or not. This is done
+ * because modern FPU context saving instructions are able to optimize
+ * state saving and restoration in hardware: they can detect both
+ * unused and untouched FPU state and optimize accordingly.
+ *
+ * [ Note that even in 'lazy' mode we might optimize context switches
+ * to use 'eager' restores, if we detect that a task is using the FPU
+ * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
+ */
+static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+
+static int __init eager_fpu_setup(char *s)
+{
+ if (!strcmp(s, "on"))
+ eagerfpu = ENABLE;
+ else if (!strcmp(s, "off"))
+ eagerfpu = DISABLE;
+ else if (!strcmp(s, "auto"))
+ eagerfpu = AUTO;
+ return 1;
+}
+__setup("eagerfpu=", eager_fpu_setup);
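
A minimal sketch of the 'lazy' strategy described above, using this file's vocabulary; the helpers below are illustrative, the real paths are the context-switch code in fpu/internal.h and the #NM handler do_device_not_available():

```c
static void lazy_switch_fpu(struct fpu *prev_fpu)
{
	if (prev_fpu->fpregs_active)
		copy_fpregs_to_fpstate(prev_fpu); /* save outgoing state */
	stts();			/* arm CR0::TS: the next FPU insn will trap */
}

static void lazy_math_state_restore(void)	/* #NM: first FPU use */
{
	clts();					/* disarm the trap */
	fpu__restore(&current->thread.fpu);	/* restore from fpstate */
}
```
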
+
+/*
+ * Pick the FPU context switching strategy:
+ */
+static void __init fpu__init_system_ctx_switch(void)
+{
+ static bool on_boot_cpu = 1;
+
+ WARN_ON_FPU(!on_boot_cpu);
+ on_boot_cpu = 0;
+
+ WARN_ON_FPU(current->thread.fpu.fpstate_active);
+ current_thread_info()->status = 0;
+
+ /* Auto enable eagerfpu for xsaveopt */
+ if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+ eagerfpu = ENABLE;
+
+ if (xfeatures_mask & XSTATE_EAGER) {
+ if (eagerfpu == DISABLE) {
+ pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
+ xfeatures_mask & XSTATE_EAGER);
+ xfeatures_mask &= ~XSTATE_EAGER;
+ } else {
+ eagerfpu = ENABLE;
+ }
+ }
+
+ if (eagerfpu == ENABLE)
+ setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+
+ printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
+}
+
+/*
+ * Called on the boot CPU once per system bootup, to set up the initial
+ * FPU state that is later cloned into all processes:
+ */
+void __init fpu__init_system(struct cpuinfo_x86 *c)
+{
+ fpu__init_system_early_generic(c);
+
+ /*
+ * The FPU has to be operational for some of the
+ * later FPU init activities:
+ */
+ fpu__init_cpu();
+
+ /*
+ * But don't leave CR0::TS set yet, as some of the FPU setup
+ * methods depend on being able to execute FPU instructions
+ * that will fault on a set TS, such as the FXSAVE in
+ * fpu__init_system_mxcsr().
+ */
+ clts();
+
+ fpu__init_system_generic();
+ fpu__init_system_xstate_size_legacy();
+ fpu__init_system_xstate();
+
+ fpu__init_system_ctx_switch();
+}
+
+/*
+ * Boot parameter to turn off FPU support and fall back to math-emu:
+ */
+static int __init no_387(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
+ return 1;
+}
+__setup("no387", no_387);
+
+/*
+ * Disable all xstate CPU features:
+ */
+static int __init x86_noxsave_setup(char *s)
+{
+ if (strlen(s))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ setup_clear_cpu_cap(X86_FEATURE_AVX2);
+
+ return 1;
+}
+__setup("noxsave", x86_noxsave_setup);
+
+/*
+ * Disable the XSAVEOPT instruction specifically:
+ */
+static int __init x86_noxsaveopt_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+ return 1;
+}
+__setup("noxsaveopt", x86_noxsaveopt_setup);
+
+/*
+ * Disable the XSAVES instruction:
+ */
+static int __init x86_noxsaves_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ return 1;
+}
+__setup("noxsaves", x86_noxsaves_setup);
+
+/*
+ * Disable FX save/restore and SSE support:
+ */
+static int __init x86_nofxsr_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+ setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
+
+ return 1;
+}
+__setup("nofxsr", x86_nofxsr_setup);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
new file mode 100644
index 000000000000..dc60810c1c74
--- /dev/null
+++ b/arch/x86/kernel/fpu/regset.c
@@ -0,0 +1,356 @@
+/*
+ * FPU registers' regset abstraction, for ptrace, core dumps, etc.
+ */
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
+#include <asm/fpu/regset.h>
+
+/*
+ * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
+ * as the "regset->n" for the xstate regset will be updated based on the feature
+ * capabilities supported by the xsave.
+ */
+int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ struct fpu *target_fpu = &target->thread.fpu;
+
+ return target_fpu->fpstate_active ? regset->n : 0;
+}
+
+int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ struct fpu *target_fpu = &target->thread.fpu;
+
+ return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+}
+
+int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+
+ if (!cpu_has_fxsr)
+ return -ENODEV;
+
+ fpu__activate_fpstate_read(fpu);
+ fpstate_sanitize_xstate(fpu);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &fpu->state.fxsave, 0, -1);
+}
+
+int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ int ret;
+
+ if (!cpu_has_fxsr)
+ return -ENODEV;
+
+ fpu__activate_fpstate_write(fpu);
+ fpstate_sanitize_xstate(fpu);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &fpu->state.fxsave, 0, -1);
+
+ /*
+ * mxcsr reserved bits must be masked to zero for security reasons.
+ */
+ fpu->state.fxsave.mxcsr &= mxcsr_feature_mask;
+
+ /*
+ * update the header bits in the xsave header, indicating the
+ * presence of FP and SSE state.
+ */
+ if (cpu_has_xsave)
+ fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE;
+
+ return ret;
+}
+
+int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct xregs_state *xsave;
+ int ret;
+
+ if (!cpu_has_xsave)
+ return -ENODEV;
+
+ fpu__activate_fpstate_read(fpu);
+
+ xsave = &fpu->state.xsave;
+
+ /*
+	 * Copy the 48 bytes defined by the software first into the xstate
+ * memory layout in the thread struct, so that we can copy the entire
+ * xstateregs to the user using one user_regset_copyout().
+ */
+ memcpy(&xsave->i387.sw_reserved,
+ xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
+ /*
+ * Copy the xstate memory layout.
+ */
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+ return ret;
+}
+
+int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct xregs_state *xsave;
+ int ret;
+
+ if (!cpu_has_xsave)
+ return -ENODEV;
+
+ fpu__activate_fpstate_write(fpu);
+
+ xsave = &fpu->state.xsave;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+ /*
+ * mxcsr reserved bits must be masked to zero for security reasons.
+ */
+ xsave->i387.mxcsr &= mxcsr_feature_mask;
+ xsave->header.xfeatures &= xfeatures_mask;
+ /*
+ * These bits must be zero.
+ */
+ memset(&xsave->header.reserved, 0, 48);
+
+ return ret;
+}
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+
+ return tmp;
+}
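
A standalone worked example of the conversion above; the i387 tag word uses two bits per register (00 valid, 01 zero, 10 special, 11 empty) while FXSR keeps one bit per register (1 = non-empty):

```c
#include <stdio.h>

static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp = ~twd;

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

int main(void)
{
	/* st0 valid (00), st1 zero (01), the rest empty (11): 0xfff4 */
	printf("%#x\n", twd_i387_to_fxsr(0xfff4)); /* prints 0x3 */
	return 0;
}
```
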
+
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
+#define FP_EXP_TAG_VALID 0
+#define FP_EXP_TAG_ZERO 1
+#define FP_EXP_TAG_SPECIAL 2
+#define FP_EXP_TAG_EMPTY 3
+
+static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave)
+{
+ struct _fpxreg *st;
+ u32 tos = (fxsave->swd >> 11) & 7;
+ u32 twd = (unsigned long) fxsave->twd;
+ u32 tag;
+ u32 ret = 0xffff0000u;
+ int i;
+
+ for (i = 0; i < 8; i++, twd >>= 1) {
+ if (twd & 0x1) {
+ st = FPREG_ADDR(fxsave, (i - tos) & 7);
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ case 0x0000:
+ if (!st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3])
+ tag = FP_EXP_TAG_ZERO;
+ else
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ default:
+ if (st->significand[3] & 0x8000)
+ tag = FP_EXP_TAG_VALID;
+ else
+ tag = FP_EXP_TAG_SPECIAL;
+ break;
+ }
+ } else {
+ tag = FP_EXP_TAG_EMPTY;
+ }
+ ret |= tag << (2 * i);
+ }
+ return ret;
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+void
+convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
+{
+ struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
+ struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
+ struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
+ int i;
+
+ env->cwd = fxsave->cwd | 0xffff0000u;
+ env->swd = fxsave->swd | 0xffff0000u;
+ env->twd = twd_fxsr_to_i387(fxsave);
+
+#ifdef CONFIG_X86_64
+ env->fip = fxsave->rip;
+ env->foo = fxsave->rdp;
+ /*
+	 * These should actually be ds/cs at FPU exception time, but
+	 * that information is not available in 64-bit mode.
+ */
+ env->fcs = task_pt_regs(tsk)->cs;
+ if (tsk == current) {
+ savesegment(ds, env->fos);
+ } else {
+ env->fos = tsk->thread.ds;
+ }
+ env->fos |= 0xffff0000;
+#else
+ env->fip = fxsave->fip;
+ env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
+ env->foo = fxsave->foo;
+ env->fos = fxsave->fos;
+#endif
+
+ for (i = 0; i < 8; ++i)
+ memcpy(&to[i], &from[i], sizeof(to[0]));
+}
+
+void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env)
+
+{
+ struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
+ struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
+ struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
+ int i;
+
+ fxsave->cwd = env->cwd;
+ fxsave->swd = env->swd;
+ fxsave->twd = twd_i387_to_fxsr(env->twd);
+ fxsave->fop = (u16) ((u32) env->fcs >> 16);
+#ifdef CONFIG_X86_64
+ fxsave->rip = env->fip;
+ fxsave->rdp = env->foo;
+ /* cs and ds ignored */
+#else
+ fxsave->fip = env->fip;
+ fxsave->fcs = (env->fcs & 0xffff);
+ fxsave->foo = env->foo;
+ fxsave->fos = env->fos;
+#endif
+
+ for (i = 0; i < 8; ++i)
+ memcpy(&to[i], &from[i], sizeof(from[0]));
+}
+
+int fpregs_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct user_i387_ia32_struct env;
+
+ fpu__activate_fpstate_read(fpu);
+
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+
+ if (!cpu_has_fxsr)
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &fpu->state.fsave, 0,
+ -1);
+
+ fpstate_sanitize_xstate(fpu);
+
+ if (kbuf && pos == 0 && count == sizeof(env)) {
+ convert_from_fxsr(kbuf, target);
+ return 0;
+ }
+
+ convert_from_fxsr(&env, target);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+}
+
+int fpregs_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct fpu *fpu = &target->thread.fpu;
+ struct user_i387_ia32_struct env;
+ int ret;
+
+ fpu__activate_fpstate_write(fpu);
+ fpstate_sanitize_xstate(fpu);
+
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+
+ if (!cpu_has_fxsr)
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &fpu->state.fsave, 0,
+ -1);
+
+ if (pos > 0 || count < sizeof(env))
+ convert_from_fxsr(&env, target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
+ if (!ret)
+ convert_to_fxsr(target, &env);
+
+ /*
+ * update the header bit in the xsave header, indicating the
+ * presence of FP.
+ */
+ if (cpu_has_xsave)
+ fpu->state.xsave.header.xfeatures |= XSTATE_FP;
+ return ret;
+}
+
+/*
+ * FPU state for core dumps.
+ * This is only used for a.out dumps now.
+ * It is declared generically using elf_fpregset_t (which is
+ * struct user_i387_struct) but is in fact only used for 32-bit
+ * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
+ */
+int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
+{
+ struct task_struct *tsk = current;
+ struct fpu *fpu = &tsk->thread.fpu;
+ int fpvalid;
+
+ fpvalid = fpu->fpstate_active;
+ if (fpvalid)
+ fpvalid = !fpregs_get(tsk, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ ufpu, NULL);
+
+ return fpvalid;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
new file mode 100644
index 000000000000..50ec9af1bd51
--- /dev/null
+++ b/arch/x86/kernel/fpu/signal.c
@@ -0,0 +1,404 @@
+/*
+ * FPU signal frame handling routines.
+ */
+
+#include <linux/compat.h>
+#include <linux/cpu.h>
+
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
+#include <asm/fpu/regset.h>
+
+#include <asm/sigframe.h>
+
+static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+static inline int check_for_xstate(struct fxregs_state __user *buf,
+ void __user *fpstate,
+ struct _fpx_sw_bytes *fx_sw)
+{
+ int min_xstate_size = sizeof(struct fxregs_state) +
+ sizeof(struct xstate_header);
+ unsigned int magic2;
+
+ if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
+ return -1;
+
+ /* Check for the first magic field and other error scenarios. */
+ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
+ fx_sw->xstate_size < min_xstate_size ||
+ fx_sw->xstate_size > xstate_size ||
+ fx_sw->xstate_size > fx_sw->extended_size)
+ return -1;
+
+ /*
+ * Check for the presence of second magic word at the end of memory
+ * layout. This detects the case where the user just copied the legacy
+	 * fpstate layout without copying the extended state information
+ * in the memory layout.
+ */
+ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
+ || magic2 != FP_XSTATE_MAGIC2)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
+{
+ if (use_fxsr()) {
+ struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct user_i387_ia32_struct env;
+ struct _fpstate_ia32 __user *fp = buf;
+
+ convert_from_fxsr(&env, tsk);
+
+ if (__copy_to_user(buf, &env, sizeof(env)) ||
+ __put_user(xsave->i387.swd, &fp->status) ||
+ __put_user(X86_FXSR_MAGIC, &fp->magic))
+ return -1;
+ } else {
+ struct fregs_state __user *fp = buf;
+ u32 swd;
+ if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
+{
+ struct xregs_state __user *x = buf;
+ struct _fpx_sw_bytes *sw_bytes;
+ u32 xfeatures;
+ int err;
+
+ /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
+ sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
+ err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+
+ if (!use_xsave())
+ return err;
+
+ err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+
+ /*
+ * Read the xfeatures which we copied (directly from the cpu or
+ * from the state in task struct) to the user buffers.
+ */
+ err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures);
+
+ /*
+	 * For legacy compatibility, we always set the FP/SSE bits in the
+	 * bit vector while saving the state to the user context. This
+	 * lets us capture any changes (during sigreturn) to the FP/SSE
+	 * bits made by legacy applications which don't touch xfeatures
+	 * in the xsave header.
+ *
+ * xsave aware apps can change the xfeatures in the xsave
+ * header as well as change any contents in the memory layout.
+ * xrestore as part of sigreturn will capture all the changes.
+ */
+ xfeatures |= XSTATE_FPSSE;
+
+ err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
+
+ return err;
+}
+
+static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
+{
+ int err;
+
+ if (use_xsave())
+ err = copy_xregs_to_user(buf);
+ else if (use_fxsr())
+ err = copy_fxregs_to_user((struct fxregs_state __user *) buf);
+ else
+ err = copy_fregs_to_user((struct fregs_state __user *) buf);
+
+ if (unlikely(err) && __clear_user(buf, xstate_size))
+ err = -EFAULT;
+ return err;
+}
+
+/*
+ * Save the fpu, extended register state to the user signal frame.
+ *
+ * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
+ * state is copied.
+ * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
+ *
+ * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
+ * buf != buf_fx for 32-bit frames with fxstate.
+ *
+ * If the fpu, extended register state is live, save the state directly
+ * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise,
+ * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ *
+ * If this is a 32-bit frame with fxstate, put an fsave header before
+ * the aligned state at 'buf_fx'.
+ *
+ * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
+ * indicating the absence/presence of the extended state to the user.
+ */
+int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
+{
+ struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct task_struct *tsk = current;
+ int ia32_fxstate = (buf != buf_fx);
+
+ ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+ config_enabled(CONFIG_IA32_EMULATION));
+
+ if (!access_ok(VERIFY_WRITE, buf, size))
+ return -EACCES;
+
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return fpregs_soft_get(current, NULL, 0,
+ sizeof(struct user_i387_ia32_struct), NULL,
+ (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
+
+ if (fpregs_active()) {
+ /* Save the live register state to the user directly. */
+ if (copy_fpregs_to_sigframe(buf_fx))
+ return -1;
+ /* Update the thread's fxstate to save the fsave header. */
+ if (ia32_fxstate)
+ copy_fxregs_to_kernel(&tsk->thread.fpu);
+ } else {
+ fpstate_sanitize_xstate(&tsk->thread.fpu);
+ if (__copy_to_user(buf_fx, xsave, xstate_size))
+ return -1;
+ }
+
+ /* Save the fsave header for the 32-bit frames. */
+ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
+ return -1;
+
+ if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
+ return -1;
+
+ return 0;
+}
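
The resulting frame layout, sketched for the two cases (an illustrative diagram, not a kernel type):

```c
/*
 *   32-bit fxstate frames (buf != buf_fx):
 *     buf    -> struct fregs_state     legacy fsave header (+ magic/status)
 *     buf_fx -> [f|fx|x]save image     64-byte aligned, xstate_size bytes
 *               __u32 FP_XSTATE_MAGIC2 only when xsave is in use
 *
 *   64-bit frames and 32-bit fsave frames (buf == buf_fx):
 *     the fsave header is absent; the save image starts at buf.
 */
```
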
+
+static inline void
+sanitize_restored_xstate(struct task_struct *tsk,
+ struct user_i387_ia32_struct *ia32_env,
+ u64 xfeatures, int fx_only)
+{
+ struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct xstate_header *header = &xsave->header;
+
+ if (use_xsave()) {
+ /* These bits must be zero. */
+ memset(header->reserved, 0, 48);
+
+ /*
+ * Init the state that is not present in the memory
+ * layout and not enabled by the OS.
+ */
+ if (fx_only)
+ header->xfeatures = XSTATE_FPSSE;
+ else
+ header->xfeatures &= (xfeatures_mask & xfeatures);
+ }
+
+ if (use_fxsr()) {
+ /*
+		 * mxcsr reserved bits must be masked to zero for security
+ * reasons.
+ */
+ xsave->i387.mxcsr &= mxcsr_feature_mask;
+
+ convert_to_fxsr(tsk, ia32_env);
+ }
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE state.
+ */
+static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
+{
+ if (use_xsave()) {
+ if ((unsigned long)buf % 64 || fx_only) {
+ u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE;
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ return copy_user_to_fxregs(buf);
+ } else {
+ u64 init_bv = xfeatures_mask & ~xbv;
+ if (unlikely(init_bv))
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ return copy_user_to_xregs(buf, xbv);
+ }
+ } else if (use_fxsr()) {
+ return copy_user_to_fxregs(buf);
+ } else
+ return copy_user_to_fregs(buf);
+}
+
+static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
+{
+ int ia32_fxstate = (buf != buf_fx);
+ struct task_struct *tsk = current;
+ struct fpu *fpu = &tsk->thread.fpu;
+ int state_size = xstate_size;
+ u64 xfeatures = 0;
+ int fx_only = 0;
+
+ ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+ config_enabled(CONFIG_IA32_EMULATION));
+
+ if (!buf) {
+ fpu__clear(fpu);
+ return 0;
+ }
+
+ if (!access_ok(VERIFY_READ, buf, size))
+ return -EACCES;
+
+ fpu__activate_curr(fpu);
+
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return fpregs_soft_set(current, NULL,
+ 0, sizeof(struct user_i387_ia32_struct),
+ NULL, buf) != 0;
+
+ if (use_xsave()) {
+ struct _fpx_sw_bytes fx_sw_user;
+ if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
+ /*
+ * Couldn't find the extended state information in the
+ * memory layout. Restore just the FP/SSE and init all
+ * the other extended state.
+ */
+ state_size = sizeof(struct fxregs_state);
+ fx_only = 1;
+ } else {
+ state_size = fx_sw_user.xstate_size;
+ xfeatures = fx_sw_user.xfeatures;
+ }
+ }
+
+ if (ia32_fxstate) {
+ /*
+ * For 32-bit frames with fxstate, copy the user state to the
+ * thread's fpu state, reconstruct fxstate from the fsave
+ * header. Sanitize the copied state etc.
+ */
+ struct fpu *fpu = &tsk->thread.fpu;
+ struct user_i387_ia32_struct env;
+ int err = 0;
+
+ /*
+		 * Drop the current fpu, which clears fpu->fpstate_active. This
+		 * ensures that a context switch during the copy of the new
+		 * state cannot save/restore the half-copied intermediate
+		 * state and thereby corrupt the newly restored state. We
+		 * will be ready to restore/save the state only after
+		 * fpu->fpstate_active is set again.
+ */
+ fpu__drop(fpu);
+
+ if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
+ __copy_from_user(&env, buf, sizeof(env))) {
+ fpstate_init(&fpu->state);
+ err = -1;
+ } else {
+ sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
+ }
+
+ fpu->fpstate_active = 1;
+ if (use_eager_fpu()) {
+ preempt_disable();
+ fpu__restore(fpu);
+ preempt_enable();
+ }
+
+ return err;
+ } else {
+ /*
+ * For 64-bit frames and 32-bit fsave frames, restore the user
+ * state to the registers directly (with exceptions handled).
+ */
+ user_fpu_begin();
+ if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
+ fpu__clear(fpu);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static inline int xstate_sigframe_size(void)
+{
+ return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+}
+
+/*
+ * Restore FPU state from a sigframe:
+ */
+int fpu__restore_sig(void __user *buf, int ia32_frame)
+{
+ void __user *buf_fx = buf;
+ int size = xstate_sigframe_size();
+
+ if (ia32_frame && use_fxsr()) {
+ buf_fx = buf + sizeof(struct fregs_state);
+ size += sizeof(struct fregs_state);
+ }
+
+ return __fpu__restore_sig(buf, buf_fx, size);
+}
+
+unsigned long
+fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
+ unsigned long *buf_fx, unsigned long *size)
+{
+ unsigned long frame_size = xstate_sigframe_size();
+
+ *buf_fx = sp = round_down(sp - frame_size, 64);
+ if (ia32_frame && use_fxsr()) {
+ frame_size += sizeof(struct fregs_state);
+ sp -= sizeof(struct fregs_state);
+ }
+
+ *size = frame_size;
+
+ return sp;
+}
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed to by the fpstate pointer in the sigcontext.
+ * This will be saved whenever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+void fpu__init_prepare_fx_sw_frame(void)
+{
+ int fsave_header_size = sizeof(struct fregs_state);
+ int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+
+ if (config_enabled(CONFIG_X86_32))
+ size += fsave_header_size;
+
+ fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+ fx_sw_reserved.extended_size = size;
+ fx_sw_reserved.xfeatures = xfeatures_mask;
+ fx_sw_reserved.xstate_size = xstate_size;
+
+ if (config_enabled(CONFIG_IA32_EMULATION)) {
+ fx_sw_reserved_ia32 = fx_sw_reserved;
+ fx_sw_reserved_ia32.extended_size += fsave_header_size;
+ }
+}
+
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
new file mode 100644
index 000000000000..62fc001c7846
--- /dev/null
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -0,0 +1,461 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/compat.h>
+#include <linux/cpu.h>
+
+#include <asm/fpu/api.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
+#include <asm/fpu/regset.h>
+
+#include <asm/tlbflush.h>
+
+static const char *xfeature_names[] =
+{
+ "x87 floating point registers" ,
+ "SSE registers" ,
+ "AVX registers" ,
+ "MPX bounds registers" ,
+ "MPX CSR" ,
+ "AVX-512 opmask" ,
+ "AVX-512 Hi256" ,
+ "AVX-512 ZMM_Hi256" ,
+ "unknown xstate feature" ,
+};
+
+/*
+ * Mask of xstate features supported by the CPU and the kernel:
+ */
+u64 xfeatures_mask __read_mostly;
+
+static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
+static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
+static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+
+/* The number of supported xfeatures in xfeatures_mask: */
+static unsigned int xfeatures_nr;
+
+/*
+ * Return whether the system supports a given xfeature.
+ *
+ * Also return the name of the (most advanced) feature that the caller requested:
+ */
+int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
+{
+ u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
+
+ if (unlikely(feature_name)) {
+ long xfeature_idx, max_idx;
+ u64 xfeatures_print;
+ /*
+	 * We use fls64() here to be able to print the most advanced
+	 * feature that was requested but is missing. So if a driver
+ * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
+ * missing AVX feature - this is the most informative message
+ * to users:
+ */
+ if (xfeatures_missing)
+ xfeatures_print = xfeatures_missing;
+ else
+ xfeatures_print = xfeatures_needed;
+
+ xfeature_idx = fls64(xfeatures_print)-1;
+ max_idx = ARRAY_SIZE(xfeature_names)-1;
+ xfeature_idx = min(xfeature_idx, max_idx);
+
+ *feature_name = xfeature_names[xfeature_idx];
+ }
+
+ if (xfeatures_missing)
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
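
A usage sketch in the shape a feature-dependent driver init might take (the function name and message are illustrative):

```c
static int __init avx_driver_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
		pr_info("CPU feature '%s' not supported, skipping\n",
			feature_name);
		return -ENODEV;
	}
	/* ... register the AVX-accelerated implementation ... */
	return 0;
}
```
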
+
+/*
+ * When executing XSAVEOPT (or other optimized XSAVE instructions), if
+ * a processor implementation detects that an FPU state component is still
+ * (or is again) in its initialized state, it may clear the corresponding
+ * bit in the header.xfeatures field, and can skip the writeout of registers
+ * to the corresponding memory layout.
+ *
+ * This means that when the bit is zero, the state component might still contain
+ * some previous - non-initialized register state.
+ *
+ * Before writing xstate information to user-space we sanitize those components,
+ * to always ensure that the memory layout of a feature will be in the init state
+ * if the corresponding header bit is zero. This is to ensure that user-space doesn't
+ * see some stale state in the memory layout during signal handling, debugging etc.
+ */
+void fpstate_sanitize_xstate(struct fpu *fpu)
+{
+ struct fxregs_state *fx = &fpu->state.fxsave;
+ int feature_bit;
+ u64 xfeatures;
+
+ if (!use_xsaveopt())
+ return;
+
+ xfeatures = fpu->state.xsave.header.xfeatures;
+
+ /*
+	 * None of the feature bits is in the init state: the memory
+	 * layout is up to date, so there is nothing else to do.
+ */
+ if ((xfeatures & xfeatures_mask) == xfeatures_mask)
+ return;
+
+ /*
+ * FP is in init state
+ */
+ if (!(xfeatures & XSTATE_FP)) {
+ fx->cwd = 0x37f;
+ fx->swd = 0;
+ fx->twd = 0;
+ fx->fop = 0;
+ fx->rip = 0;
+ fx->rdp = 0;
+ memset(&fx->st_space[0], 0, 128);
+ }
+
+ /*
+ * SSE is in init state
+ */
+ if (!(xfeatures & XSTATE_SSE))
+ memset(&fx->xmm_space[0], 0, 256);
+
+ /*
+	 * The first two features are FP and SSE, which we already
+	 * handled above in a special way:
+ */
+ feature_bit = 0x2;
+ xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
+
+ /*
+ * Update all the remaining memory layouts according to their
+ * standard xstate layout, if their header bit is in the init
+ * state:
+ */
+ while (xfeatures) {
+ if (xfeatures & 0x1) {
+ int offset = xstate_offsets[feature_bit];
+ int size = xstate_sizes[feature_bit];
+
+ memcpy((void *)fx + offset,
+ (void *)&init_fpstate.xsave + offset,
+ size);
+ }
+
+ xfeatures >>= 1;
+ feature_bit++;
+ }
+}
+
+/*
+ * Enable the extended processor state save/restore feature.
+ * Called once per CPU onlining.
+ */
+void fpu__init_cpu_xstate(void)
+{
+ if (!cpu_has_xsave || !xfeatures_mask)
+ return;
+
+ cr4_set_bits(X86_CR4_OSXSAVE);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+}
+
+/*
+ * Record the offsets and sizes of various xstates contained
+ * in the XSAVE state memory layout.
+ *
+ * ( Note that certain features might be non-present, for them
+ * we'll have 0 offset and 0 size. )
+ */
+static void __init setup_xstate_features(void)
+{
+ u32 eax, ebx, ecx, edx, leaf;
+
+ xfeatures_nr = fls64(xfeatures_mask);
+
+ for (leaf = 2; leaf < xfeatures_nr; leaf++) {
+ cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
+
+ xstate_offsets[leaf] = ebx;
+ xstate_sizes[leaf] = eax;
+
+ printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
+ }
+}
+
+static void __init print_xstate_feature(u64 xstate_mask)
+{
+ const char *feature_name;
+
+ if (cpu_has_xfeatures(xstate_mask, &feature_name))
+ pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name);
+}
+
+/*
+ * Print out all the supported xstate features:
+ */
+static void __init print_xstate_features(void)
+{
+ print_xstate_feature(XSTATE_FP);
+ print_xstate_feature(XSTATE_SSE);
+ print_xstate_feature(XSTATE_YMM);
+ print_xstate_feature(XSTATE_BNDREGS);
+ print_xstate_feature(XSTATE_BNDCSR);
+ print_xstate_feature(XSTATE_OPMASK);
+ print_xstate_feature(XSTATE_ZMM_Hi256);
+ print_xstate_feature(XSTATE_Hi16_ZMM);
+}
+
+/*
+ * This function sets up offsets and sizes of all extended states in
+ * xsave area. This supports both standard format and compacted format
+ * of the xsave aread.
+ */
+static void __init setup_xstate_comp(void)
+{
+ unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+ int i;
+
+ /*
+ * The FP xstates and SSE xstates are legacy states. They are always
+ * in the fixed offsets in the xsave area in either compacted form
+ * or standard form.
+ */
+ xstate_comp_offsets[0] = 0;
+ xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+
+ if (!cpu_has_xsaves) {
+ for (i = 2; i < xfeatures_nr; i++) {
+ if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
+ xstate_comp_offsets[i] = xstate_offsets[i];
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ }
+ }
+ return;
+ }
+
+ xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = 2; i < xfeatures_nr; i++) {
+ if (test_bit(i, (unsigned long *)&xfeatures_mask))
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ else
+ xstate_comp_sizes[i] = 0;
+
+ if (i > 2)
+ xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ + xstate_comp_sizes[i-1];
+
+ }
+}
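
A standalone sketch of the compacted-offset rule above, with hypothetical feature sizes (the real ones come from CPUID leaf 0xd):

```c
#include <stdio.h>

#define FXSAVE_SIZE	512
#define XSAVE_HDR_SIZE	64

int main(void)
{
	/* sizes[i] == 0 means feature i is not enabled; FP/SSE (features
	 * 0 and 1) live in the fixed legacy area and are not listed: */
	unsigned int sizes[] = { 0, 0, 256, 0, 64 };
	unsigned int offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;	/* 576 */
	int i;

	for (i = 2; i < 5; i++) {
		if (!sizes[i])
			continue;	/* disabled features consume no space */
		printf("feature %d: compacted offset %u\n", i, offset);
		offset += sizes[i];	/* enabled features pack back-to-back */
	}
	return 0;	/* prints 576 for feature 2, 832 for feature 4 */
}
```
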
+
+/*
+ * setup the xstate image representing the init state
+ */
+static void __init setup_init_fpu_buf(void)
+{
+ static int on_boot_cpu = 1;
+
+ WARN_ON_FPU(!on_boot_cpu);
+ on_boot_cpu = 0;
+
+ if (!cpu_has_xsave)
+ return;
+
+ setup_xstate_features();
+ print_xstate_features();
+
+ if (cpu_has_xsaves) {
+ init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
+ init_fpstate.xsave.header.xfeatures = xfeatures_mask;
+ }
+
+ /*
+	 * Init all the feature states, with header.xfeatures being 0x0:
+ */
+ copy_kernel_to_xregs_booting(&init_fpstate.xsave);
+
+ /*
+ * Dump the init state again. This is to identify the init state
+	 * of any feature which is not represented by all zeroes.
+ */
+ copy_xregs_to_kernel_booting(&init_fpstate.xsave);
+}
+
+/*
+ * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ */
+static void __init init_xstate_size(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ int i;
+
+ if (!cpu_has_xsaves) {
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ xstate_size = ebx;
+ return;
+ }
+
+ xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ for (i = 2; i < 64; i++) {
+ if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
+ cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ xstate_size += eax;
+ }
+ }
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ * Called once per system bootup.
+ */
+void __init fpu__init_system_xstate(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ static int on_boot_cpu = 1;
+
+ WARN_ON_FPU(!on_boot_cpu);
+ on_boot_cpu = 0;
+
+ if (!cpu_has_xsave) {
+ pr_info("x86/fpu: Legacy x87 FPU detected.\n");
+ return;
+ }
+
+ if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
+ WARN_ON_FPU(1);
+ return;
+ }
+
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ xfeatures_mask = eax + ((u64)edx << 32);
+
+ if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+ BUG();
+ }
+
+ /* Support only the state known to the OS: */
+ xfeatures_mask = xfeatures_mask & XCNTXT_MASK;
+
+ /* Enable xstate instructions to be able to continue with initialization: */
+ fpu__init_cpu_xstate();
+
+ /* Recompute the context size for enabled features: */
+ init_xstate_size();
+
+ update_regset_xstate_info(xstate_size, xfeatures_mask);
+ fpu__init_prepare_fx_sw_frame();
+ setup_init_fpu_buf();
+ setup_xstate_comp();
+
+ pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n",
+ xfeatures_mask,
+ xstate_size,
+ cpu_has_xsaves ? "compacted" : "standard");
+}
+
+/*
+ * Restore minimal FPU state after suspend:
+ */
+void fpu__resume_cpu(void)
+{
+ /*
+ * Restore XCR0 on xsave capable CPUs:
+ */
+ if (cpu_has_xsave)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+}
+
+/*
+ * Given the xsave area and a state inside, this function returns the
+ * address of the state.
+ *
+ * This is the API that is called to get xstate address in either
+ * standard format or compacted format of xsave area.
+ *
+ * Note that if there is no data for the field in the xsave buffer
+ * this will return NULL.
+ *
+ * Inputs:
+ * xstate: the thread's storage area for all FPU data
+ * xstate_feature: state which is defined in xsave.h (e.g.
+ * XSTATE_FP, XSTATE_SSE, etc...)
+ * Output:
+ * address of the state in the xsave area, or NULL if the
+ * field is not present in the xsave buffer.
+ */
+void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+{
+ int feature_nr = fls64(xstate_feature) - 1;
+ /*
+ * Do we even *have* xsave state?
+ */
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
+ return NULL;
+
+ xsave = &current->thread.fpu.state.xsave;
+ /*
+ * We should not ever be requesting features that we
+ * have not enabled. Remember that pcntxt_mask is
+ * what we write to the XCR0 register.
+ */
+ WARN_ONCE(!(xfeatures_mask & xstate_feature),
+ "get of unsupported state");
+ /*
+	 * This assumes that the last 'xsave*' instruction
+	 * requested that 'xstate_feature' be saved.
+	 * If it did not, we might be seeing an old value
+ * of the field in the buffer.
+ *
+ * This can happen because the last 'xsave' did not
+ * request that this feature be saved (unlikely)
+ * or because the "init optimization" caused it
+ * to not be saved.
+ */
+ if (!(xsave->header.xfeatures & xstate_feature))
+ return NULL;
+
+ return (void *)xsave + xstate_comp_offsets[feature_nr];
+}
+EXPORT_SYMBOL_GPL(get_xsave_addr);
+
+/*
+ * This wraps up the common operations that need to occur when retrieving
+ * data from xsave state. It first ensures that the current task was
+ * using the FPU and retrieves the data into a buffer. It then calculates
+ * the offset of the requested field in the buffer.
+ *
+ * This function is safe to call whether the FPU is in use or not.
+ *
+ * Note that this only works on the current task.
+ *
+ * Inputs:
+ * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
+ * XSTATE_SSE, etc...)
+ * Output:
+ * address of the state in the xsave area or NULL if the state
+ * is not present or is in its 'init state'.
+ */
+const void *get_xsave_field_ptr(int xsave_state)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ if (!fpu->fpstate_active)
+ return NULL;
+ /*
+ * fpu__save() takes the CPU's xstate registers
+	 * and saves them off to the fpu->state memory buffer.
+ */
+ fpu__save(fpu);
+
+ return get_xsave_addr(&fpu->state.xsave, xsave_state);
+}
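
A usage sketch mirroring the MPX code's pattern; struct bndcsr and MPX_BNDCFG_ENABLE_FLAG come from the MPX headers:

```c
static int mpx_enabled_for_current(void)
{
	const struct bndcsr *bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);

	if (!bndcsr)
		return 0;	/* state absent or still in its init state */

	return !!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG);
}
```
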
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6db053..5a4668136e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
- set_intr_gate(i, early_idt_handlers[i]);
+ set_intr_gate(i, early_idt_handler_array[i]);
load_idt((const struct desc_ptr *)&idt_descr);
copy_bootdata(__va(real_mode_data));
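
The rename works because early_idt_handler_array is declared (in asm/segment.h, as part of this series) as a two-dimensional array of fixed-size stubs, each padded to EARLY_IDT_HANDLER_SIZE bytes by the .fill directives below. A sketch of the indexing that the set_intr_gate() loop above relies on:

```c
extern const char
	early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];

/* what early_idt_handler_array[i] effectively computes: */
static const void *early_stub(int vector)
{
	return early_idt_handler_array[vector];	/* base + vector * SIZE */
}
```
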
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d031bad9e07e..53eeb226657c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
__INIT
setup_once:
/*
- * Set up a idt with 256 entries pointing to ignore_int,
- * interrupt gates. It doesn't actually load idt - that needs
- * to be done on each CPU. Interrupts are enabled elsewhere,
- * when we can be relatively sure everything is ok.
+ * Set up an idt with 256 interrupt gates that push zero if there
+ * is no error code and then jump to early_idt_handler_common.
+ * It doesn't actually load the idt - that needs to be done on
+ * each CPU. Interrupts are enabled elsewhere, when we can be
+ * relatively sure everything is ok.
*/
movl $idt_table,%edi
- movl $early_idt_handlers,%eax
+ movl $early_idt_handler_array,%eax
movl $NUM_EXCEPTION_VECTORS,%ecx
1:
movl %eax,(%edi)
movl %eax,4(%edi)
/* interrupt gate, dpl=0, present */
movl $(0x8E000000 + __KERNEL_CS),2(%edi)
- addl $9,%eax
+ addl $EARLY_IDT_HANDLER_SIZE,%eax
addl $8,%edi
loop 1b
@@ -524,26 +525,28 @@ setup_once:
andl $0,setup_once_ref /* Once is enough, thanks */
ret
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
# 36(%esp) %eflags
# 32(%esp) %cs
# 28(%esp) %eip
# 24(%esp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .if (EXCEPTION_ERRCODE_MASK >> i) & 1
- ASM_NOP2
- .else
+ .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
- jmp early_idt_handler
+ jmp early_idt_handler_common
i = i + 1
+ .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
- /* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+ */
cld
cmpl $2,(%esp) # X86_TRAP_NMI
@@ -603,7 +606,7 @@ ex_entry:
is_nmi:
addl $8,%esp /* drop vector number and error code */
iret
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
/* This is the default interrupt "handler" :-) */
ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ae6588b301c2..df7e78057ae0 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -321,26 +321,28 @@ bad_address:
jmp bad_address
__INIT
- .globl early_idt_handlers
-early_idt_handlers:
+ENTRY(early_idt_handler_array)
# 104(%rsp) %rflags
# 96(%rsp) %cs
# 88(%rsp) %rip
# 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .if (EXCEPTION_ERRCODE_MASK >> i) & 1
- ASM_NOP2
- .else
+ .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
pushq $0 # Dummy error code, to make stack frame uniform
.endif
pushq $i # 72(%rsp) Vector number
- jmp early_idt_handler
+ jmp early_idt_handler_common
i = i + 1
+ .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
+ENDPROC(early_idt_handler_array)
-/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+ */
cld
cmpl $2,(%rsp) # X86_TRAP_NMI
@@ -412,7 +414,7 @@ ENTRY(early_idt_handler)
is_nmi:
addq $16,%rsp # drop vector number and error code
INTERRUPT_RETURN
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
__INITDATA
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 05fd74f537d6..64341aa485ae 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -40,7 +40,5 @@ EXPORT_SYMBOL(empty_zero_page);
#ifdef CONFIG_PREEMPT
EXPORT_SYMBOL(___preempt_schedule);
-#ifdef CONFIG_CONTEXT_TRACKING
-EXPORT_SYMBOL(___preempt_schedule_context);
-#endif
+EXPORT_SYMBOL(___preempt_schedule_notrace);
#endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
deleted file mode 100644
index 009183276bb7..000000000000
--- a/arch/x86/kernel/i387.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-#include <linux/module.h>
-#include <linux/regset.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-
-#include <asm/sigcontext.h>
-#include <asm/processor.h>
-#include <asm/math_emu.h>
-#include <asm/tlbflush.h>
-#include <asm/uaccess.h>
-#include <asm/ptrace.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
-#include <asm/user.h>
-
-static DEFINE_PER_CPU(bool, in_kernel_fpu);
-
-void kernel_fpu_disable(void)
-{
- WARN_ON(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
-}
-
-void kernel_fpu_enable(void)
-{
- this_cpu_write(in_kernel_fpu, false);
-}
-
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
-static inline bool interrupted_kernel_fpu_idle(void)
-{
- if (this_cpu_read(in_kernel_fpu))
- return false;
-
- if (use_eager_fpu())
- return true;
-
- return !__thread_has_fpu(current) &&
- (read_cr0() & X86_CR0_TS);
-}
-
-/*
- * Were we in user mode (or vm86 mode) when we were
- * interrupted?
- *
- * Doing kernel_fpu_begin/end() is ok if we are running
- * in an interrupt context from user mode - we'll just
- * save the FPU state as required.
- */
-static inline bool interrupted_user_mode(void)
-{
- struct pt_regs *regs = get_irq_regs();
- return regs && user_mode(regs);
-}
-
-/*
- * Can we use the FPU in kernel mode with the
- * whole "kernel_fpu_begin/end()" sequence?
- *
- * It's always ok in process context (ie "not interrupt")
- * but it is sometimes ok even from an irq.
- */
-bool irq_fpu_usable(void)
-{
- return !in_interrupt() ||
- interrupted_user_mode() ||
- interrupted_kernel_fpu_idle();
-}
-EXPORT_SYMBOL(irq_fpu_usable);
-
-void __kernel_fpu_begin(void)
-{
- struct task_struct *me = current;
-
- this_cpu_write(in_kernel_fpu, true);
-
- if (__thread_has_fpu(me)) {
- __save_init_fpu(me);
- } else {
- this_cpu_write(fpu_owner_task, NULL);
- if (!use_eager_fpu())
- clts();
- }
-}
-EXPORT_SYMBOL(__kernel_fpu_begin);
-
-void __kernel_fpu_end(void)
-{
- struct task_struct *me = current;
-
- if (__thread_has_fpu(me)) {
- if (WARN_ON(restore_fpu_checking(me)))
- fpu_reset_state(me);
- } else if (!use_eager_fpu()) {
- stts();
- }
-
- this_cpu_write(in_kernel_fpu, false);
-}
-EXPORT_SYMBOL(__kernel_fpu_end);
-
-void unlazy_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- if (__thread_has_fpu(tsk)) {
- if (use_eager_fpu()) {
- __save_fpu(tsk);
- } else {
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- }
- }
- preempt_enable();
-}
-EXPORT_SYMBOL(unlazy_fpu);
-
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
-unsigned int xstate_size;
-EXPORT_SYMBOL_GPL(xstate_size);
-static struct i387_fxsave_struct fx_scratch;
-
-static void mxcsr_feature_mask_init(void)
-{
- unsigned long mask = 0;
-
- if (cpu_has_fxsr) {
- memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : "+m" (fx_scratch));
- mask = fx_scratch.mxcsr_mask;
- if (mask == 0)
- mask = 0x0000ffbf;
- }
- mxcsr_feature_mask &= mask;
-}
-
-static void init_thread_xstate(void)
-{
- /*
- * Note that xstate_size might be overwritten later during
- * xsave_init().
- */
-
- if (!cpu_has_fpu) {
- /*
- * Disable xsave as we do not support it if i387
- * emulation is enabled.
- */
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- xstate_size = sizeof(struct i387_soft_struct);
- return;
- }
-
- if (cpu_has_fxsr)
- xstate_size = sizeof(struct i387_fxsave_struct);
- else
- xstate_size = sizeof(struct i387_fsave_struct);
-}
-
-/*
- * Called at bootup to set up the initial FPU state that is later cloned
- * into all processes.
- */
-
-void fpu_init(void)
-{
- unsigned long cr0;
- unsigned long cr4_mask = 0;
-
-#ifndef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu) {
- pr_emerg("No FPU found and no math emulation present\n");
- pr_emerg("Giving up\n");
- for (;;)
- asm volatile("hlt");
- }
-#endif
- if (cpu_has_fxsr)
- cr4_mask |= X86_CR4_OSFXSR;
- if (cpu_has_xmm)
- cr4_mask |= X86_CR4_OSXMMEXCPT;
- if (cr4_mask)
- cr4_set_bits(cr4_mask);
-
- cr0 = read_cr0();
- cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!cpu_has_fpu)
- cr0 |= X86_CR0_EM;
- write_cr0(cr0);
-
- /*
- * init_thread_xstate is only called once to avoid overriding
- * xstate_size during boot time or during CPU hotplug.
- */
- if (xstate_size == 0)
- init_thread_xstate();
-
- mxcsr_feature_mask_init();
- xsave_init();
- eager_fpu_init();
-}
-
-void fpu_finit(struct fpu *fpu)
-{
- if (!cpu_has_fpu) {
- finit_soft_fpu(&fpu->state->soft);
- return;
- }
-
- memset(fpu->state, 0, xstate_size);
-
- if (cpu_has_fxsr) {
- fx_finit(&fpu->state->fxsave);
- } else {
- struct i387_fsave_struct *fp = &fpu->state->fsave;
- fp->cwd = 0xffff037fu;
- fp->swd = 0xffff0000u;
- fp->twd = 0xffffffffu;
- fp->fos = 0xffff0000u;
- }
-}
-EXPORT_SYMBOL_GPL(fpu_finit);
-
-/*
- * The _current_ task is using the FPU for the first time
- * so initialize it and set the mxcsr to its default
- * value at reset if we support XMM instructions and then
- * remember the current task has used the FPU.
- */
-int init_fpu(struct task_struct *tsk)
-{
- int ret;
-
- if (tsk_used_math(tsk)) {
- if (cpu_has_fpu && tsk == current)
- unlazy_fpu(tsk);
- task_disable_lazy_fpu_restore(tsk);
- return 0;
- }
-
- /*
- * Memory allocation at the first usage of the FPU and other state.
- */
- ret = fpu_alloc(&tsk->thread.fpu);
- if (ret)
- return ret;
-
- fpu_finit(&tsk->thread.fpu);
-
- set_stopped_child_used_math(tsk);
- return 0;
-}
-EXPORT_SYMBOL_GPL(init_fpu);
-
-/*
- * The xstateregs_active() routine is the same as the fpregs_active() routine,
- * as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
- */
-int fpregs_active(struct task_struct *target, const struct user_regset *regset)
-{
- return tsk_used_math(target) ? regset->n : 0;
-}
-
-int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
-{
- return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
-}
-
-int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_fxsr)
- return -ENODEV;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- sanitize_i387_state(target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fxsave, 0, -1);
-}
-
-int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_fxsr)
- return -ENODEV;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- sanitize_i387_state(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fxsave, 0, -1);
-
- /*
- * mxcsr reserved bits must be masked to zero for security reasons.
- */
- target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
- /*
- * update the header bits in the xsave header, indicating the
- * presence of FP and SSE state.
- */
- if (cpu_has_xsave)
- target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
-
- return ret;
-}
-
-int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- struct xsave_struct *xsave;
- int ret;
-
- if (!cpu_has_xsave)
- return -ENODEV;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- xsave = &target->thread.fpu.state->xsave;
-
- /*
- * Copy the 48bytes defined by the software first into the xstate
- * memory layout in the thread struct, so that we can copy the entire
- * xstateregs to the user using one user_regset_copyout().
- */
- memcpy(&xsave->i387.sw_reserved,
- xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
- /*
- * Copy the xstate memory layout.
- */
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
- return ret;
-}
-
-int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- struct xsave_struct *xsave;
- int ret;
-
- if (!cpu_has_xsave)
- return -ENODEV;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- xsave = &target->thread.fpu.state->xsave;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
- /*
- * mxcsr reserved bits must be masked to zero for security reasons.
- */
- xsave->i387.mxcsr &= mxcsr_feature_mask;
- xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
- /*
- * These bits must be zero.
- */
- memset(&xsave->xsave_hdr.reserved, 0, 48);
- return ret;
-}
-
-#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-
- return tmp;
-}
-
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
-#define FP_EXP_TAG_VALID 0
-#define FP_EXP_TAG_ZERO 1
-#define FP_EXP_TAG_SPECIAL 2
-#define FP_EXP_TAG_EMPTY 3
-
-static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
-{
- struct _fpxreg *st;
- u32 tos = (fxsave->swd >> 11) & 7;
- u32 twd = (unsigned long) fxsave->twd;
- u32 tag;
- u32 ret = 0xffff0000u;
- int i;
-
- for (i = 0; i < 8; i++, twd >>= 1) {
- if (twd & 0x1) {
- st = FPREG_ADDR(fxsave, (i - tos) & 7);
-
- switch (st->exponent & 0x7fff) {
- case 0x7fff:
- tag = FP_EXP_TAG_SPECIAL;
- break;
- case 0x0000:
- if (!st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3])
- tag = FP_EXP_TAG_ZERO;
- else
- tag = FP_EXP_TAG_SPECIAL;
- break;
- default:
- if (st->significand[3] & 0x8000)
- tag = FP_EXP_TAG_VALID;
- else
- tag = FP_EXP_TAG_SPECIAL;
- break;
- }
- } else {
- tag = FP_EXP_TAG_EMPTY;
- }
- ret |= tag << (2 * i);
- }
- return ret;
-}
-
-/*
- * FXSR floating point environment conversions.
- */
-
-void
-convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
-{
- struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
- struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
- struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
- int i;
-
- env->cwd = fxsave->cwd | 0xffff0000u;
- env->swd = fxsave->swd | 0xffff0000u;
- env->twd = twd_fxsr_to_i387(fxsave);
-
-#ifdef CONFIG_X86_64
- env->fip = fxsave->rip;
- env->foo = fxsave->rdp;
- /*
- * should be actually ds/cs at fpu exception time, but
- * that information is not available in 64bit mode.
- */
- env->fcs = task_pt_regs(tsk)->cs;
- if (tsk == current) {
- savesegment(ds, env->fos);
- } else {
- env->fos = tsk->thread.ds;
- }
- env->fos |= 0xffff0000;
-#else
- env->fip = fxsave->fip;
- env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
- env->foo = fxsave->foo;
- env->fos = fxsave->fos;
-#endif
-
- for (i = 0; i < 8; ++i)
- memcpy(&to[i], &from[i], sizeof(to[0]));
-}
-
-void convert_to_fxsr(struct task_struct *tsk,
- const struct user_i387_ia32_struct *env)
-
-{
- struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
- struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
- struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
- int i;
-
- fxsave->cwd = env->cwd;
- fxsave->swd = env->swd;
- fxsave->twd = twd_i387_to_fxsr(env->twd);
- fxsave->fop = (u16) ((u32) env->fcs >> 16);
-#ifdef CONFIG_X86_64
- fxsave->rip = env->fip;
- fxsave->rdp = env->foo;
- /* cs and ds ignored */
-#else
- fxsave->fip = env->fip;
- fxsave->fcs = (env->fcs & 0xffff);
- fxsave->foo = env->foo;
- fxsave->fos = env->fos;
-#endif
-
- for (i = 0; i < 8; ++i)
- memcpy(&to[i], &from[i], sizeof(from[0]));
-}
-
-int fpregs_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- struct user_i387_ia32_struct env;
- int ret;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
-
- if (!cpu_has_fxsr)
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fsave, 0,
- -1);
-
- sanitize_i387_state(target);
-
- if (kbuf && pos == 0 && count == sizeof(env)) {
- convert_from_fxsr(kbuf, target);
- return 0;
- }
-
- convert_from_fxsr(&env, target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
-}
-
-int fpregs_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- struct user_i387_ia32_struct env;
- int ret;
-
- ret = init_fpu(target);
- if (ret)
- return ret;
-
- sanitize_i387_state(target);
-
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
-
- if (!cpu_has_fxsr)
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fsave, 0,
- -1);
-
- if (pos > 0 || count < sizeof(env))
- convert_from_fxsr(&env, target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
- if (!ret)
- convert_to_fxsr(target, &env);
-
- /*
- * update the header bit in the xsave header, indicating the
- * presence of FP.
- */
- if (cpu_has_xsave)
- target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
- return ret;
-}
-
-/*
- * FPU state for core dumps.
- * This is only used for a.out dumps now.
- * It is declared generically using elf_fpregset_t (which is
- * struct user_i387_struct) but is in fact only used for 32-bit
- * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
- */
-int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
-{
- struct task_struct *tsk = current;
- int fpvalid;
-
- fpvalid = !!used_math();
- if (fpvalid)
- fpvalid = !fpregs_get(tsk, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- fpu, NULL);
-
- return fpvalid;
-}
-EXPORT_SYMBOL(dump_fpu);
-
-#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
-
-static int __init no_387(char *s)
-{
- setup_clear_cpu_cap(X86_FEATURE_FPU);
- return 1;
-}
-
-__setup("no387", no_387);
-
-void fpu_detect(struct cpuinfo_x86 *c)
-{
- unsigned long cr0;
- u16 fsw, fcw;
-
- fsw = fcw = 0xffff;
-
- cr0 = read_cr0();
- cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
- write_cr0(cr0);
-
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
-
- if (fsw == 0 && (fcw & 0x103f) == 0x003f)
- set_cpu_cap(c, X86_FEATURE_FPU);
- else
- clear_cpu_cap(c, X86_FEATURE_FPU);
-
- /* The final cr0 value is set in fpu_init() */
-}
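The fsw/fcw probe at the end of the deleted file deserves a note; a sketch of why the comparison distinguishes a real FPU:

	/*
	 * After fninit a real FPU leaves FSW == 0x0000 and FCW == 0x037f,
	 * and 0x037f & 0x103f == 0x003f, so both tests pass. With no FPU
	 * the 0xffff memory presets survive and both tests fail.
	 * (Sketch of the check in the deleted fpu_detect() above.)
	 */
	static bool example_fpu_present(u16 fsw, u16 fcw)
	{
		return fsw == 0 && (fcw & 0x103f) == 0x003f;
	}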
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620062df..1681504e44a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ local_irq_save(flags);
+
+ if (READ_ONCE(*ptr) != val)
+ goto out;
+
+ /*
+ * Halt until it's our turn and we've been kicked. Note that we do a
+ * safe halt for the irq-enabled case, to avoid a hang when the lock
+ * info is overwritten in the irq spinlock slowpath and no spurious
+ * interrupt occurs to save us.
+ */
+ if (arch_irqs_disabled_flags(flags))
+ halt();
+ else
+ safe_halt();
+
+out:
+ local_irq_restore(flags);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
enum kvm_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
}
}
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = kvm_wait;
+ pv_lock_ops.kick = kvm_kick_cpu;
+#else /* !CONFIG_QUEUED_SPINLOCKS */
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
pv_lock_ops.unlock_kick = kvm_unlock_kick;
+#endif
}
static __init int kvm_spinlock_init_jump(void)
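kvm_wait() and kvm_kick_cpu() fill the two hooks the paravirt queued-spinlock slowpath uses; a simplified sketch of the contract (not the real slowpath, which lives in the generic qspinlock code):

	/*
	 * Sketch only: a waiter sleeps in pv_lock_ops.wait() on the byte
	 * it is spinning on; the unlocker calls pv_lock_ops.kick() on the
	 * next waiter's CPU to wake it.
	 */
	static void example_pv_wait(u8 *lockbyte, int next_cpu)
	{
		while (READ_ONCE(*lockbyte) != 0)
			pv_lock_ops.wait(lockbyte, READ_ONCE(*lockbyte));

		/* ... later, on the unlocking CPU: */
		pv_lock_ops.kick(next_cpu);
	}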
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c7316341..33ee3e0efd65 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
#include <asm/paravirt.h>
+#ifdef CONFIG_QUEUED_SPINLOCKS
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+ return pv_lock_ops.queued_spin_unlock.func ==
+ __raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+ .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+ .wait = paravirt_nop,
+ .kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCKS */
.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
.unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab6..e1b013696dde 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
/* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
return 0;
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
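The start_/end_ symbols consumed above come from the DEF_NATIVE() invocation at the top of the file; from memory, and simplified, the macro expands to roughly this shape, so native_patch() can copy the native instruction over the indirect-call site:

	/* Rough, simplified reconstruction of DEF_NATIVE() -- an assumption,
	 * not quoted from this patch: */
	extern const char start_pv_lock_ops_queued_spin_unlock[];
	extern const char end_pv_lock_ops_queued_spin_unlock[];
	asm("start_pv_lock_ops_queued_spin_unlock:\n\t"
	    "movb $0, (%eax)\n\t"
	    "end_pv_lock_ops_queued_spin_unlock:");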
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index a1da6737ba5b..a1fa86782186 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
start__mov64, end__mov64);
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a25e202bb319..353972c1946c 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -140,6 +140,51 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
}
+void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, struct dma_attrs *attrs)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+ void *memory;
+
+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
+ return memory;
+
+ if (!dev)
+ dev = &x86_dma_fallback_dev;
+
+ if (!is_device_dma_capable(dev))
+ return NULL;
+
+ if (!ops->alloc)
+ return NULL;
+
+ memory = ops->alloc(dev, size, dma_handle,
+ dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
+ debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
+
+ return memory;
+}
+EXPORT_SYMBOL(dma_alloc_attrs);
+
+void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t bus,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ WARN_ON(irqs_disabled()); /* for portability */
+
+ if (dma_release_from_coherent(dev, get_order(size), vaddr))
+ return;
+
+ debug_dma_free_coherent(dev, size, vaddr, bus);
+ if (ops->free)
+ ops->free(dev, size, vaddr, bus, attrs);
+}
+EXPORT_SYMBOL(dma_free_attrs);
+
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* parameter documentation.
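A usage sketch of the pair just added; 'dev' stands for any DMA-capable device the caller already holds, and passing NULL attrs makes the calls equivalent to dma_alloc_coherent()/dma_free_coherent():

	static int example_coherent_buffer(struct device *dev)
	{
		dma_addr_t handle;
		void *buf;

		buf = dma_alloc_attrs(dev, PAGE_SIZE, &handle, GFP_KERNEL, NULL);
		if (!buf)
			return -ENOMEM;

		/* ... device DMAs via 'handle', CPU accesses 'buf' ... */

		dma_free_attrs(dev, PAGE_SIZE, buf, handle, NULL);
		return 0;
	}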
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 77dd0ad58be4..adf0392d549a 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -20,6 +20,13 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
{
void *vaddr;
+ /*
+ * Don't print a warning when the first allocation attempt fails.
+ * swiotlb_alloc_coherent() will print a warning when the DMA
+ * memory allocation ultimately fails.
+ */
+ flags |= __GFP_NOWARN;
+
vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
attrs);
if (vaddr)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8213da62b1b7..9cad694ed7c4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -25,8 +25,7 @@
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/mwait.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
@@ -57,7 +56,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -76,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n)
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif
-struct kmem_cache *task_xstate_cachep;
-EXPORT_SYMBOL_GPL(task_xstate_cachep);
-
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
@@ -87,36 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
- dst->thread.fpu_counter = 0;
- dst->thread.fpu.has_fpu = 0;
- dst->thread.fpu.state = NULL;
- task_disable_lazy_fpu_restore(dst);
- if (tsk_used_math(src)) {
- int err = fpu_alloc(&dst->thread.fpu);
- if (err)
- return err;
- fpu_copy(dst, src);
- }
- return 0;
-}
-
-void free_thread_xstate(struct task_struct *tsk)
-{
- fpu_free(&tsk->thread.fpu);
-}
-
-void arch_release_task_struct(struct task_struct *tsk)
-{
- free_thread_xstate(tsk);
-}
-
-void arch_task_cache_init(void)
-{
- task_xstate_cachep =
- kmem_cache_create("task_xstate", xstate_size,
- __alignof__(union thread_xstate),
- SLAB_PANIC | SLAB_NOTRACK, NULL);
- setup_xstate_comp();
+ return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
/*
@@ -127,6 +94,7 @@ void exit_thread(void)
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
unsigned long *bp = t->io_bitmap_ptr;
+ struct fpu *fpu = &t->fpu;
if (bp) {
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
@@ -142,7 +110,7 @@ void exit_thread(void)
kfree(bp);
}
- drop_fpu(me);
+ fpu__drop(fpu);
}
void flush_thread(void)
@@ -152,17 +120,7 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- if (!use_eager_fpu()) {
- /* FPU state will be reallocated lazily at the first use. */
- drop_fpu(tsk);
- free_thread_xstate(tsk);
- } else if (!used_math()) {
- /* kthread execs. TODO: cleanup this horror. */
- if (WARN_ON(init_fpu(tsk)))
- force_sig(SIGKILL, tsk);
- user_fpu_begin();
- restore_init_xstate();
- }
+ fpu__clear(&tsk->thread.fpu);
}
static void hard_disable_TSC(void)
@@ -443,11 +401,10 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
}
/*
- * MONITOR/MWAIT with no hints, used for default default C1 state.
- * This invokes MWAIT with interrutps enabled and no flags,
- * which is backwards compatible with the original MWAIT implementation.
+ * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
+ * with interrupts enabled and no flags, which is backwards compatible with the
+ * original MWAIT implementation.
*/
-
static void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8ed2106b06da..deff651835b4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -39,8 +39,7 @@
#include <asm/pgtable.h>
#include <asm/ldt.h>
#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
@@ -242,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
+ *next = &next_p->thread;
+ struct fpu *prev_fpu = &prev->fpu;
+ struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- fpu_switch_t fpu;
+ fpu_switch_t fpu_switch;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- fpu = switch_fpu_prepare(prev_p, next_p, cpu);
+ fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
* the GDT and LDT are properly updated, and must be
- * done before math_state_restore, so the TS bit is up
+ * done before fpu__restore(), so the TS bit is up
* to date.
*/
arch_end_context_switch(next_p);
@@ -319,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
- switch_fpu_finish(next_p, fpu);
+ switch_fpu_finish(next_fpu, fpu_switch);
this_cpu_write(current_task, next_p);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ddfdbf74f174..c50e013b57d2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -38,8 +38,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
@@ -274,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
+ struct fpu *prev_fpu = &prev->fpu;
+ struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned fsindex, gsindex;
- fpu_switch_t fpu;
+ fpu_switch_t fpu_switch;
- fpu = switch_fpu_prepare(prev_p, next_p, cpu);
+ fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -299,7 +300,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Leave lazy mode, flushing any hypercalls made here. This
* must be done after loading TLS entries in the GDT but before
* loading segments that might reference them, and it must
- * be done before math_state_restore, so the TS bit is up to
+ * be done before fpu__restore(), so the TS bit is up to
* date.
*/
arch_end_context_switch(next_p);
@@ -391,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
- switch_fpu_finish(next_p, fpu);
+ switch_fpu_finish(next_fpu, fpu_switch);
/*
* Switch the PDA and FPU contexts.
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a7bc79480719..9be72bc3613f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -11,7 +11,6 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/ptrace.h>
-#include <linux/regset.h>
#include <linux/tracehook.h>
#include <linux/user.h>
#include <linux/elf.h>
@@ -28,8 +27,9 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
+#include <asm/fpu/regset.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>
@@ -1297,7 +1297,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = {
.core_note_type = NT_PRFPREG,
.n = sizeof(struct user_i387_struct) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
- .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
[REGSET_XSTATE] = {
.core_note_type = NT_X86_XSTATE,
@@ -1338,13 +1338,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = {
.core_note_type = NT_PRFPREG,
.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .active = fpregs_active, .get = fpregs_get, .set = fpregs_set
+ .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
[REGSET_XSTATE] = {
.core_note_type = NT_X86_XSTATE,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d74ac33290ae..cba828892790 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -531,12 +531,14 @@ static void __init reserve_crashkernel_low(void)
if (ret != 0) {
/*
* two parts from lib/swiotlb.c:
- * swiotlb size: user specified with swiotlb= or default.
- * swiotlb overflow buffer: now is hardcoded to 32k.
- * We round it to 8M for other buffers that
- * may need to stay low too.
+ * -swiotlb size: user-specified with swiotlb= or default.
+ *
+ * -swiotlb overflow buffer: now hardcoded to 32k. We round it
+ * to 8M for other buffers that may need to stay low too. Also
+ * make sure we allocate enough extra low memory so that we
+ * don't run out of DMA buffers for 32-bit devices.
*/
- low_size = swiotlb_size_or_default() + (8UL<<20);
+ low_size = max(swiotlb_size_or_default() + (8UL<<20), 256UL<<20);
auto_set = true;
} else {
/* passed with crashkernel=0,low ? */
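Assuming the usual 64 MB swiotlb default, the change is easy to check:

	/*
	 * Worked check (64 MB swiotlb assumed):
	 *   old: low_size = 64M + 8M            =  72M
	 *   new: low_size = max(64M + 8M, 256M) = 256M
	 * i.e. the new floor more than triples the low reservation.
	 */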
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 1ea14fd53933..206996c1669d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -26,8 +26,8 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
@@ -103,7 +103,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
get_user_ex(buf, &sc->fpstate);
} get_user_catch(err);
- err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
+ err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
force_iret();
@@ -199,6 +199,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
+ struct fpu *fpu = &current->thread.fpu;
/* redzone */
if (config_enabled(CONFIG_X86_64))
@@ -218,9 +219,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
}
- if (used_math()) {
- sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
- &buf_fx, &math_size);
+ if (fpu->fpstate_active) {
+ sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ &buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
@@ -234,8 +235,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)-1L;
/* save i387 and extended state */
- if (used_math() &&
- save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
+ if (fpu->fpstate_active &&
+ copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
return (void __user *)sp;
@@ -593,6 +594,22 @@ badframe:
return 0;
}
+static inline int is_ia32_compat_frame(void)
+{
+ return config_enabled(CONFIG_IA32_EMULATION) &&
+ test_thread_flag(TIF_IA32);
+}
+
+static inline int is_ia32_frame(void)
+{
+ return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+}
+
+static inline int is_x32_frame(void)
+{
+ return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+}
+
static int
setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
{
@@ -617,6 +634,7 @@ static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
bool stepping, failed;
+ struct fpu *fpu = &current->thread.fpu;
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
@@ -665,8 +683,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (used_math())
- fpu_reset_state(current);
+ if (fpu->fpstate_active)
+ fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 50e547eac8cd..6d4bfea25874 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -68,8 +68,7 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -314,10 +313,10 @@ topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
-#define link_mask(_m, c1, c2) \
+#define link_mask(mfunc, c1, c2) \
do { \
- cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
- cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
+ cpumask_set_cpu((c1), mfunc(c2)); \
+ cpumask_set_cpu((c2), mfunc(c1)); \
} while (0)
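The macro now takes the cpumask accessor function itself instead of token-pasting a name fragment, so callers further down in this hunk read as:

	/* Usage after the change -- the accessor is passed directly: */
	link_mask(topology_sibling_cpumask, cpu, i);
	link_mask(cpu_llc_shared_mask, cpu, i);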
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@ -398,9 +397,9 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
if (!has_mp) {
- cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+ cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -409,32 +408,34 @@ void set_cpu_sibling_map(int cpu)
o = &cpu_data(i);
if ((i == cpu) || (has_smt && match_smt(c, o)))
- link_mask(sibling, cpu, i);
+ link_mask(topology_sibling_cpumask, cpu, i);
if ((i == cpu) || (has_mp && match_llc(c, o)))
- link_mask(llc_shared, cpu, i);
+ link_mask(cpu_llc_shared_mask, cpu, i);
}
/*
* This needs a separate iteration over the cpus because we rely on all
- * cpu_sibling_mask links to be set-up.
+ * topology_sibling_cpumask links to be set up.
*/
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
if ((i == cpu) || (has_mp && match_die(c, o))) {
- link_mask(core, cpu, i);
+ link_mask(topology_core_cpumask, cpu, i);
/*
* Does this new cpu bring up a new core?
*/
- if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
+ if (cpumask_weight(
+ topology_sibling_cpumask(cpu)) == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
*/
- if (cpumask_first(cpu_sibling_mask(i)) == i)
+ if (cpumask_first(
+ topology_sibling_cpumask(i)) == i)
c->booted_cores++;
/*
* increment the core count for all
@@ -1009,8 +1010,8 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- cpumask_set_cpu(0, cpu_sibling_mask(0));
- cpumask_set_cpu(0, cpu_core_mask(0));
+ cpumask_set_cpu(0, topology_sibling_cpumask(0));
+ cpumask_set_cpu(0, topology_core_cpumask(0));
}
enum {
@@ -1293,22 +1294,22 @@ static void remove_siblinginfo(int cpu)
int sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- for_each_cpu(sibling, cpu_core_mask(cpu)) {
- cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
+ for_each_cpu(sibling, topology_core_cpumask(cpu)) {
+ cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
/*
* last thread sibling in this cpu core going down
*/
- if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
+ if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
cpu_data(sibling).booted_cores--;
}
- for_each_cpu(sibling, cpu_sibling_mask(cpu))
- cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+ for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+ cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
cpumask_clear(cpu_llc_shared_mask(cpu));
- cpumask_clear(cpu_sibling_mask(cpu));
- cpumask_clear(cpu_core_mask(cpu));
+ cpumask_clear(topology_sibling_cpumask(cpu));
+ cpumask_clear(topology_core_cpumask(cpu));
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 324ab5247687..36cb15b7b367 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,12 +54,13 @@
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
#include <asm/alternative.h>
+#include <asm/fpu/xstate.h>
+#include <asm/trace/mpx.h>
#include <asm/mpx.h>
#ifdef CONFIG_X86_64
@@ -371,10 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
- struct task_struct *tsk = current;
- struct xsave_struct *xsave_buf;
enum ctx_state prev_state;
- struct bndcsr *bndcsr;
+ const struct bndcsr *bndcsr;
siginfo_t *info;
prev_state = exception_enter();
@@ -393,15 +392,15 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
/*
* We need to look at BNDSTATUS to resolve this exception.
- * It is not directly accessible, though, so we need to
- * do an xsave and then pull it out of the xsave buffer.
+ * A NULL here might mean that it is in its 'init state',
+ * which is all zeros and indicates that MPX was not
+ * responsible for the exception.
*/
- fpu_save_init(&tsk->thread.fpu);
- xsave_buf = &(tsk->thread.fpu.state->xsave);
- bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
if (!bndcsr)
goto exit_trap;
+ trace_bounds_exception_mpx(bndcsr);
/*
* The error code field of the BNDSTATUS register communicates status
* information of a bound range exception #BR or operation involving
@@ -409,11 +408,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
*/
switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
case 2: /* Bound directory has invalid entry. */
- if (mpx_handle_bd_fault(xsave_buf))
+ if (mpx_handle_bd_fault())
goto exit_trap;
break; /* Success, it was handled */
case 1: /* Bound violation. */
- info = mpx_generate_siginfo(regs, xsave_buf);
+ info = mpx_generate_siginfo(regs);
if (IS_ERR(info)) {
/*
* We failed to decode the MPX instruction. Act as if
@@ -709,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug);
static void math_error(struct pt_regs *regs, int error_code, int trapnr)
{
struct task_struct *task = current;
+ struct fpu *fpu = &task->thread.fpu;
siginfo_t info;
- unsigned short err;
char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
"simd exception";
@@ -718,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
conditional_sti(regs);
- if (!user_mode(regs))
- {
+ if (!user_mode(regs)) {
if (!fixup_exception(regs)) {
task->thread.error_code = error_code;
task->thread.trap_nr = trapnr;
@@ -731,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
/*
* Save the info for the exception handler and clear the error.
*/
- unlazy_fpu(task);
- task->thread.trap_nr = trapnr;
+ fpu__save(fpu);
+
+ task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
- if (trapnr == X86_TRAP_MF) {
- unsigned short cwd, swd;
- /*
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
- * status. 0x3f is the exception bits in these regs, 0x200 is the
- * C1 reg you need in case of a stack fault, 0x040 is the stack
- * fault bit. We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't synchronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
- err = swd & ~cwd;
- } else {
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- unsigned short mxcsr = get_fpu_mxcsr(task);
- err = ~(mxcsr >> 7) & mxcsr;
- }
+ info.si_code = fpu__exception_code(fpu, trapnr);
- if (err & 0x001) { /* Invalid op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- } else if (err & 0x004) { /* Divide by Zero */
- info.si_code = FPE_FLTDIV;
- } else if (err & 0x008) { /* Overflow */
- info.si_code = FPE_FLTOVF;
- } else if (err & 0x012) { /* Denormal, Underflow */
- info.si_code = FPE_FLTUND;
- } else if (err & 0x020) { /* Precision */
- info.si_code = FPE_FLTRES;
- } else {
- /*
- * If we're using IRQ 13, or supposedly even some trap
- * X86_TRAP_MF implementations, it's possible
- * we get a spurious trap, which is not an error.
- */
+ /* Retry when we get spurious exceptions: */
+ if (!info.si_code)
return;
- }
+
force_sig_info(SIGFPE, &info, task);
}
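fpu__exception_code() now owns the decoding the deleted block performed; condensed from those deleted lines, the mapping was:

	/*
	 * Condensed sketch of the decoding that moved into
	 * fpu__exception_code(); 'err' is swd & ~cwd for #MF, or the
	 * analogous mxcsr derivation for SIMD exceptions.
	 */
	static int example_fpe_si_code(unsigned short err)
	{
		if (err & 0x001)	/* Invalid op */
			return FPE_FLTINV;
		if (err & 0x004)	/* Divide by zero */
			return FPE_FLTDIV;
		if (err & 0x008)	/* Overflow */
			return FPE_FLTOVF;
		if (err & 0x012)	/* Denormal, underflow */
			return FPE_FLTUND;
		if (err & 0x020)	/* Precision */
			return FPE_FLTRES;
		return 0;		/* spurious trap: caller just returns */
	}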
@@ -827,48 +783,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- *
- * Must be called with kernel preemption disabled (e.g. with local
- * interrupts disabled, as in the case of do_device_not_available).
- */
-void math_state_restore(void)
-{
- struct task_struct *tsk = current;
-
- if (!tsk_used_math(tsk)) {
- local_irq_enable();
- /*
- * does a slab alloc which can sleep
- */
- if (init_fpu(tsk)) {
- /*
- * ran out of memory!
- */
- do_group_exit(SIGKILL);
- return;
- }
- local_irq_disable();
- }
-
- /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
- kernel_fpu_disable();
- __thread_fpu_begin(tsk);
- if (unlikely(restore_fpu_checking(tsk))) {
- fpu_reset_state(tsk);
- force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
- } else {
- tsk->thread.fpu_counter++;
- }
- kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(math_state_restore);
-
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
@@ -889,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
return;
}
#endif
- math_state_restore(); /* interrupts still off */
+ fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 26488487bc61..dd8d0791dfb5 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ static void check_tsc_warp(unsigned int timeout)
*/
static inline unsigned int loop_timeout(int cpu)
{
- return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+ return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20;
}
/*
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0b81ad67da07..66476244731e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -29,6 +29,7 @@
#include <linux/kdebug.h>
#include <asm/processor.h>
#include <asm/insn.h>
+#include <asm/mmu_context.h>
/* Post-execution fixups. */
@@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
}
#ifdef CONFIG_X86_64
-static inline bool is_64bit_mm(struct mm_struct *mm)
-{
- return !config_enabled(CONFIG_IA32_EMULATION) ||
- !(mm->context.ia32_compat == TIF_IA32);
-}
/*
* If arch_uprobe->insn doesn't use rip-relative addressing, return
* immediately. Otherwise, rewrite the instruction so that it accesses
@@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
}
#else /* 32-bit: */
-static inline bool is_64bit_mm(struct mm_struct *mm)
-{
- return false;
-}
/*
* No RIP-relative addressing on 32-bit
*/
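The helper was not dropped outright: the new asm/mmu_context.h include above suggests it moved there. For reference, the 64-bit form as deleted here was:

	static inline bool is_64bit_mm(struct mm_struct *mm)
	{
		return !config_enabled(CONFIG_IA32_EMULATION) ||
		       !(mm->context.ia32_compat == TIF_IA32);
	}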
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 37d8fa4438f0..a0695be19864 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -75,7 +75,5 @@ EXPORT_SYMBOL(native_load_gs_index);
#ifdef CONFIG_PREEMPT
EXPORT_SYMBOL(___preempt_schedule);
-#ifdef CONFIG_CONTEXT_TRACKING
-EXPORT_SYMBOL(___preempt_schedule_context);
-#endif
+EXPORT_SYMBOL(___preempt_schedule_notrace);
#endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
deleted file mode 100644
index 87a815b85f3e..000000000000
--- a/arch/x86/kernel/xsave.c
+++ /dev/null
@@ -1,724 +0,0 @@
-/*
- * xsave/xrstor support.
- *
- * Author: Suresh Siddha <suresh.b.siddha@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bootmem.h>
-#include <linux/compat.h>
-#include <linux/cpu.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
-#include <asm/sigframe.h>
-#include <asm/tlbflush.h>
-#include <asm/xcr.h>
-
-/*
- * Supported feature mask by the CPU and the kernel.
- */
-u64 pcntxt_mask;
-
-/*
- * Represents init state for the supported extended state.
- */
-struct xsave_struct *init_xstate_buf;
-
-static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
-static unsigned int *xstate_offsets, *xstate_sizes;
-static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
-static unsigned int xstate_features;
-
-/*
- * If a processor implementation discerns that a processor state component is
- * in its initialized state, it may modify the corresponding bit in
- * xsave_hdr.xstate_bv to '0', without modifying the corresponding memory
- * layout in the case of xsaveopt. While presenting the xstate information to
- * the user, we always ensure that the memory layout of a feature will be in
- * the init state if the corresponding header bit is zero. This is to ensure
- * that the user doesn't see some stale state in the memory layout during
- * signal handling, debugging etc.
- */
-void __sanitize_i387_state(struct task_struct *tsk)
-{
- struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
- int feature_bit = 0x2;
- u64 xstate_bv;
-
- if (!fx)
- return;
-
- xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
-
- /*
- * None of the feature bits are in init state. So nothing else
- * to do for us, as the memory layout is up to date.
- */
- if ((xstate_bv & pcntxt_mask) == pcntxt_mask)
- return;
-
- /*
- * FP is in init state
- */
- if (!(xstate_bv & XSTATE_FP)) {
- fx->cwd = 0x37f;
- fx->swd = 0;
- fx->twd = 0;
- fx->fop = 0;
- fx->rip = 0;
- fx->rdp = 0;
- memset(&fx->st_space[0], 0, 128);
- }
-
- /*
- * SSE is in init state
- */
- if (!(xstate_bv & XSTATE_SSE))
- memset(&fx->xmm_space[0], 0, 256);
-
- xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2;
-
- /*
- * Update all the other memory layouts for which the corresponding
- * header bit is in the init state.
- */
- while (xstate_bv) {
- if (xstate_bv & 0x1) {
- int offset = xstate_offsets[feature_bit];
- int size = xstate_sizes[feature_bit];
-
- memcpy(((void *) fx) + offset,
- ((void *) init_xstate_buf) + offset,
- size);
- }
-
- xstate_bv >>= 1;
- feature_bit++;
- }
-}
-
-/*
- * Check for the presence of extended state information in the
- * user fpstate pointer in the sigcontext.
- */
-static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
- void __user *fpstate,
- struct _fpx_sw_bytes *fx_sw)
-{
- int min_xstate_size = sizeof(struct i387_fxsave_struct) +
- sizeof(struct xsave_hdr_struct);
- unsigned int magic2;
-
- if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
- return -1;
-
- /* Check for the first magic field and other error scenarios. */
- if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
- fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > xstate_size ||
- fx_sw->xstate_size > fx_sw->extended_size)
- return -1;
-
- /*
- * Check for the presence of second magic word at the end of memory
- * layout. This detects the case where the user just copied the legacy
- * fpstate layout without copying the extended state information
- * in the memory layout.
- */
- if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
- || magic2 != FP_XSTATE_MAGIC2)
- return -1;
-
- return 0;
-}
-
-/*
- * Signal frame handlers.
- */
-static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
-{
- if (use_fxsr()) {
- struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
- struct user_i387_ia32_struct env;
- struct _fpstate_ia32 __user *fp = buf;
-
- convert_from_fxsr(&env, tsk);
-
- if (__copy_to_user(buf, &env, sizeof(env)) ||
- __put_user(xsave->i387.swd, &fp->status) ||
- __put_user(X86_FXSR_MAGIC, &fp->magic))
- return -1;
- } else {
- struct i387_fsave_struct __user *fp = buf;
- u32 swd;
- if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
- return -1;
- }
-
- return 0;
-}
-
-static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
-{
- struct xsave_struct __user *x = buf;
- struct _fpx_sw_bytes *sw_bytes;
- u32 xstate_bv;
- int err;
-
- /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
- sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
- err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
-
- if (!use_xsave())
- return err;
-
- err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
-
- /*
- * Read the xstate_bv which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers.
- */
- err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
-
- /*
- * For legacy compatibility, we always set the FP/SSE bits in the bit
- * vector while saving the state to the user context. This lets us
- * capture any changes (during sigreturn) to the FP/SSE bits made by
- * legacy applications which don't touch xstate_bv in the xsave
- * header.
- *
- * xsave-aware apps can change the xstate_bv in the xsave
- * header as well as change any contents in the memory layout.
- * xrstor as part of sigreturn will capture all the changes.
- */
- xstate_bv |= XSTATE_FPSSE;
-
- err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
-
- return err;
-}
-
-static inline int save_user_xstate(struct xsave_struct __user *buf)
-{
- int err;
-
- if (use_xsave())
- err = xsave_user(buf);
- else if (use_fxsr())
- err = fxsave_user((struct i387_fxsave_struct __user *) buf);
- else
- err = fsave_user((struct i387_fsave_struct __user *) buf);
-
- if (unlikely(err) && __clear_user(buf, xstate_size))
- err = -EFAULT;
- return err;
-}
-
-/*
- * Save the fpu, extended register state to the user signal frame.
- *
- * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
- * state is copied.
- * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
- *
- * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
- * buf != buf_fx for 32-bit frames with fxstate.
- *
- * If the fpu, extended register state is live, save the state directly
- * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise,
- * copy the thread's fpu state to the user frame starting at 'buf_fx'.
- *
- * If this is a 32-bit frame with fxstate, put a fsave header before
- * the aligned state at 'buf_fx'.
- *
- * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
- * indicating the absence/presence of the extended state to the user.
- */
-int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
-{
- struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
- struct task_struct *tsk = current;
- int ia32_fxstate = (buf != buf_fx);
-
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
-
- if (!access_ok(VERIFY_WRITE, buf, size))
- return -EACCES;
-
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_get(current, NULL, 0,
- sizeof(struct user_i387_ia32_struct), NULL,
- (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
-
- if (user_has_fpu()) {
- /* Save the live register state to the user directly. */
- if (save_user_xstate(buf_fx))
- return -1;
- /* Update the thread's fxstate to save the fsave header. */
- if (ia32_fxstate)
- fpu_fxsave(&tsk->thread.fpu);
- } else {
- sanitize_i387_state(tsk);
- if (__copy_to_user(buf_fx, xsave, xstate_size))
- return -1;
- }
-
- /* Save the fsave header for the 32-bit frames. */
- if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
- return -1;
-
- if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
- return -1;
-
- return 0;
-}
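The buf/buf_fx relationship documented above is plain pointer arithmetic: the aligned save area is carved out first, and 32-bit fxstate frames place an fsave header directly in front of it. A sketch with an assumed fsave-header size (the real struct size may differ):

#include <stdint.h>
#include <stdio.h>

#define FSAVE_HDR_SIZE 112      /* assumed legacy fsave header size */

int main(void)
{
        uintptr_t sp = 0x7ffc1234;      /* hypothetical user stack pointer */

        /* Aligned [f|fx|x]save image: round down to a 64-byte boundary. */
        uintptr_t buf_fx = (sp - 2048) & ~(uintptr_t)63;
        /* 32-bit fxstate frames put the fsave header in front of it. */
        uintptr_t buf = buf_fx - FSAVE_HDR_SIZE;

        printf("buf=%#lx buf_fx=%#lx, buf %s buf_fx\n",
               (unsigned long)buf, (unsigned long)buf_fx,
               buf == buf_fx ? "==" : "!=");
        return 0;
}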
-
-static inline void
-sanitize_restored_xstate(struct task_struct *tsk,
- struct user_i387_ia32_struct *ia32_env,
- u64 xstate_bv, int fx_only)
-{
- struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
- struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
-
- if (use_xsave()) {
- /* These bits must be zero. */
- memset(xsave_hdr->reserved, 0, 48);
-
- /*
- * Init the state that is not present in the memory
- * layout and not enabled by the OS.
- */
- if (fx_only)
- xsave_hdr->xstate_bv = XSTATE_FPSSE;
- else
- xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
- }
-
- if (use_fxsr()) {
- /*
- * mxcsr reserved bits must be masked to zero for security
- * reasons.
- */
- xsave->i387.mxcsr &= mxcsr_feature_mask;
-
- convert_to_fxsr(tsk, ia32_env);
- }
-}
-
-/*
- * Restore the extended state if present. Otherwise, restore the FP/SSE state.
- */
-static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
-{
- if (use_xsave()) {
- if ((unsigned long)buf % 64 || fx_only) {
- u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
- xrstor_state(init_xstate_buf, init_bv);
- return fxrstor_user(buf);
- } else {
- u64 init_bv = pcntxt_mask & ~xbv;
- if (unlikely(init_bv))
- xrstor_state(init_xstate_buf, init_bv);
- return xrestore_user(buf, xbv);
- }
- } else if (use_fxsr()) {
- return fxrstor_user(buf);
- } else
- return frstor_user(buf);
-}
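restore_user_xstate() first restores the init image for every feature the frame does not carry, then restores the user's features. The mask arithmetic, as a tiny sketch with hypothetical masks:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical masks: OS enables features 0-4; frame carries 0-2. */
        uint64_t pcntxt_mask = 0x1f;
        uint64_t xbv = 0x07;

        /* Features the frame does not carry must be reset to init state. */
        uint64_t init_bv = pcntxt_mask & ~xbv;

        if (init_bv)
                printf("xrstor the init image for 0x%llx first\n",
                       (unsigned long long)init_bv);
        printf("then xrstor the user buffer for 0x%llx\n",
               (unsigned long long)xbv);
        return 0;
}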
-
-int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
-{
- int ia32_fxstate = (buf != buf_fx);
- struct task_struct *tsk = current;
- int state_size = xstate_size;
- u64 xstate_bv = 0;
- int fx_only = 0;
-
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
-
- if (!buf) {
- fpu_reset_state(tsk);
- return 0;
- }
-
- if (!access_ok(VERIFY_READ, buf, size))
- return -EACCES;
-
- if (!used_math() && init_fpu(tsk))
- return -1;
-
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_set(current, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) != 0;
-
- if (use_xsave()) {
- struct _fpx_sw_bytes fx_sw_user;
- if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
- /*
- * Couldn't find the extended state information in the
- * memory layout. Restore just the FP/SSE and init all
- * the other extended state.
- */
- state_size = sizeof(struct i387_fxsave_struct);
- fx_only = 1;
- } else {
- state_size = fx_sw_user.xstate_size;
- xstate_bv = fx_sw_user.xstate_bv;
- }
- }
-
- if (ia32_fxstate) {
- /*
- * For 32-bit frames with fxstate, copy the user state to the
- * thread's fpu state, reconstruct fxstate from the fsave
- * header. Sanitize the copied state etc.
- */
- struct fpu *fpu = &tsk->thread.fpu;
- struct user_i387_ia32_struct env;
- int err = 0;
-
- /*
- * Drop the current fpu, which clears used_math(). This ensures
- * that a context switch during the copy of the new state cannot
- * restore or save the intermediate state, which would corrupt
- * the newly restored state. We will be ready to restore/save
- * the state only after set_used_math() is called again.
- */
- drop_fpu(tsk);
-
- if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
- __copy_from_user(&env, buf, sizeof(env))) {
- fpu_finit(fpu);
- err = -1;
- } else {
- sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
- }
-
- set_used_math();
- if (use_eager_fpu()) {
- preempt_disable();
- math_state_restore();
- preempt_enable();
- }
-
- return err;
- } else {
- /*
- * For 64-bit frames and 32-bit fsave frames, restore the user
- * state to the registers directly (with exceptions handled).
- */
- user_fpu_begin();
- if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
- fpu_reset_state(tsk);
- return -1;
- }
- }
-
- return 0;
-}
-
-/*
- * Prepare the SW reserved portion of the fxsave memory layout, indicating
- * the presence of the extended state information in the memory layout
- * pointed to by the fpstate pointer in the sigcontext.
- * This will be saved whenever the FP and extended state context is
- * saved on the user stack during the signal handler delivery to the user.
- */
-static void prepare_fx_sw_frame(void)
-{
- int fsave_header_size = sizeof(struct i387_fsave_struct);
- int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
-
- if (config_enabled(CONFIG_X86_32))
- size += fsave_header_size;
-
- fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
- fx_sw_reserved.extended_size = size;
- fx_sw_reserved.xstate_bv = pcntxt_mask;
- fx_sw_reserved.xstate_size = xstate_size;
-
- if (config_enabled(CONFIG_IA32_EMULATION)) {
- fx_sw_reserved_ia32 = fx_sw_reserved;
- fx_sw_reserved_ia32.extended_size += fsave_header_size;
- }
-}
-
-/*
- * Enable the extended processor state save/restore feature
- */
-static inline void xstate_enable(void)
-{
- cr4_set_bits(X86_CR4_OSXSAVE);
- xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
-}
-
-/*
- * Record the offsets and sizes of different state managed by the xsave
- * memory layout.
- */
-static void __init setup_xstate_features(void)
-{
- int eax, ebx, ecx, edx, leaf = 0x2;
-
- xstate_features = fls64(pcntxt_mask);
- xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
- xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));
-
- do {
- cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
-
- if (eax == 0)
- break;
-
- xstate_offsets[leaf] = ebx;
- xstate_sizes[leaf] = eax;
-
- leaf++;
- } while (1);
-}
-
-/*
- * This function sets up the offsets and sizes of all extended states
- * in the xsave area. It supports both the standard format and the
- * compacted format of the xsave area.
- *
- * Input: void
- * Output: void
- */
-void setup_xstate_comp(void)
-{
- unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
- int i;
-
- /*
- * The FP xstates and SSE xstates are legacy states. They are always
- * at fixed offsets in the xsave area in either compacted form
- * or standard form.
- */
- xstate_comp_offsets[0] = 0;
- xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
-
- if (!cpu_has_xsaves) {
- for (i = 2; i < xstate_features; i++) {
- if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
- xstate_comp_offsets[i] = xstate_offsets[i];
- xstate_comp_sizes[i] = xstate_sizes[i];
- }
- }
- return;
- }
-
- xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-
- for (i = 2; i < xstate_features; i++) {
- if (test_bit(i, (unsigned long *)&pcntxt_mask))
- xstate_comp_sizes[i] = xstate_sizes[i];
- else
- xstate_comp_sizes[i] = 0;
-
- if (i > 2)
- xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
- + xstate_comp_sizes[i-1];
-
- }
-}
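In the compacted format, each enabled feature's offset is the running sum of the sizes of the enabled features before it, starting just past the legacy area and the xsave header; disabled features consume no space. A userspace sketch with invented per-feature sizes:

#include <stdint.h>
#include <stdio.h>

#define FXSAVE_SIZE     512
#define XSAVE_HDR_SIZE  64

int main(void)
{
        /* Hypothetical sizes of features 2..4 (0/1 live in the legacy area). */
        int size[5] = { 0, 0, 256, 64, 128 };
        uint64_t enabled = 0x17;        /* feature 3 disabled here */
        int offset[5];
        int next = FXSAVE_SIZE + XSAVE_HDR_SIZE;
        int i;

        for (i = 2; i < 5; i++) {
                offset[i] = next;
                if (enabled & (1ULL << i))
                        next += size[i];        /* disabled features take no room */
        }
        for (i = 2; i < 5; i++)
                printf("feature %d at offset %d\n", i, offset[i]);
        printf("compacted size: %d bytes\n", next);
        return 0;
}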
-
-/*
- * setup the xstate image representing the init state
- */
-static void __init setup_init_fpu_buf(void)
-{
- /*
- * Set up init_xstate_buf to represent the init state of
- * all the features managed by xsave.
- */
- init_xstate_buf = alloc_bootmem_align(xstate_size,
- __alignof__(struct xsave_struct));
- fx_finit(&init_xstate_buf->i387);
-
- if (!cpu_has_xsave)
- return;
-
- setup_xstate_features();
-
- if (cpu_has_xsaves) {
- init_xstate_buf->xsave_hdr.xcomp_bv =
- (u64)1 << 63 | pcntxt_mask;
- init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
- }
-
- /*
- * Init all the features' state with header_bv being 0x0
- */
- xrstor_state_booting(init_xstate_buf, -1);
- /*
- * Dump the init state again. This is to identify the init state
- * of any feature which is not represented by all zero's.
- */
- xsave_state_booting(init_xstate_buf, -1);
-}
-
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
-static int __init eager_fpu_setup(char *s)
-{
- if (!strcmp(s, "on"))
- eagerfpu = ENABLE;
- else if (!strcmp(s, "off"))
- eagerfpu = DISABLE;
- else if (!strcmp(s, "auto"))
- eagerfpu = AUTO;
- return 1;
-}
-__setup("eagerfpu=", eager_fpu_setup);
-
-
-/*
- * Calculate total size of enabled xstates in XCR0/pcntxt_mask.
- */
-static void __init init_xstate_size(void)
-{
- unsigned int eax, ebx, ecx, edx;
- int i;
-
- if (!cpu_has_xsaves) {
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xstate_size = ebx;
- return;
- }
-
- xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
- for (i = 2; i < 64; i++) {
- if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
- cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
- xstate_size += eax;
- }
- }
-}
-
-/*
- * Enable and initialize the xsave feature.
- */
-static void __init xstate_enable_boot_cpu(void)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
- WARN(1, KERN_ERR "XSTATE_CPUID missing\n");
- return;
- }
-
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- pcntxt_mask = eax + ((u64)edx << 32);
-
- if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
- pr_err("FP/SSE not shown under xsave features 0x%llx\n",
- pcntxt_mask);
- BUG();
- }
-
- /*
- * Support only the state known to OS.
- */
- pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
-
- xstate_enable();
-
- /*
- * Recompute the context size for enabled features
- */
- init_xstate_size();
-
- update_regset_xstate_info(xstate_size, pcntxt_mask);
- prepare_fx_sw_frame();
- setup_init_fpu_buf();
-
- /* Auto enable eagerfpu for xsaveopt */
- if (cpu_has_xsaveopt && eagerfpu != DISABLE)
- eagerfpu = ENABLE;
-
- if (pcntxt_mask & XSTATE_EAGER) {
- if (eagerfpu == DISABLE) {
- pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n",
- pcntxt_mask & XSTATE_EAGER);
- pcntxt_mask &= ~XSTATE_EAGER;
- } else {
- eagerfpu = ENABLE;
- }
- }
-
- pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
- pcntxt_mask, xstate_size,
- cpu_has_xsaves ? "compacted form" : "standard form");
-}
-
-/*
- * For the very first instance, this calls xstate_enable_boot_cpu();
- * for all subsequent instances, this calls xstate_enable().
- *
- * This is somewhat obfuscated due to the lack of powerful enough
- * overrides for the section checks.
- */
-void xsave_init(void)
-{
- static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
- void (*this_func)(void);
-
- if (!cpu_has_xsave)
- return;
-
- this_func = next_func;
- next_func = xstate_enable;
- this_func();
-}
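The run-once trick in xsave_init() is a self-replacing function pointer: the first caller takes the boot-cpu path, which immediately retargets the pointer at the common path for every later caller. A standalone sketch of the idiom:

#include <stdio.h>

static void enable(void)
{
        puts("per-cpu enable");
}

static void enable_boot_cpu(void)
{
        puts("one-time boot-cpu setup");
        enable();
}

/* The first caller gets the boot-cpu path; everyone after gets enable(). */
static void (*next_func)(void) = enable_boot_cpu;

static void init_like_xsave_init(void)
{
        void (*this_func)(void) = next_func;

        next_func = enable;
        this_func();
}

int main(void)
{
        init_like_xsave_init();         /* boot cpu */
        init_like_xsave_init();         /* a secondary cpu */
        return 0;
}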
-
-/*
- * setup_init_fpu_buf() is __init and it is OK to call it here because
- * init_xstate_buf will be unset only once during boot.
- */
-void __init_refok eager_fpu_init(void)
-{
- WARN_ON(used_math());
- current_thread_info()->status = 0;
-
- if (eagerfpu == ENABLE)
- setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
- if (!cpu_has_eager_fpu) {
- stts();
- return;
- }
-
- if (!init_xstate_buf)
- setup_init_fpu_buf();
-}
-
-/*
- * Given the xsave area and a state inside, this function returns the
- * address of the state.
- *
- * This is the API that is called to get xstate address in either
- * standard format or compacted format of xsave area.
- *
- * Inputs:
- * xsave: base address of the xsave area;
- * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
- * etc.)
- * Output:
- * address of the state in the xsave area.
- */
-void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
-{
- int feature = fls64(xstate) - 1;
- if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
- return NULL;
-
- return (void *)xsave + xstate_comp_offsets[feature];
-}
-EXPORT_SYMBOL_GPL(get_xsave_addr);
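Callers hand get_xsave_addr() a single-bit mask such as XSTATE_BNDREGS, and fls64() turns it back into a bit index for the offset table. A sketch of that conversion using the GCC/Clang builtin (the mask value is illustrative):

#include <stdint.h>
#include <stdio.h>

/* fls64(): position of the most significant set bit, 1-based; 0 if none. */
static int fls64_like(uint64_t x)
{
        return x ? 64 - __builtin_clzll(x) : 0;
}

int main(void)
{
        uint64_t feature_mask = 1ULL << 3;      /* hypothetical XSTATE_BNDREGS */
        int feature = fls64_like(feature_mask) - 1;

        printf("feature bit %d indexes xstate_comp_offsets[]\n", feature);
        return 0;
}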
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59b69f6a2844..9f705e618af5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -17,7 +17,7 @@
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/user.h>
-#include <asm/xsave.h>
+#include <asm/fpu/xstate.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
@@ -95,6 +95,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+ vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
/*
* The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c3b1ad9fca81..496b3695d3d3 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 629af0f1c5c4..4c7deb4f78a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1090,6 +1090,17 @@ static void update_divide_count(struct kvm_lapic *apic)
apic->divide_count);
}
+static void apic_update_lvtt(struct kvm_lapic *apic)
+{
+ u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
+ apic->lapic_timer.timer_mode_mask;
+
+ if (apic->lapic_timer.timer_mode != timer_mode) {
+ apic->lapic_timer.timer_mode = timer_mode;
+ hrtimer_cancel(&apic->lapic_timer.timer);
+ }
+}
+
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1298,6 +1309,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
apic_set_reg(apic, APIC_LVTT + 0x10 * i,
lvt_val | APIC_LVT_MASKED);
}
+ apic_update_lvtt(apic);
atomic_set(&apic->lapic_timer.pending, 0);
}
@@ -1330,20 +1342,13 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
- case APIC_LVTT: {
- u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
-
- if (apic->lapic_timer.timer_mode != timer_mode) {
- apic->lapic_timer.timer_mode = timer_mode;
- hrtimer_cancel(&apic->lapic_timer.timer);
- }
-
+ case APIC_LVTT:
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
apic_set_reg(apic, APIC_LVTT, val);
+ apic_update_lvtt(apic);
break;
- }
case APIC_TMICT:
if (apic_lvtt_tscdeadline(apic))
@@ -1576,7 +1581,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
- apic->lapic_timer.timer_mode = 0;
+ apic_update_lvtt(apic);
apic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
@@ -1802,6 +1807,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
apic_update_ppr(apic);
hrtimer_cancel(&apic->lapic_timer.timer);
+ apic_update_lvtt(apic);
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d43867c33bc4..b73337634214 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
}
}
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
struct kvm_mmu *context = &vcpu->arch.mmu;
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.cr0_wp = is_write_protection(vcpu);
context->base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
+ context->base_role.smap_andnot_wp
+ = smap && !is_write_protection(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
- union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush, zap_page;
+ union kvm_mmu_page_role mask = { };
+
+ mask.cr0_wp = 1;
+ mask.cr4_pae = 1;
+ mask.nxe = 1;
+ mask.smep_andnot_wp = 1;
+ mask.smap_andnot_wp = 1;
/*
* If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
- mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7d65637c851..0ada65ecddcf 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,8 +71,6 @@ enum {
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ WARN_ON(pfec & PFERR_RSVD_MASK);
+
return (mmu->permissions[index] >> pte_access) & 1;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c867b25a..6e6d115fe9b5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID))
return r;
+
+ /*
+ * A page fault with PFEC.RSVD = 1 is caused by a shadow
+ * page fault and should not be used to walk the guest
+ * page table.
+ */
+ error_code &= ~PFERR_RSVD_MASK;
};
r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8650f6..9afa233b5482 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
+ .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b61687bd79..e11dd59398f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -40,8 +40,7 @@
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
+#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
@@ -1883,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
* If the FPU is not active (through the host task or
* the guest vcpu), then restore the cr0.TS bit.
*/
- if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
+ if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
stts();
load_gdt(this_cpu_ptr(&host_gdt));
}
@@ -10185,6 +10184,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
+ .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd03e29..26eaeb522cab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -59,9 +59,8 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h> /* Ugh! */
-#include <asm/xcr.h>
+#include <linux/kernel_stat.h>
+#include <asm/fpu/internal.h> /* Ugh! */
#include <asm/pvclock.h>
#include <asm/div64.h>
@@ -702,8 +701,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
- X86_CR4_PAE | X86_CR4_SMEP;
+ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+ X86_CR4_SMEP | X86_CR4_SMAP;
+
if (cr4 & CR4_RESERVED_BITS)
return 1;
@@ -744,9 +744,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
- if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
- update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -3196,8 +3193,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
{
- struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
- u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+ u64 xstate_bv = xsave->header.xfeatures;
u64 valid;
/*
@@ -3232,7 +3229,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
{
- struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
u64 valid;
@@ -3243,9 +3240,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
memcpy(xsave, src, XSAVE_HDR_OFFSET);
/* Set XSTATE_BV and possibly XCOMP_BV. */
- xsave->xsave_hdr.xstate_bv = xstate_bv;
+ xsave->header.xfeatures = xstate_bv;
if (cpu_has_xsaves)
- xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+ xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
* Copy each region from the non-compacted offset to the
@@ -3277,8 +3274,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
memcpy(guest_xsave->region,
- &vcpu->arch.guest_fpu.state->fxsave,
- sizeof(struct i387_fxsave_struct));
+ &vcpu->arch.guest_fpu.state.fxsave,
+ sizeof(struct fxregs_state));
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XSTATE_FPSSE;
}
@@ -3302,8 +3299,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
} else {
if (xstate_bv & ~XSTATE_FPSSE)
return -EINVAL;
- memcpy(&vcpu->arch.guest_fpu.state->fxsave,
- guest_xsave->region, sizeof(struct i387_fxsave_struct));
+ memcpy(&vcpu->arch.guest_fpu.state.fxsave,
+ guest_xsave->region, sizeof(struct fxregs_state));
}
return 0;
}
@@ -6197,6 +6194,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return;
kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
/*
@@ -6597,11 +6596,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
+ struct fpu *fpu = &current->thread.fpu;
int r;
sigset_t sigsaved;
- if (!tsk_used_math(current) && init_fpu(current))
- return -ENOMEM;
+ fpu__activate_curr(fpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -6971,8 +6970,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct i387_fxsave_struct *fxsave =
- &vcpu->arch.guest_fpu.state->fxsave;
+ struct fxregs_state *fxsave =
+ &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
@@ -6988,8 +6987,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct i387_fxsave_struct *fxsave =
- &vcpu->arch.guest_fpu.state->fxsave;
+ struct fxregs_state *fxsave =
+ &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -7003,17 +7002,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
-int fx_init(struct kvm_vcpu *vcpu)
+static void fx_init(struct kvm_vcpu *vcpu)
{
- int err;
-
- err = fpu_alloc(&vcpu->arch.guest_fpu);
- if (err)
- return err;
-
- fpu_finit(&vcpu->arch.guest_fpu);
+ fpstate_init(&vcpu->arch.guest_fpu.state);
if (cpu_has_xsaves)
- vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+ vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
@@ -7022,14 +7015,6 @@ int fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.xcr0 = XSTATE_FP;
vcpu->arch.cr0 |= X86_CR0_ET;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(fx_init);
-
-static void fx_free(struct kvm_vcpu *vcpu)
-{
- fpu_free(&vcpu->arch.guest_fpu);
}
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
@@ -7045,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
__kernel_fpu_begin();
- fpu_restore_checking(&vcpu->arch.guest_fpu);
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
trace_kvm_fpu(1);
}
@@ -7057,10 +7042,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
return;
vcpu->guest_fpu_loaded = 0;
- fpu_save_init(&vcpu->arch.guest_fpu);
+ copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+ if (!vcpu->arch.eager_fpu)
+ kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
trace_kvm_fpu(0);
}
@@ -7069,18 +7056,27 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvmclock_reset(vcpu);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
- fx_free(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
+ struct kvm_vcpu *vcpu;
+
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
- return kvm_x86_ops->vcpu_create(kvm, id);
+
+ vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+ /*
+ * Activate fpu unconditionally in case the guest needs eager FPU. It will be
+ * deactivated soon if it doesn't.
+ */
+ kvm_x86_ops->fpu_activate(vcpu);
+ return vcpu;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -7125,7 +7121,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
- fx_free(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
@@ -7351,9 +7346,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_mce_banks;
}
- r = fx_init(vcpu);
- if (r)
- goto fail_free_wbinvd_dirty_mask;
+ fx_init(vcpu);
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
vcpu->arch.pv_time_enabled = false;
@@ -7367,8 +7360,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_pmu_init(vcpu);
return 0;
-fail_free_wbinvd_dirty_mask:
- free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
+
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
fail_free_lapic:
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 8f9a133cc099..27f8eea0d6eb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -70,7 +70,7 @@
#include <asm/e820.h>
#include <asm/mce.h>
#include <asm/io.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/stackprotector.h>
#include <asm/reboot.h> /* for struct machine_ops */
#include <asm/kvm_para.h>
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index c9f2d9ba8dd8..e5e3ed8dc079 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -22,7 +22,7 @@
#include <linux/sched.h>
#include <linux/types.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/asm.h>
void *_mmx_memcpy(void *to, const void *from, size_t len)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 5eb715087b80..e407941d0488 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -38,8 +38,6 @@
#ifdef CONFIG_PREEMPT
THUNK ___preempt_schedule, preempt_schedule
-#ifdef CONFIG_CONTEXT_TRACKING
- THUNK ___preempt_schedule_context, preempt_schedule_context
-#endif
+ THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index f89ba4e93025..2198902329b5 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -49,9 +49,7 @@
#ifdef CONFIG_PREEMPT
THUNK ___preempt_schedule, preempt_schedule
-#ifdef CONFIG_CONTEXT_TRACKING
- THUNK ___preempt_schedule_context, preempt_schedule_context
-#endif
+ THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
#endif
#if defined(CONFIG_TRACE_IRQFLAGS) \
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e2f5e21c03b3..91d93b95bd86 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from kernel space to user space.
*
@@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user);
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
*
* Copy data from user space to kernel space.
*
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index dc8adad10a2f..dd76a05729b0 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -30,7 +30,7 @@ static void fclex(void)
}
/* Needs to be externally visible */
-void finit_soft_fpu(struct i387_soft_struct *soft)
+void fpstate_init_soft(struct swregs_state *soft)
{
struct address *oaddr, *iaddr;
memset(soft, 0, sizeof(*soft));
@@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft)
void finit(void)
{
- finit_soft_fpu(&current->thread.fpu.state->soft);
+ fpstate_init_soft(&current->thread.fpu.state.soft);
}
/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 9b868124128d..f37e84ab49f3 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -31,7 +31,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/user.h>
-#include <asm/i387.h>
+#include <asm/fpu/internal.h>
#include "fpu_system.h"
#include "fpu_emu.h"
@@ -147,13 +147,9 @@ void math_emulate(struct math_emu_info *info)
unsigned long code_base = 0;
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
struct desc_struct code_descriptor;
+ struct fpu *fpu = &current->thread.fpu;
- if (!used_math()) {
- if (init_fpu(current)) {
- do_group_exit(SIGKILL);
- return;
- }
- }
+ fpu__activate_curr(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
@@ -673,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal)
#endif /* PARANOID */
}
-#define S387 ((struct i387_soft_struct *)s387)
+#define S387 ((struct swregs_state *)s387)
#define sstatus_word() \
((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
@@ -682,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct i387_soft_struct *s387 = &target->thread.fpu.state->soft;
+ struct swregs_state *s387 = &target->thread.fpu.state.soft;
void *space = s387->st_space;
int ret;
int offset, other, i, tags, regnr, tag, newtop;
RE_ENTRANT_CHECK_OFF;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0,
- offsetof(struct i387_soft_struct, st_space));
+ offsetof(struct swregs_state, st_space));
RE_ENTRANT_CHECK_ON;
if (ret)
@@ -734,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- struct i387_soft_struct *s387 = &target->thread.fpu.state->soft;
+ struct swregs_state *s387 = &target->thread.fpu.state.soft;
const void *space = s387->st_space;
int ret;
int offset = (S387->ftop & 7) * 10, other = 80 - offset;
@@ -752,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target,
#endif /* PECULIAR_486 */
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0,
- offsetof(struct i387_soft_struct, st_space));
+ offsetof(struct swregs_state, st_space));
/* Copy all registers in stack order. */
if (!ret)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 2c614410a5f3..9ccecb61a4fa 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -31,7 +31,7 @@
#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
== (1 << 10))
-#define I387 (current->thread.fpu.state)
+#define I387 (&current->thread.fpu.state)
#define FPU_info (I387->soft.info)
#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 181c53bac3a7..9dc909841739 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
+#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -1126,9 +1127,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * in a region with pagefaults disabled then we must not take the fault
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 4500142bc4aa..eecb207a2037 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
unsigned long vaddr;
int idx, type;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
@@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr)
#endif
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9ca35fc60cfe..2b7ece0e103a 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
unsigned long vaddr;
int idx, type;
+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
@@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr)
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5ead4d6cf3a7..27ff21216dfa 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -351,18 +351,20 @@ int arch_ioremap_pmd_supported(void)
*/
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
- void *addr;
- unsigned long start = phys & PAGE_MASK;
+ unsigned long start = phys & PAGE_MASK;
+ unsigned long offset = phys & ~PAGE_MASK;
+ void *vaddr;
/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
- if (addr)
- addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+ vaddr = ioremap_cache(start, PAGE_SIZE);
+ /* Only add the offset on success and return NULL if the ioremap() failed: */
+ if (vaddr)
+ vaddr += offset;
- return addr;
+ return vaddr;
}
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
@@ -371,7 +373,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
return;
iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
- return;
}
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c439ec478216..7a657f58bbea 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -10,13 +10,15 @@
#include <linux/syscalls.h>
#include <linux/sched/sysctl.h>
-#include <asm/i387.h>
#include <asm/insn.h>
#include <asm/mman.h>
#include <asm/mmu_context.h>
#include <asm/mpx.h>
#include <asm/processor.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/mpx.h>
static const char *mpx_mapping_name(struct vm_area_struct *vma)
{
@@ -32,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma)
return (vma->vm_ops == &mpx_vma_ops);
}
+static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
+{
+ if (is_64bit_mm(mm))
+ return MPX_BD_SIZE_BYTES_64;
+ else
+ return MPX_BD_SIZE_BYTES_32;
+}
+
+static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
+{
+ if (is_64bit_mm(mm))
+ return MPX_BT_SIZE_BYTES_64;
+ else
+ return MPX_BT_SIZE_BYTES_32;
+}
+
/*
* This is really a simplified "vm_mmap". It only handles MPX
* bounds tables (the bounds directory is user-allocated).
@@ -47,8 +65,8 @@ static unsigned long mpx_mmap(unsigned long len)
vm_flags_t vm_flags;
struct vm_area_struct *vma;
- /* Only bounds table and bounds directory can be allocated here */
- if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
+ /* Only bounds table can be allocated here */
+ if (len != mpx_bt_size_bytes(mm))
return -EINVAL;
down_write(&mm->mmap_sem);
@@ -272,10 +290,9 @@ bad_opcode:
*
* The caller is expected to kfree() the returned siginfo_t.
*/
-siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
- struct xsave_struct *xsave_buf)
+siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
{
- struct bndreg *bndregs, *bndreg;
+ const struct bndreg *bndregs, *bndreg;
siginfo_t *info = NULL;
struct insn insn;
uint8_t bndregno;
@@ -295,8 +312,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
err = -EINVAL;
goto err_out;
}
- /* get the bndregs _area_ of the xsave structure */
- bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS);
+ /* get bndregs field from current task's xsave area */
+ bndregs = get_xsave_field_ptr(XSTATE_BNDREGS);
if (!bndregs) {
err = -EINVAL;
goto err_out;
@@ -334,6 +351,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
err = -EINVAL;
goto err_out;
}
+ trace_mpx_bounds_register_exception(info->si_addr, bndreg);
return info;
err_out:
/* info might be NULL, but kfree() handles that */
@@ -341,25 +359,18 @@ err_out:
return ERR_PTR(err);
}
-static __user void *task_get_bounds_dir(struct task_struct *tsk)
+static __user void *mpx_get_bounds_dir(void)
{
- struct bndcsr *bndcsr;
+ const struct bndcsr *bndcsr;
if (!cpu_feature_enabled(X86_FEATURE_MPX))
return MPX_INVALID_BOUNDS_DIR;
/*
- * 32-bit binaries on 64-bit kernels are currently
- * unsupported.
- */
- if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
- return MPX_INVALID_BOUNDS_DIR;
- /*
* The bounds directory pointer is stored in a register
* only accessible if we first do an xsave.
*/
- fpu_save_init(&tsk->thread.fpu);
- bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
if (!bndcsr)
return MPX_INVALID_BOUNDS_DIR;
@@ -378,10 +389,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk)
(bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
}
-int mpx_enable_management(struct task_struct *tsk)
+int mpx_enable_management(void)
{
void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
- struct mm_struct *mm = tsk->mm;
+ struct mm_struct *mm = current->mm;
int ret = 0;
/*
@@ -390,11 +401,12 @@ int mpx_enable_management(struct task_struct *tsk)
* directory into XSAVE/XRSTOR Save Area and enable MPX through
* XRSTOR instruction.
*
- * fpu_xsave() is expected to be very expensive. Storing the bounds
- * directory here means that we do not have to do xsave in the unmap
- * path; we can just use mm->bd_addr instead.
+ * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
+ * expected to be relatively expensive. Storing the bounds
+ * directory here means that we do not have to do xsave in the
+ * unmap path; we can just use mm->bd_addr instead.
*/
- bd_base = task_get_bounds_dir(tsk);
+ bd_base = mpx_get_bounds_dir();
down_write(&mm->mmap_sem);
mm->bd_addr = bd_base;
if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
@@ -404,7 +416,7 @@ int mpx_enable_management(struct task_struct *tsk)
return ret;
}
-int mpx_disable_management(struct task_struct *tsk)
+int mpx_disable_management(void)
{
struct mm_struct *mm = current->mm;
@@ -417,29 +429,59 @@ int mpx_disable_management(struct task_struct *tsk)
return 0;
}
+static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
+ unsigned long *curval,
+ unsigned long __user *addr,
+ unsigned long old_val, unsigned long new_val)
+{
+ int ret;
+ /*
+ * user_atomic_cmpxchg_inatomic() actually uses the sizeof()
+ * of the pointer that we pass to it to figure out how much
+ * data to cmpxchg. We have to be careful here not to
+ * pass a pointer to a 64-bit data type when we only want
+ * a 32-bit copy.
+ */
+ if (is_64bit_mm(mm)) {
+ ret = user_atomic_cmpxchg_inatomic(curval,
+ addr, old_val, new_val);
+ } else {
+ u32 uninitialized_var(curval_32);
+ u32 old_val_32 = old_val;
+ u32 new_val_32 = new_val;
+ u32 __user *addr_32 = (u32 __user *)addr;
+
+ ret = user_atomic_cmpxchg_inatomic(&curval_32,
+ addr_32, old_val_32, new_val_32);
+ *curval = curval_32;
+ }
+ return ret;
+}
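The same width dispatch can be sketched in user space with the GCC/Clang atomic builtins; the point is that the compare-exchange touches exactly 4 or exactly 8 bytes depending on the mode (the helper name and the test values are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool cmpxchg_entry(void *addr, unsigned long *curval,
                          unsigned long old_val, unsigned long new_val,
                          bool is_64bit)
{
        bool ok;

        if (is_64bit) {
                uint64_t exp = old_val;
                ok = __atomic_compare_exchange_n((uint64_t *)addr, &exp,
                                                 (uint64_t)new_val, false,
                                                 __ATOMIC_SEQ_CST,
                                                 __ATOMIC_SEQ_CST);
                *curval = exp;
        } else {
                uint32_t exp = old_val;
                ok = __atomic_compare_exchange_n((uint32_t *)addr, &exp,
                                                 (uint32_t)new_val, false,
                                                 __ATOMIC_SEQ_CST,
                                                 __ATOMIC_SEQ_CST);
                *curval = exp;
        }
        return ok;
}

int main(void)
{
        uint32_t entry32 = 0;
        unsigned long cur;

        /* Only 4 bytes of entry32 are ever touched, never 8. */
        bool ok = cmpxchg_entry(&entry32, &cur, 0, 0x1001, false);
        printf("ok=%d cur=%lu entry=0x%x\n", ok, cur, (unsigned)entry32);
        return 0;
}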
+
/*
- * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each
- * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB,
+ * With 32-bit mode, a bounds directory is 4MB, and the size of each
+ * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
* and the size of each bounds table is 4MB.
*/
-static int allocate_bt(long __user *bd_entry)
+static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
{
unsigned long expected_old_val = 0;
unsigned long actual_old_val = 0;
unsigned long bt_addr;
+ unsigned long bd_new_entry;
int ret = 0;
/*
* Carve the virtual space out of userspace for the new
* bounds table:
*/
- bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES);
+ bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
if (IS_ERR((void *)bt_addr))
return PTR_ERR((void *)bt_addr);
/*
* Set the valid flag (kinda like _PAGE_PRESENT in a pte)
*/
- bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
+ bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
/*
* Go poke the address of the new bounds table in to the
@@ -452,8 +494,8 @@ static int allocate_bt(long __user *bd_entry)
* mmap_sem at this point, unlike some of the other part
* of the MPX code that have to pagefault_disable().
*/
- ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry,
- expected_old_val, bt_addr);
+ ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
+ expected_old_val, bd_new_entry);
if (ret)
goto out_unmap;
@@ -481,9 +523,10 @@ static int allocate_bt(long __user *bd_entry)
ret = -EINVAL;
goto out_unmap;
}
+ trace_mpx_new_bounds_table(bt_addr);
return 0;
out_unmap:
- vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES);
+ vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
return ret;
}
@@ -498,12 +541,13 @@ out_unmap:
* bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
* and the size of each bound table is 4MB.
*/
-static int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
+static int do_mpx_bt_fault(void)
{
unsigned long bd_entry, bd_base;
- struct bndcsr *bndcsr;
+ const struct bndcsr *bndcsr;
+ struct mm_struct *mm = current->mm;
- bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
if (!bndcsr)
return -EINVAL;
/*
@@ -520,13 +564,13 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
* the directory is.
*/
if ((bd_entry < bd_base) ||
- (bd_entry >= bd_base + MPX_BD_SIZE_BYTES))
+ (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
return -EINVAL;
- return allocate_bt((long __user *)bd_entry);
+ return allocate_bt(mm, (long __user *)bd_entry);
}
-int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
+int mpx_handle_bd_fault(void)
{
/*
* Userspace never asked us to manage the bounds tables,
@@ -535,7 +579,7 @@ int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
if (!kernel_managing_mpx_tables(current->mm))
return -EINVAL;
- if (do_mpx_bt_fault(xsave_buf)) {
+ if (do_mpx_bt_fault()) {
force_sig(SIGSEGV, current);
/*
* The force_sig() is essentially "handling" this
@@ -572,29 +616,55 @@ static int mpx_resolve_fault(long __user *addr, int write)
return 0;
}
+static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
+ unsigned long bd_entry)
+{
+ unsigned long bt_addr = bd_entry;
+ int align_to_bytes;
+ /*
+ * Bit 0 in a bd_entry is always the valid bit.
+ */
+ bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
+ /*
+ * Tables are naturally aligned at 8-byte boundaries
+ * on 64-bit and 4-byte boundaries on 32-bit. The
+ * documentation makes it appear that the low bits
+ * are ignored by the hardware, so we do the same.
+ */
+ if (is_64bit_mm(mm))
+ align_to_bytes = 8;
+ else
+ align_to_bytes = 4;
+ bt_addr &= ~(align_to_bytes-1);
+ return bt_addr;
+}
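Clearing the valid bit and then the low alignment bits is two masks; a short sketch with the alignment widths described above:

#include <stdio.h>

#define VALID_FLAG 0x1UL        /* stands in for MPX_BD_ENTRY_VALID_FLAG */

static unsigned long bd_entry_to_bt_addr(unsigned long bd_entry, int is_64bit)
{
        unsigned long bt_addr = bd_entry & ~VALID_FLAG;
        int align_to_bytes = is_64bit ? 8 : 4;

        return bt_addr & ~(unsigned long)(align_to_bytes - 1);
}

int main(void)
{
        /* 0x...07: valid bit plus ignored low bits set. */
        printf("%#lx\n", bd_entry_to_bt_addr(0x7f0000400007UL, 1));
        return 0;
}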
+
/*
* Get the base of bounds tables pointed by specific bounds
* directory entry.
*/
static int get_bt_addr(struct mm_struct *mm,
- long __user *bd_entry, unsigned long *bt_addr)
+ long __user *bd_entry_ptr,
+ unsigned long *bt_addr_result)
{
int ret;
int valid_bit;
+ unsigned long bd_entry;
+ unsigned long bt_addr;
- if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry)))
+ if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
return -EFAULT;
while (1) {
int need_write = 0;
pagefault_disable();
- ret = get_user(*bt_addr, bd_entry);
+ ret = get_user(bd_entry, bd_entry_ptr);
pagefault_enable();
if (!ret)
break;
if (ret == -EFAULT)
- ret = mpx_resolve_fault(bd_entry, need_write);
+ ret = mpx_resolve_fault(bd_entry_ptr, need_write);
/*
* If we could not resolve the fault, consider it
* userspace's fault and error out.
@@ -603,8 +673,8 @@ static int get_bt_addr(struct mm_struct *mm,
return ret;
}
- valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG;
- *bt_addr &= MPX_BT_ADDR_MASK;
+ valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
+ bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
/*
* When the kernel is managing bounds tables, a bounds directory
@@ -613,7 +683,7 @@ static int get_bt_addr(struct mm_struct *mm,
* data in the address field, we know something is wrong. This
* -EINVAL return will cause a SIGSEGV.
*/
- if (!valid_bit && *bt_addr)
+ if (!valid_bit && bt_addr)
return -EINVAL;
/*
* Do we have a completely zeroed bt entry? That is OK. It
@@ -624,19 +694,100 @@ static int get_bt_addr(struct mm_struct *mm,
if (!valid_bit)
return -ENOENT;
+ *bt_addr_result = bt_addr;
return 0;
}
+static inline int bt_entry_size_bytes(struct mm_struct *mm)
+{
+ if (is_64bit_mm(mm))
+ return MPX_BT_ENTRY_BYTES_64;
+ else
+ return MPX_BT_ENTRY_BYTES_32;
+}
+
+/*
+ * Takes a virtual address and turns it into the offset in bytes
+ * inside of the bounds table where the bounds table entry
+ * controlling 'addr' can be found.
+ */
+static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
+ unsigned long addr)
+{
+ unsigned long bt_table_nr_entries;
+ unsigned long offset = addr;
+
+ if (is_64bit_mm(mm)) {
+ /* Bottom 3 bits are ignored on 64-bit */
+ offset >>= 3;
+ bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
+ } else {
+ /* Bottom 2 bits are ignored on 32-bit */
+ offset >>= 2;
+ bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
+ }
+ /*
+ * We know the size of the table in to which we are
+ * indexing, and we have eliminated all the low bits
+ * which are ignored for indexing.
+ *
+ * Mask out all the high bits which we do not need
+ * to index in to the table. Note that the tables
+ * are always powers of two so this gives us a proper
+ * mask.
+ */
+ offset &= (bt_table_nr_entries-1);
+ /*
+ * We now have an entry offset in terms of *entries* in
+ * the table. We need to scale it back up to bytes.
+ */
+ offset *= bt_entry_size_bytes(mm);
+ return offset;
+}
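The computation drops the ignored low bits, masks down to a table index (the table size is a power of two, so a mask replaces a modulo), then scales the index back up to bytes. A sketch with assumed 64-bit geometry, a 4MB table of 32-byte entries:

#include <stdio.h>

#define BT_NR_ENTRIES   (1UL << 17)     /* assumed entries per table */
#define BT_ENTRY_BYTES  32UL            /* assumed bytes per entry */

static unsigned long bt_entry_offset(unsigned long addr)
{
        unsigned long offset = addr >> 3;       /* low 3 bits ignored on 64-bit */

        offset &= BT_NR_ENTRIES - 1;    /* power-of-2 table: mask, not mod */
        return offset * BT_ENTRY_BYTES; /* scale entries back up to bytes */
}

int main(void)
{
        printf("offset = %#lx\n", bt_entry_offset(0x7f0000401238UL));
        return 0;
}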
+
+/*
+ * How much virtual address space does a single bounds
+ * directory entry cover?
+ *
+ * Note, we need a long long because 4GB doesn't fit in
+ * to a long on 32-bit.
+ */
+static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
+{
+ unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
+ if (is_64bit_mm(mm))
+ return virt_space / MPX_BD_NR_ENTRIES_64;
+ else
+ return virt_space / MPX_BD_NR_ENTRIES_32;
+}
+
/*
* Free the backing physical pages of bounds table 'bt_addr'.
* Assume start...end is within that bounds table.
*/
-static int zap_bt_entries(struct mm_struct *mm,
+static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
unsigned long bt_addr,
- unsigned long start, unsigned long end)
+ unsigned long start_mapping, unsigned long end_mapping)
{
struct vm_area_struct *vma;
unsigned long addr, len;
+ unsigned long start;
+ unsigned long end;
+
+ /*
+ * If we 'end' on a boundary, the offset will be 0, which
+ * is not what we want. Back it up a byte to get the
+ * last bt entry. Then once we have the entry itself,
+ * move 'end' back up by the table entry size.
+ */
+ start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
+ end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
+ /*
+ * Move end back up by one entry. Among other things
+ * this ensures that it remains page-aligned and does
+ * not screw up zap_page_range()
+ */
+ end += bt_entry_size_bytes(mm);
/*
* Find the first overlapping vma. If vma->vm_start > start, there
@@ -648,7 +799,7 @@ static int zap_bt_entries(struct mm_struct *mm,
return -EINVAL;
/*
- * A NUMA policy on a VM_MPX VMA could cause this bouds table to
+ * A NUMA policy on a VM_MPX VMA could cause this bounds table to
* be split. So we need to look across the entire 'start -> end'
* range of this bounds table, find all of the VM_MPX VMAs, and
* zap only those.
@@ -666,27 +817,65 @@ static int zap_bt_entries(struct mm_struct *mm,
len = min(vma->vm_end, end) - addr;
zap_page_range(vma, addr, len, NULL);
+ trace_mpx_unmap_zap(addr, addr+len);
vma = vma->vm_next;
addr = vma->vm_start;
}
-
return 0;
}
-static int unmap_single_bt(struct mm_struct *mm,
+static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
+ unsigned long addr)
+{
+ /*
+ * There are several ways to derive the bd offsets. We
+ * use the following approach here:
+ * 1. We know the size of the virtual address space
+ * 2. We know the number of entries in a bounds table
+ * 3. We know that each entry covers a fixed amount of
+ * virtual address space.
+ * So, we can just divide the virtual address by the
+ * virtual space used by one entry to determine which
+ * entry "controls" the given virtual address.
+ */
+ if (is_64bit_mm(mm)) {
+ int bd_entry_size = 8; /* 64-bit pointer */
+ /*
+ * Take the 64-bit addressing hole in to account.
+ */
+ addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
+ return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
+ } else {
+ int bd_entry_size = 4; /* 32-bit pointer */
+ /*
+ * 32-bit has no hole so this case needs no mask
+ */
+ return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
+ }
+ /*
+ * The two return calls above are exact copies. If we
+ * pull out a single copy and put it in here, gcc won't
+ * realize that we're doing a power-of-2 divide and use
+ * shifts. It uses a real divide. If we put them up
+ * there, it manages to figure it out (gcc 4.8.3).
+ */
+}
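Dividing the hole-masked virtual address by the span one directory entry covers yields the entry index, which is then scaled by the entry size. A sketch with assumed 64-bit geometry (48 virtual-address bits, a 2GB directory of 8-byte entries):

#include <stdio.h>

int main(void)
{
        unsigned long long virt_space = 1ULL << 48;
        unsigned long long bd_nr_entries = (2ULL << 30) / 8;
        unsigned long long per_entry = virt_space / bd_nr_entries; /* 1MB */
        unsigned long long addr = 0x00007f0000401238ULL;

        addr &= virt_space - 1;         /* mask off the addressing hole */
        printf("bd entry offset = %#llx\n", (addr / per_entry) * 8);
        return 0;
}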
+
+static int unmap_entire_bt(struct mm_struct *mm,
long __user *bd_entry, unsigned long bt_addr)
{
unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
- unsigned long actual_old_val = 0;
+ unsigned long uninitialized_var(actual_old_val);
int ret;
while (1) {
int need_write = 1;
+ unsigned long cleared_bd_entry = 0;
pagefault_disable();
- ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry,
- expected_old_val, 0);
+ ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
+ bd_entry, expected_old_val, cleared_bd_entry);
pagefault_enable();
if (!ret)
break;
@@ -705,9 +894,8 @@ static int unmap_single_bt(struct mm_struct *mm,
if (actual_old_val != expected_old_val) {
/*
* Someone else raced with us to unmap the table.
- * There was no bounds table pointed to by the
- * directory, so declare success. Somebody freed
- * it.
+ * That is OK, since we were both trying to do
+ * the same thing. Declare success.
*/
if (!actual_old_val)
return 0;
@@ -720,176 +908,113 @@ static int unmap_single_bt(struct mm_struct *mm,
*/
return -EINVAL;
}
-
/*
* Note, we are likely being called under do_munmap() already. To
* avoid recursion, do_munmap() will check whether it comes
* from one bounds table through VM_MPX flag.
*/
- return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES);
+ return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
}
-/*
- * If the bounds table pointed by bounds directory 'bd_entry' is
- * not shared, unmap this whole bounds table. Otherwise, only free
- * those backing physical pages of bounds table entries covered
- * in this virtual address region start...end.
- */
-static int unmap_shared_bt(struct mm_struct *mm,
- long __user *bd_entry, unsigned long start,
- unsigned long end, bool prev_shared, bool next_shared)
+static int try_unmap_single_bt(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
- unsigned long bt_addr;
- int ret;
-
- ret = get_bt_addr(mm, bd_entry, &bt_addr);
+ struct vm_area_struct *next;
+ struct vm_area_struct *prev;
/*
- * We could see an "error" ret for not-present bounds
- * tables (not really an error), or actual errors, but
- * stop unmapping either way.
+ * "bta" == Bounds Table Area: the area controlled by the
+ * bounds table that we are unmapping.
*/
- if (ret)
- return ret;
-
- if (prev_shared && next_shared)
- ret = zap_bt_entries(mm, bt_addr,
- bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
- bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
- else if (prev_shared)
- ret = zap_bt_entries(mm, bt_addr,
- bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
- bt_addr+MPX_BT_SIZE_BYTES);
- else if (next_shared)
- ret = zap_bt_entries(mm, bt_addr, bt_addr,
- bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
- else
- ret = unmap_single_bt(mm, bd_entry, bt_addr);
-
- return ret;
-}
-
-/*
- * A virtual address region being munmap()ed might share bounds table
- * with adjacent VMAs. We only need to free the backing physical
- * memory of these shared bounds tables entries covered in this virtual
- * address region.
- */
-static int unmap_edge_bts(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
+ unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
+ unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
+ unsigned long uninitialized_var(bt_addr);
+ void __user *bde_vaddr;
int ret;
- long __user *bde_start, *bde_end;
- struct vm_area_struct *prev, *next;
- bool prev_shared = false, next_shared = false;
-
- bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
- bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
-
/*
- * Check whether bde_start and bde_end are shared with adjacent
- * VMAs.
- *
- * We already unliked the VMAs from the mm's rbtree so 'start'
+ * We already unlinked the VMAs from the mm's rbtree so 'start'
* is guaranteed to be in a hole. This gets us the first VMA
* before the hole in to 'prev' and the next VMA after the hole
* in to 'next'.
*/
next = find_vma_prev(mm, start, &prev);
- if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1))
- == bde_start)
- prev_shared = true;
- if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start))
- == bde_end)
- next_shared = true;
-
/*
- * This virtual address region being munmap()ed is only
- * covered by one bounds table.
- *
- * In this case, if this table is also shared with adjacent
- * VMAs, only part of the backing physical memory of the bounds
- * table need be freeed. Otherwise the whole bounds table need
- * be unmapped.
- */
- if (bde_start == bde_end) {
- return unmap_shared_bt(mm, bde_start, start, end,
- prev_shared, next_shared);
+ * Do not count other MPX bounds table VMAs as neighbors.
+ * Although theoretically possible, we do not allow bounds
+ * tables for bounds tables so our heads do not explode.
+ * If we count them as neighbors here, we may end up with
+ * lots of tables even though we have no actual table
+ * entries in use.
+ */
+ while (next && is_mpx_vma(next))
+ next = next->vm_next;
+ while (prev && is_mpx_vma(prev))
+ prev = prev->vm_prev;
+ /*
+ * We know 'start' and 'end' lie within an area controlled
+ * by a single bounds table. See if there are any other
+ * VMAs controlled by that bounds table. If there are not
+ * then we can "expand" the are we are unmapping to possibly
+ * cover the entire table.
+ */
+ next = find_vma_prev(mm, start, &prev);
+ if ((!prev || prev->vm_end <= bta_start_vaddr) &&
+ (!next || next->vm_start >= bta_end_vaddr)) {
+ /*
+ * No neighbor VMAs controlled by the same bounds
+ * table. Try to unmap the whole thing.
+ */
+ start = bta_start_vaddr;
+ end = bta_end_vaddr;
}
+ bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
+ ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
/*
- * If more than one bounds tables are covered in this virtual
- * address region being munmap()ed, we need to separately check
- * whether bde_start and bde_end are shared with adjacent VMAs.
+ * No bounds table there, so nothing to unmap.
*/
- ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false);
- if (ret)
- return ret;
- ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared);
+ if (ret == -ENOENT) {
+ ret = 0;
+ return 0;
+ }
if (ret)
return ret;
-
- return 0;
+ /*
+ * We are unmapping an entire table, either because the
+ * unmap that started this whole process was large enough
+ * to cover an entire table, or because a smaller unmap
+ * was expanded above to cover the whole table.
+ */
+ if ((start == bta_start_vaddr) &&
+ (end == bta_end_vaddr))
+ return unmap_entire_bt(mm, bde_vaddr, bt_addr);
+ return zap_bt_entries_mapping(mm, bt_addr, start, end);
}
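
A user-space sketch of the "expand to the whole table" decision above; SPAN stands in for bd_entry_virt_space(mm) and the neighbor addresses are invented:

#include <stdio.h>

#define SPAN (1UL << 20)	/* hypothetical per-table span */

static void maybe_expand(unsigned long *start, unsigned long *end,
			 unsigned long prev_end, unsigned long next_start)
{
	unsigned long bta_start = *start & ~(SPAN - 1);
	unsigned long bta_end = bta_start + SPAN;

	/* no neighbor VMA shares this table's area: take the whole span */
	if (prev_end <= bta_start && next_start >= bta_end) {
		*start = bta_start;
		*end = bta_end;
	}
}

int main(void)
{
	unsigned long s = 0x00123000, e = 0x00145000;

	maybe_expand(&s, &e, 0x00080000, 0x00200000);
	printf("[%#lx, %#lx)\n", s, e);	/* expands to [0x100000, 0x200000) */
	return 0;
}
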
static int mpx_unmap_tables(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- int ret;
- long __user *bd_entry, *bde_start, *bde_end;
- unsigned long bt_addr;
-
- /*
- * "Edge" bounds tables are those which are being used by the region
- * (start -> end), but that may be shared with adjacent areas. If they
- * turn out to be completely unshared, they will be freed. If they are
- * shared, we will free the backing store (like an MADV_DONTNEED) for
- * areas used by this region.
- */
- ret = unmap_edge_bts(mm, start, end);
- switch (ret) {
- /* non-present tables are OK */
- case 0:
- case -ENOENT:
- /* Success, or no tables to unmap */
- break;
- case -EINVAL:
- case -EFAULT:
- default:
- return ret;
- }
-
- /*
- * Only unmap the bounds table that are
- * 1. fully covered
- * 2. not at the edges of the mapping, even if full aligned
- */
- bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
- bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
- for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) {
- ret = get_bt_addr(mm, bd_entry, &bt_addr);
- switch (ret) {
- case 0:
- break;
- case -ENOENT:
- /* No table here, try the next one */
- continue;
- case -EINVAL:
- case -EFAULT:
- default:
- /*
- * Note: we are being strict here.
- * Any time we run in to an issue
- * unmapping tables, we stop and
- * SIGSEGV.
- */
- return ret;
- }
-
- ret = unmap_single_bt(mm, bd_entry, bt_addr);
+ unsigned long one_unmap_start;
+ trace_mpx_unmap_search(start, end);
+
+ one_unmap_start = start;
+ while (one_unmap_start < end) {
+ int ret;
+ unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
+ bd_entry_virt_space(mm));
+ unsigned long one_unmap_end = end;
+ /*
+ * If the end is beyond the current bounds table,
+ * move it back so we only deal with a single table
+ * at a time.
+ */
+ if (one_unmap_end > next_unmap_start)
+ one_unmap_end = next_unmap_start;
+ ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
if (ret)
return ret;
- }
+ one_unmap_start = next_unmap_start;
+ }
return 0;
}
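
The loop above visits the unmap range one bounds-table span at a time. A stand-alone sketch of the same chunking, with an invented SPAN in place of bd_entry_virt_space(mm):

#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* mirrors the kernel macro */
#define SPAN		(1UL << 20)			/* hypothetical per-table span */

static void walk_range(unsigned long start, unsigned long end)
{
	unsigned long one_start = start;

	while (one_start < end) {
		/* first byte of the *next* table's area */
		unsigned long next_start = ALIGN(one_start + 1, SPAN);
		unsigned long one_end = end < next_start ? end : next_start;

		printf("chunk [%#lx, %#lx)\n", one_start, one_end);
		one_start = next_start;
	}
}

int main(void)
{
	walk_range(0x000ff000, 0x00301000);	/* crosses table boundaries */
	return 0;
}
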
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 987514396c1e..ddeff4844a10 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (is_ereg(dst_reg))
EMIT1(0x41);
EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
+
+ /* emit 'movzwl eax, ax' */
+ if (is_ereg(dst_reg))
+ EMIT3(0x45, 0x0F, 0xB7);
+ else
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
/* emit 'bswap eax' to swap lower 4 bytes */
@@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm32) {
+ case 16:
+ /* emit 'movzwl eax, ax' to zero-extend the 16-bit
+ * value into 64 bits
+ */
+ if (is_ereg(dst_reg))
+ EMIT3(0x45, 0x0F, 0xB7);
+ else
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
+ break;
+ case 32:
+ /* emit 'mov eax, eax' to clear the upper 32 bits */
+ if (is_ereg(dst_reg))
+ EMIT1(0x45);
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
+ break;
+ case 64:
+ /* nop */
+ break;
+ }
break;
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -938,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.cleanup_addr = proglen;
- for (pass = 0; pass < 10; pass++) {
+ /* JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large bpf programs
+ * may converge on the last pass. In such a case, do one more
+ * pass to emit the final image.
+ */
+ for (pass = 0; pass < 10 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
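
A rough C model of what the emitted instruction pair computes for the 16-bit byte swap (illustrative only, not the JIT itself): rotate the low word by 8, then zero-extend it so the upper bits of the destination are cleared, matching eBPF's requirement that 16/32-bit results be zero-extended.

#include <stdint.h>
#include <stdio.h>

static uint64_t bswap16_zext(uint64_t reg)
{
	uint16_t lo = (uint16_t)reg;

	lo = (uint16_t)((lo << 8) | (lo >> 8));	/* like 'ror ax, 8' */
	return (uint64_t)lo;			/* like 'movzwl': clears upper bits */
}

int main(void)
{
	/* low word 0x1234 swaps to 0x3412, everything above is cleared */
	printf("%#llx\n", (unsigned long long)bswap16_zext(0xdeadbeef1234ULL));
	return 0;
}
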
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index e4695985f9de..14a63ed6fe09 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
kfree(info);
}
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ * IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ * to access PCI configuration space.
+ *
+ * So explicitly filter out the PCI CFG IO ports [0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
+{
+ return (res->flags & IORESOURCE_IO) &&
+ res->start == 0xCF8 && res->end == 0xCFF;
+}
+
static void probe_pci_root_info(struct pci_root_info *info,
struct acpi_device *device,
int busnum, int domain,
@@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info,
"no IO and memory resources present in _CRS\n");
else
resource_list_for_each_entry_safe(entry, tmp, list) {
- if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
- (entry->res->flags & IORESOURCE_DISABLED))
+ if ((entry->res->flags & IORESOURCE_DISABLED) ||
+ resource_is_pcicfg_ioport(entry->res))
resource_list_destroy_entry(entry);
else
entry->res->name = info->name;
@@ -462,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- struct pci_sysdata *sd = bridge->bus->sysdata;
-
- ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+ /*
+ * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+ * here, pci_create_root_bus() has been called by someone else and
+ * sysdata is likely to be different from what we expect. Let it go in
+ * that case.
+ */
+ if (!bridge->dev.parent) {
+ struct pci_sysdata *sd = bridge->bus->sysdata;
+ ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+ }
return 0;
}
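
The resource_is_pcicfg_ioport() helper added above can be exercised in isolation. A stand-alone sketch with a minimal struct resource; the IORESOURCE_IO value is an assumption made for the sketch:

#include <stdbool.h>
#include <stdio.h>

#define IORESOURCE_IO 0x00000100	/* assumed flag bit for this sketch */

struct resource {
	unsigned long start, end, flags;
};

static bool resource_is_pcicfg_ioport(struct resource *res)
{
	return (res->flags & IORESOURCE_IO) &&
	       res->start == 0xCF8 && res->end == 0xCFF;
}

int main(void)
{
	struct resource cf8  = { 0xCF8, 0xCFF, IORESOURCE_IO };
	struct resource com1 = { 0x3F8, 0x3FF, IORESOURCE_IO };

	printf("%d %d\n", resource_is_pcicfg_ioport(&cf8),
	       resource_is_pcicfg_ioport(&com1));	/* prints: 1 0 */
	return 0;
}
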
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 02744df576d5..3b984c3aa1b0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -501,6 +501,8 @@ void __init efi_init(void)
if (efi_enabled(EFI_DBG))
print_efi_memmap();
+
+ efi_esrt_init();
}
void __init efi_late_init(void)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 757678fb26e1..0d7dd1f5ac36 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,10 +18,9 @@
#include <asm/mtrr.h>
#include <asm/page.h>
#include <asm/mce.h>
-#include <asm/xcr.h>
#include <asm/suspend.h>
+#include <asm/fpu/internal.h>
#include <asm/debugreg.h>
-#include <asm/fpu-internal.h> /* pcntxt_mask */
#include <asm/cpu.h>
#ifdef CONFIG_X86_32
@@ -155,6 +154,8 @@ static void fix_processor_context(void)
#endif
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
+
+ fpu__resume_cpu();
}
/**
@@ -221,12 +222,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
#endif
- /*
- * restore XCR0 for xsave capable cpu's.
- */
- if (cpu_has_xsave)
- xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
-
fix_processor_context();
do_fpu_end();
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 7e8a1a650435..b9531d343134 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 275a3a8b78af..e97032069f88 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94578efd3067..98088bf5906a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu)
return;
/*
* For BSP, PSE PGE are set in probe_page_size_mask(), for APs
- * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init.
+ * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
*/
if (cpu_has_pse)
cr4_set_bits_and_update_boot(X86_CR4_PSE);
@@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = {
static void __init xen_hvm_guest_init(void)
{
+ if (xen_pv_domain())
+ return;
+
init_hvm_pv_info();
xen_hvm_init_shared_info();
@@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
}
+#endif
static bool xen_nopv = false;
static __init int xen_parse_nopv(char *arg)
@@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg)
}
early_param("xen_nopv", xen_parse_nopv);
-static uint32_t __init xen_hvm_platform(void)
+static uint32_t __init xen_platform(void)
{
if (xen_nopv)
return 0;
- if (xen_pv_domain())
- return 0;
-
return xen_cpuid_base();
}
@@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void)
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
- .name = "Xen HVM",
- .detect = xen_hvm_platform,
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+ if (xen_pv_domain())
+ clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+const struct hypervisor_x86 x86_hyper_xen = {
+ .name = "Xen",
+ .detect = xen_platform,
+#ifdef CONFIG_XEN_PVHVM
.init_platform = xen_hvm_guest_init,
+#endif
.x2apic_available = xen_x2apic_para_available,
+ .set_cpu_features = xen_set_cpu_features,
};
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
-#endif
+EXPORT_SYMBOL(x86_hyper_xen);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c1edbc..9e2ba5c6e1dd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
#include "xen-ops.h"
#include "debugfs.h"
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(char *, irq_name);
+static bool xen_pvspin = true;
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void xen_qlock_kick(int cpu)
+{
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+}
+
+/*
+ * Halt the current CPU & release it back to the host
+ */
+static void xen_qlock_wait(u8 *byte, u8 val)
+{
+ int irq = __this_cpu_read(lock_kicker_irq);
+
+ /* If the kicker interrupt is not initialized yet, just spin */
+ if (irq == -1)
+ return;
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+ barrier();
+
+ /*
+ * We check the byte value after clearing pending IRQ to make sure
+ * that we won't miss a wakeup event because of the clearing.
+ *
+ * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
+ * So it is effectively a memory barrier for x86.
+ */
+ if (READ_ONCE(*byte) != val)
+ return;
+
+ /*
+ * If an interrupt happens here, it will leave the wakeup irq
+ * pending, which will cause xen_poll_irq() to return
+ * immediately.
+ */
+
+ /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+ xen_poll_irq(irq);
+}
+
+#else /* CONFIG_QUEUED_SPINLOCKS */
+
enum xen_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
__ticket_t want;
};
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
static cpumask_t waiting_cpus;
-static bool xen_pvspin = true;
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
}
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = xen_qlock_wait;
+ pv_lock_ops.kick = xen_qlock_kick;
+#else
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
+#endif
}
/*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
}
early_param("xen_nopvspin", xen_parse_nopvspin);
-#ifdef CONFIG_XEN_DEBUG_FS
+#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
static struct dentry *d_spin_debug;
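
A user-space analogue (C11 atomics) of the wait-side ordering in xen_qlock_wait() above; all names here are illustrative, and the final spin stands in for blocking in the hypervisor:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic bool kick_pending;	/* stands in for the event channel */

static void qlock_wait(_Atomic unsigned char *byte, unsigned char val)
{
	/* clear a stale kick (like xen_clear_irq_pending()) */
	atomic_store(&kick_pending, false);

	/*
	 * Re-check the lock byte *after* clearing the kick: if the lock
	 * was already released, the byte no longer matches and we return
	 * instead of blocking, so the wakeup cannot be lost.
	 */
	if (atomic_load(byte) != val)
		return;

	/* block until kicked (xen_poll_irq() in the real code) */
	while (!atomic_load(&kick_pending))
		;
}

int main(void)
{
	_Atomic unsigned char lock_byte = 0;

	/* single-threaded demo: the byte already changed, so no blocking */
	qlock_wait(&lock_byte, 1);
	puts("did not block");
	return 0;
}
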
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9497698645a..53b4c0811f4f 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data)
tick_resume_local();
}
+static void xen_vcpu_notify_suspend(void *data)
+{
+ tick_suspend_local();
+}
+
void xen_arch_resume(void)
{
on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
}
+
+void xen_arch_suspend(void)
+{
+ on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
+}
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 172a02a6ad14..ba78ccf651e7 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
return -EINVAL;
}
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ return NULL;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+}
+
#endif /* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 9e3571a6535c..83a44a33cfa1 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,10 +15,10 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
/* If we're in an interrupt or have no user
* context, we must not take the fault.
*/
- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
bad_page_fault(regs, address, SIGSEGV);
return;
}
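
For context, faulthandler_disabled() folds the old in_atomic() test together with the new pagefault-disable counter; its shape in this era's <linux/uaccess.h> is roughly the following (quoted from memory, consult the tree for the authoritative form):

/*
 * User access must not sleep if pagefaults were explicitly disabled
 * or we are in atomic context.
 */
#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
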
diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c
index 8cfb71ec0937..184ceadccc1a 100644
--- a/arch/xtensa/mm/highmem.c
+++ b/arch/xtensa/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
enum fixed_addresses idx;
unsigned long vaddr;
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr)
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
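
A kernel-style sketch of the usage pattern this change keeps safe; illustrative only, not a real driver. The mapping returned by kmap_atomic() is CPU-local, so preemption must stay disabled for the mapping's whole lifetime, which is exactly what the added preempt_disable()/preempt_enable() pair guarantees.

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(struct page *page, void *dst, size_t len)
{
	char *src = kmap_atomic(page);	/* preemption + pagefaults now off */

	memcpy(dst, src, len);		/* no sleeping allowed in this window */
	kunmap_atomic(src);		/* pagefaults + preemption back on */
}
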