summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-04-18 12:14:55 +0200
committerIngo Molnar <mingo@kernel.org>2014-04-18 12:14:55 +0200
commit1111b680d34bc19190f02a1b4479c3fcc592c22e (patch)
tree9d45ad88b9354205a2d1fd76a338ab8693a16d86 /arch
parent37b6cb475a6d74bc047c53bc323d6eb3113e27a4 (diff)
parent24223657806a0ebd0ae5c9caaf7b021091889cf2 (diff)
Merge branch 'perf/urgent' into perf/core, to pick up PMU driver fixes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/configs/bcm_defconfig2
-rw-r--r--arch/ia64/kernel/head.S2
-rw-r--r--arch/ia64/kernel/ivt.S2
-rw-r--r--arch/ia64/kvm/vmm_ivt.S2
-rw-r--r--arch/s390/include/asm/sigp.h19
-rw-r--r--arch/s390/include/asm/smp.h13
-rw-r--r--arch/s390/include/uapi/asm/unistd.h3
-rw-r--r--arch/s390/kernel/compat_wrapper.c3
-rw-r--r--arch/s390/kernel/dumpstack.c8
-rw-r--r--arch/s390/kernel/ptrace.c2
-rw-r--r--arch/s390/kernel/setup.c32
-rw-r--r--arch/s390/kernel/smp.c15
-rw-r--r--arch/s390/kernel/syscalls.S1
-rw-r--r--arch/s390/lib/uaccess.c5
-rw-r--r--arch/s390/mm/fault.c140
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c12
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c16
-rw-r--r--arch/x86/kernel/reboot.c72
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mmu.c38
-rw-r--r--arch/x86/kvm/mmu.h44
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c11
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/syscalls/Makefile2
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/tools/Makefile2
31 files changed, 369 insertions, 108 deletions
diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig
index 01004640ee4d..3df3f3a79ef4 100644
--- a/arch/arm/configs/bcm_defconfig
+++ b/arch/arm/configs/bcm_defconfig
@@ -132,7 +132,7 @@ CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_XZ_DEC=y
CONFIG_AVERAGE=y
-CONFIG_PINCTRL_CAPRI=y
+CONFIG_PINCTRL_BCM281XX=y
CONFIG_WATCHDOG=y
CONFIG_BCM_KONA_WDT=y
CONFIG_BCM_KONA_WDT_DEBUG=y
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index e6f80fcf013b..a4acddad0c78 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -259,7 +259,7 @@ start_ap:
* Switch into virtual mode:
*/
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI|IA64_PSR_AC)
+ |IA64_PSR_DI)
;;
mov cr.ipsr=r16
movl r17=1f
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 689ffcaa284e..18e794a57248 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -58,7 +58,7 @@
#include <asm/unistd.h>
#include <asm/errno.h>
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 24018484c6e9..397e34a63e18 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -64,7 +64,7 @@
#include "kvm_minstate.h"
#include "vti.h"
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index d091aa1aaf11..bf9c823d4020 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -31,4 +31,23 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+#ifndef __ASSEMBLY__
+
+/*
+ * Issue one SIGP instruction with order code @order and parameter @parm
+ * to the CPU addressed by @addr, and return the condition code that the
+ * instruction produced (extracted with ipm/srl).
+ *
+ * If the condition code is 1 and @status is non-NULL, the value left in
+ * register 1 by the instruction is stored to *@status; otherwise *@status
+ * is not written.
+ */
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ /* parm is passed in, and the result read back from, general register 1 */
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 160779394096..21703f85b48d 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,6 +7,8 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#include <asm/sigp.h>
+
#ifdef CONFIG_SMP
#include <asm/lowcore.h>
@@ -50,9 +52,18 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_yield(void) { }
-static inline void smp_stop_cpu(void) { }
static inline void smp_fill_possible_mask(void) { }
+/*
+ * Stop the CPU whose address stap() returns by sending it SIGP_STOP in an
+ * endless loop; the loop has no exit, so this function never returns.
+ *
+ * NOTE(review): this appears to live in the stub section used when
+ * CONFIG_SMP is not set, and stap() presumably yields the address of the
+ * calling CPU -- confirm both against the full header.
+ */
+static inline void smp_stop_cpu(void)
+{
+ u16 pcpu = stap();
+
+ for (;;) {
+ __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+ cpu_relax();
+ }
+}
+
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 5eb5c9ddb120..3802d2d3a18d 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -282,7 +282,8 @@
#define __NR_finit_module 344
#define __NR_sched_setattr 345
#define __NR_sched_getattr 346
-#define NR_syscalls 345
+#define __NR_renameat2 347
+#define NR_syscalls 348
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 824c39dfddfc..45cdb37aa6f8 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -1,5 +1,5 @@
/*
- * Compat sytem call wrappers.
+ * Compat system call wrappers.
*
* Copyright IBM Corp. 2014
*/
@@ -213,3 +213,4 @@ COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, i
COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index e6af9406987c..acb412442e5e 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -144,10 +144,10 @@ void show_registers(struct pt_regs *regs)
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p (%pSR)\n",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr,
- (void *) regs->psw.addr);
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 4ac8fafec95f..1c82619eb4f7 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -64,7 +64,7 @@ void update_cr_regs(struct task_struct *task)
if (task->thread.per_flags & PER_FLAG_NO_TE)
cr_new &= ~(1UL << 55);
if (cr_new != cr)
- __ctl_load(cr, 0, 0);
+ __ctl_load(cr_new, 0, 0);
/* Set or clear transaction execution TDC bits 62 and 63. */
__ctl_store(cr, 2, 2);
cr_new = cr & ~3UL;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f70f2489fa5f..88d1ca81e2dd 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1027,3 +1027,35 @@ void __init setup_arch(char **cmdline_p)
/* Setup zfcpdump support */
setup_zfcpdump();
}
+
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
+
+/*
+ * Handler for the "no_removal_warning" kernel parameter: record that the
+ * 31 bit removal notice printed by removal_warning() must be suppressed.
+ *
+ * @str is unused (the option takes no value).
+ *
+ * Returns 1 so the __setup() machinery treats the option as consumed;
+ * returning 0 would mark it as unhandled and let it fall through to the
+ * unknown-boot-option path.
+ */
+static int __init parse_no_removal_warning(char *str)
+{
+ no_removal_warning = 1;
+ return 1;
+}
+__setup("no_removal_warning", parse_no_removal_warning);
+
+/*
+ * Early initcall, compiled only under CONFIG_32BIT: unless the
+ * no_removal_warning flag was set from the kernel command line, print the
+ * 31 bit removal notice and then sleep uninterruptibly for 300 * HZ
+ * jiffies before returning. Always returns 0.
+ */
+static int __init removal_warning(void)
+{
+ if (no_removal_warning)
+ return 0;
+ printk(KERN_ALERT "\n\n");
+ printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+ printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+ printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+ printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+ printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+ printk(KERN_CONT "please let us know. Please write to:\n");
+ printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+ printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+ printk(KERN_CONT "Thank you!\n\n");
+ printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+ printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+ /* Deliberate boot delay so the notice stays visible on the console. */
+ schedule_timeout_uninterruptible(300 * HZ);
+ return 0;
+}
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 512ce1cde2a4..86e65ec3422b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -82,21 +82,6 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
- register unsigned int reg1 asm ("1") = parm;
- int cc;
-
- asm volatile(
- " sigp %1,%2,0(%3)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
- if (status && cc == 1)
- *status = reg1;
- return cc;
-}
-
static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
int cc;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 542ef488bac1..fe5cdf29a001 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -355,3 +355,4 @@ SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 23f866b4c7f1..7416efe8eae4 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -338,9 +338,6 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
register unsigned long reg0 asm("0") = 0;
unsigned long tmp1, tmp2;
- if (unlikely(!size))
- return 0;
- update_primary_asce(current);
asm volatile(
" la %2,0(%1)\n"
" la %3,0(%0,%1)\n"
@@ -359,6 +356,8 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
unsigned long __strnlen_user(const char __user *src, unsigned long size)
{
+ if (unlikely(!size))
+ return 0;
update_primary_asce(current);
return strnlen_user_srst(src, size);
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 19f623f1f21c..2f51a998a67e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -126,6 +126,133 @@ static inline int user_space_fault(struct pt_regs *regs)
return 0;
}
+/*
+ * Probe whether the unsigned long at @p can be read: returns the result of
+ * probe_kernel_address(), which the callers treat as "unreadable" when it
+ * is non-zero. The value read is discarded.
+ */
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Print, on one log line, the translation-table entries visited when
+ * walking @address through the tables designated by @asce.
+ *
+ * The walk is entered at the level selected by the ASCE type bits and
+ * falls through to the lower levels. It ends early when an entry is
+ * marked invalid (or large, for region-3 and segment entries), and prints
+ * "BAD" when a table slot cannot be read safely.
+ */
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%016lx ", asce);
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table = table + ((address >> 53) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R1:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table = table + ((address >> 42) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R2:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table = table + ((address >> 31) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R3:%016lx ", *table);
+ if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%016lx ", *table);
+ if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ }
+ /* Last level: index the page table with address bits 12..19. */
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%016lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+/*
+ * Non-CONFIG_64BIT variant: print, on one log line, the segment-table and
+ * page-table entries visited when walking @address through the table
+ * designated by @asce. Stops after the segment entry if it is marked
+ * invalid, and prints "BAD" when a table slot cannot be read safely.
+ */
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%08lx ", asce);
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%08lx ", *table);
+ if (*table & _SEGMENT_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%08lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+/*
+ * Log which address-space mode and which ASCE were in use for the fault
+ * described by @regs, then dump the translation-table entries for the
+ * failing address via dump_pagetable().
+ */
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Fault in ");
+ /* The low two bits of int_parm_long select the label printed here. */
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ /*
+ * Pick the ASCE to walk: the kernel ASCE when user_space_fault() says
+ * no, the gmap ASCE for a PF_VCPU task with a lowcore gmap (only with
+ * CONFIG_PGSTE), otherwise the user ASCE.
+ */
+ if (!user_space_fault(regs)) {
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ }
+#ifdef CONFIG_PGSTE
+ else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ asce = gmap->asce;
+ pr_cont("gmap ");
+ }
+#endif
+ else {
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
static inline void report_user_fault(struct pt_regs *regs, long signr)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
@@ -138,8 +265,9 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
regs->int_code);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %lX\n",
- regs->int_parm_long & __FAIL_ADDR_MASK);
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -177,11 +305,13 @@ static noinline void do_no_context(struct pt_regs *regs)
address = regs->int_parm_long & __FAIL_ADDR_MASK;
if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " at virtual kernel address %p\n", (void *)address);
+ " in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
- " at virtual user address %p\n", (void *)address);
-
+ " in virtual user address space\n");
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
}
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 602f57e590b5..d1b7c377a234 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -250,8 +250,8 @@ archclean:
PHONY += kvmconfig
kvmconfig:
$(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
- $(Q)yes "" | $(MAKE) oldconfig
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config
+ $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c961265..7de069afb382 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 4b9a9e9466bd..7c87424d4140 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -535,6 +535,7 @@ static int rapl_cpu_prepare(int cpu)
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu);
u64 ms;
+ u64 msr_rapl_power_unit_bits;
if (pmu)
return 0;
@@ -542,6 +543,9 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
+ /* rdmsrl_safe() returns 0 on success: bail out only if the read faulted */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -1;
@@ -555,8 +559,7 @@ static int rapl_cpu_prepare(int cpu)
*
* we cache in local PMU instance
*/
- rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit);
- pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
+ pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class;
/*
@@ -677,7 +680,9 @@ static int __init rapl_pmu_init(void)
cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
- rapl_cpu_prepare(cpu);
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ goto out;
rapl_cpu_init(cpu);
}
@@ -700,6 +705,7 @@ static int __init rapl_pmu_init(void)
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
+out:
cpu_notifier_register_done();
return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index b0cc3809723d..6e2537c32190 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -240,7 +240,7 @@ static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_s
return base;
}
-#define KB(x) ((x) * 1024)
+#define KB(x) ((x) * 1024UL)
#define MB(x) (KB (KB (x)))
#define GB(x) (MB (KB (x)))
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 79a3f9682871..61b17dc2c277 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -897,9 +897,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- switch (kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
+ if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) {
+ /* This must happen on single-stepping */
+ WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
+ kcb->kprobe_status != KPROBE_REENTER);
/*
* We are here because the instruction being single
* stepped caused a page fault. We reset the current
@@ -914,9 +915,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
else
reset_current_kprobe();
preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
+ } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
+ kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
* We increment the nmissed count for accounting,
* we can also use npre/npostfault count for accounting
@@ -945,10 +945,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* fixup routine could not handle it,
* Let do_page_fault() fix it.
*/
- break;
- default:
- break;
}
+
return 0;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 654b46574b91..3399d3a99730 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -114,8 +114,8 @@ EXPORT_SYMBOL(machine_real_restart);
*/
static int __init set_pci_reboot(const struct dmi_system_id *d)
{
- if (reboot_type != BOOT_CF9) {
- reboot_type = BOOT_CF9;
+ if (reboot_type != BOOT_CF9_FORCE) {
+ reboot_type = BOOT_CF9_FORCE;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
d->ident, "PCI");
}
@@ -458,20 +458,23 @@ void __attribute__((weak)) mach_reboot_fixups(void)
}
/*
- * Windows compatible x86 hardware expects the following on reboot:
+ * To the best of our knowledge Windows compatible x86 hardware expects
+ * the following on reboot:
*
* 1) If the FADT has the ACPI reboot register flag set, try it
* 2) If still alive, write to the keyboard controller
* 3) If still alive, write to the ACPI reboot register again
* 4) If still alive, write to the keyboard controller again
* 5) If still alive, call the EFI runtime service to reboot
- * 6) If still alive, write to the PCI IO port 0xCF9 to reboot
- * 7) If still alive, inform BIOS to do a proper reboot
+ * 6) If no EFI runtime service, call the BIOS to do a reboot
*
- * If the machine is still alive at this stage, it gives up. We default to
- * following the same pattern, except that if we're still alive after (7) we'll
- * try to force a triple fault and then cycle between hitting the keyboard
- * controller and doing that
+ * We default to following the same pattern. We also have
+ * two other reboot methods: 'triple fault' and 'PCI', which
+ * can be triggered via the reboot= kernel boot option or
+ * via quirks.
+ *
+ * This means that this function can never return, but it can misbehave
+ * by not rebooting properly and hanging.
*/
static void native_machine_emergency_restart(void)
{
@@ -492,6 +495,11 @@ static void native_machine_emergency_restart(void)
for (;;) {
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
+ case BOOT_ACPI:
+ acpi_reboot();
+ reboot_type = BOOT_KBD;
+ break;
+
case BOOT_KBD:
mach_reboot_fixups(); /* For board specific fixups */
@@ -509,43 +517,29 @@ static void native_machine_emergency_restart(void)
}
break;
- case BOOT_TRIPLE:
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_KBD;
- break;
-
- case BOOT_BIOS:
- machine_real_restart(MRR_BIOS);
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_TRIPLE;
- break;
-
- case BOOT_ACPI:
- acpi_reboot();
- reboot_type = BOOT_KBD;
- break;
-
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
- reboot_type = BOOT_CF9_COND;
+ reboot_type = BOOT_BIOS;
+ break;
+
+ case BOOT_BIOS:
+ machine_real_restart(MRR_BIOS);
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_CF9_SAFE;
break;
- case BOOT_CF9:
+ case BOOT_CF9_FORCE:
port_cf9_safe = true;
/* Fall through */
- case BOOT_CF9_COND:
+ case BOOT_CF9_SAFE:
if (port_cf9_safe) {
- u8 reboot_code = reboot_mode == REBOOT_WARM ?
- 0x06 : 0x0E;
+ u8 reboot_code = reboot_mode == REBOOT_WARM ? 0x06 : 0x0E;
u8 cf9 = inb(0xcf9) & ~reboot_code;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
@@ -553,7 +547,15 @@ static void native_machine_emergency_restart(void)
outb(cf9|reboot_code, 0xcf9);
udelay(50);
}
- reboot_type = BOOT_BIOS;
+ reboot_type = BOOT_TRIPLE;
+ break;
+
+ case BOOT_TRIPLE:
+ load_idt(&no_idt);
+ __asm__ __volatile__("int3");
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_KBD;
break;
}
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea60671ef8a..f47a104a749c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX);
+ F(ADX) | F(SMAP);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7ed8c1..eeecbed26ac7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
}
+/*
+ * True when the guest CPUID entry found by kvm_find_cpuid_entry(vcpu, 7, 0)
+ * exists and has the X86_FEATURE_SMAP bit set in its EBX value.
+ */
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9e5ddc..813d31038b93 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
}
}
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
- bool fault, x, w, u, wf, uf, ff, smep;
+ bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
- smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+ /*
+ * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+ * subject to SMAP restrictions, and cleared otherwise. The
+ * bit is only meaningful if the SMAP bit is set in CR4.
+ */
+ smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if cr4.smep */
- x &= !(smep && u && !uf);
+ x &= !(cr4_smep && u && !uf);
+
+ /*
+ * SMAP:kernel-mode data accesses from user-mode
+ * mappings should fault. A fault is considered
+ * as a SMAP violation if all of the following
+ * conditions are true:
+ * - X86_CR4_SMAP is set in CR4
+ * - A user page is accessed
+ * - Page fault in kernel mode
+ * - if CPL = 3 or X86_EFLAGS_AC is clear
+ *
+ * Here, we cover the first three conditions.
+ * The fourth is computed dynamically in
+ * permission_fault() and is in smapf.
+ *
+ * Also, SMAP does not affect instruction
+ * fetches, add the !ff check here to make it
+ * clearer.
+ */
+ smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept */
u = 1;
- fault = (ff && !x) || (uf && !u) || (wf && !w);
+ fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+ (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 292615274358..3842e70bdb7c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
* Will a fault with a given page-fault error code (pfec) cause a permission
* fault with the given access (in ACC_* format)?
*/
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
- unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ unsigned pte_access, unsigned pfec)
{
- return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+ int cpl = kvm_x86_ops->get_cpl(vcpu);
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+ /*
+ * If CPL < 3, SMAP prevention is disabled if EFLAGS.AC = 1.
+ *
+ * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+ * (these are implicit supervisor accesses) regardless of the value
+ * of EFLAGS.AC.
+ *
+ * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+ * the result in X86_EFLAGS_AC. We then insert it in place of
+ * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+ * but it will be one in index if SMAP checks are being overridden.
+ * It is important to keep this branchless.
+ */
+ unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ int index = (pfec >> 1) +
+ (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+ return (mmu->permissions[index] >> pte_access) & 1;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1bf68d3..123efd3ec29f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@ retry_walk:
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
- if (unlikely(permission_fault(mmu, pte_access, access))) {
+ if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
errcode |= PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f8e611..1f68c5831924 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
- * SMEP is disabled if CPU is in non-paging mode in
- * hardware. However KVM always uses paging mode to
+ * SMEP/SMAP is disabled if CPU is in non-paging mode
+ * in hardware. However KVM always uses paging mode to
* emulate guest non-paging mode with TDP.
- * To emulate this behavior, SMEP needs to be manually
- * disabled when guest switches to non-paging mode.
+ * To emulate this behavior, SMEP/SMAP needs to be
+ * manually disabled when guest switches to non-paging
+ * mode.
*/
- hw_cr4 &= ~X86_CR4_SMEP;
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd4d34c..8b8fc0b792ba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
+ if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+ return 1;
+
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
+ if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+ update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
{
struct timespec ts;
- WARN_ON(preemptible());
ktime_get_ts(&ts);
monotonic_to_bootbased(&ts);
return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
| (write ? PFERR_WRITE_MASK : 0);
if (vcpu_match_mmio_gva(vcpu, gva)
- && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+ && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+ vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index f325af26107c..3323c2745248 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -54,5 +54,7 @@ syshdr-$(CONFIG_X86_64) += syscalls_64.h
targets += $(uapisyshdr-y) $(syshdr-y)
+PHONY += all
all: $(addprefix $(uapi)/,$(uapisyshdr-y))
all: $(addprefix $(out)/,$(syshdr-y))
+ @:
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 96bc506ac6de..d6b867921612 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -359,3 +359,4 @@
350 i386 finit_module sys_finit_module
351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index e8120346903b..604a37efd4d5 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -40,4 +40,6 @@ $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/ina
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-y += relocs
relocs-objs := relocs_32.o relocs_64.o relocs_common.o
+PHONY += relocs
relocs: $(obj)/relocs
+ @: