summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig22
-rw-r--r--arch/x86/boot/compressed/eboot.c32
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c5
-rw-r--r--arch/x86/include/asm/acpi.h23
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/atomic.h4
-rw-r--r--arch/x86/include/asm/atomic64_64.h4
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/hpet.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/include/asm/mce.h34
-rw-r--r--arch/x86/include/asm/microcode.h26
-rw-r--r--arch/x86/include/asm/microcode_amd.h3
-rw-r--r--arch/x86/include/asm/microcode_intel.h10
-rw-r--r--arch/x86/include/asm/string_64.h5
-rw-r--r--arch/x86/include/uapi/asm/mce.h2
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c34
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c8
-rw-r--r--arch/x86/kernel/cpu/microcode/Makefile3
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c509
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c440
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c232
-rw-r--r--arch/x86/kernel/cpu/microcode/core_early.c170
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c791
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_early.c808
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_lib.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c39
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_bts.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_cstate.c694
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c61
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c34
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/head_32.S5
-rw-r--r--arch/x86/kernel/hpet.c29
-rw-r--r--arch/x86/kernel/pci-dma.c5
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/setup.c114
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/tsc.c35
-rw-r--r--arch/x86/kvm/emulate.c10
-rw-r--r--arch/x86/kvm/vmx.c26
-rw-r--r--arch/x86/kvm/x86.c135
-rw-r--r--arch/x86/lib/x86-opcode-map.txt24
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/pageattr.c9
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c9
-rw-r--r--arch/x86/platform/efi/efi.c28
-rw-r--r--arch/x86/ras/Kconfig4
-rw-r--r--arch/x86/ras/mce_amd_inj.c103
-rw-r--r--arch/x86/um/ldt.c5
61 files changed, 2835 insertions, 1862 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96d058a87100..255ea22ccbec 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1123,8 +1123,10 @@ config X86_REBOOTFIXUPS
Say N otherwise.
config MICROCODE
- tristate "CPU microcode loading support"
+ bool "CPU microcode loading support"
+ default y
depends on CPU_SUP_AMD || CPU_SUP_INTEL
+ depends on BLK_DEV_INITRD
select FW_LOADER
---help---
@@ -1166,24 +1168,6 @@ config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
-config MICROCODE_INTEL_EARLY
- bool
-
-config MICROCODE_AMD_EARLY
- bool
-
-config MICROCODE_EARLY
- bool "Early load microcode"
- depends on MICROCODE=y && BLK_DEV_INITRD
- select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
- select MICROCODE_AMD_EARLY if MICROCODE_AMD
- default y
- help
- This option provides functionality to read additional microcode data
- at the beginning of initrd image. The data tells kernel to load
- microcode to CPU's as early as possible. No functional change if no
- microcode data is glued to the initrd, therefore it's safe to say Y.
-
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ee1b6d346b98..583d539a4197 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -624,7 +624,7 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
static efi_status_t
__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
struct efi_graphics_output_mode_info **info,
- unsigned long *size, u32 *fb_base)
+ unsigned long *size, u64 *fb_base)
{
struct efi_graphics_output_protocol_mode_32 *mode;
efi_status_t status;
@@ -650,7 +650,8 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
unsigned long nr_gops;
u16 width, height;
u32 pixels_per_scan_line;
- u32 fb_base;
+ u32 ext_lfb_base;
+ u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
efi_status_t status;
@@ -667,6 +668,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u32 h = handles[i];
+ u64 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop32);
@@ -678,7 +680,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query32(gop32, &info, &size, &fb_base);
+ status = __gop_query32(gop32, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -692,6 +694,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -713,6 +716,13 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
si->lfb_width = width;
si->lfb_height = height;
si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
si->pages = 1;
setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
@@ -727,7 +737,7 @@ out:
static efi_status_t
__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
struct efi_graphics_output_mode_info **info,
- unsigned long *size, u32 *fb_base)
+ unsigned long *size, u64 *fb_base)
{
struct efi_graphics_output_protocol_mode_64 *mode;
efi_status_t status;
@@ -753,7 +763,8 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
unsigned long nr_gops;
u16 width, height;
u32 pixels_per_scan_line;
- u32 fb_base;
+ u32 ext_lfb_base;
+ u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
efi_status_t status;
@@ -770,6 +781,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u64 h = handles[i];
+ u64 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop64);
@@ -781,7 +793,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query64(gop64, &info, &size, &fb_base);
+ status = __gop_query64(gop64, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -795,6 +807,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -816,6 +829,13 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
si->lfb_width = width;
si->lfb_height = height;
si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
si->pages = 1;
setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2d6b309c8e9a..6236b9ec4b76 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -154,7 +154,7 @@ extra_header_fields:
#else
.quad 0 # ImageBase
#endif
- .long CONFIG_PHYSICAL_ALIGN # SectionAlignment
+ .long 0x20 # SectionAlignment
.long 0x20 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 80a0e4389c9a..bacaa13acac5 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,6 +554,11 @@ static int __init camellia_aesni_init(void)
{
const char *feature_name;
+ if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 3a45668f6dc3..94c18ebfd68c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -32,6 +32,10 @@
#include <asm/mpspec.h>
#include <asm/realmode.h>
+#ifdef CONFIG_ACPI_APEI
+# include <asm/pgtable_types.h>
+#endif
+
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
@@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void);
#define acpi_unlazy_tlb(x) leave_mm(x)
+#ifdef CONFIG_ACPI_APEI
+static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
+{
+ /*
+ * We currently have no way to look up the EFI memory map
+ * attributes for a region in a consistent way, because the
+ * memmap is discarded after efi_free_boot_services(). So if
+ * you call efi_mem_attributes() during boot and at runtime,
+ * you could theoretically see different attributes.
+ *
+ * Since we are yet to see any x86 platforms that require
+ * anything other than PAGE_KERNEL (some arm64 platforms
+ * require the equivalent of PAGE_KERNEL_NOCACHE), return that
+ * until we know differently.
+ */
+ return PAGE_KERNEL;
+}
+#endif
+
#endif /* _ASM_X86_ACPI_H */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 1a5da2e63aee..3c56ef1ae068 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
-static inline u16 amd_get_node_id(struct pci_dev *pdev)
+static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{
struct pci_dev *misc;
int i;
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index fb52aa644aab..ae5fb83e6d91 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -24,7 +24,7 @@
*/
static __always_inline int atomic_read(const atomic_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
*/
static __always_inline void atomic_set(atomic_t *v, int i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 50e33eff58de..037351022f54 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -18,7 +18,7 @@
*/
static inline long atomic64_read(const atomic64_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
*/
static inline void atomic64_set(atomic64_t *v, long i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ae68be92f755..0010c78c4998 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -105,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
extern pgd_t * __init efi_call_phys_prolog(void);
extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
+extern void __init efi_print_memmap(void);
extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5fa9fb0f8809..cc285ec4b2c1 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,10 +63,10 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
-extern int boot_hpet_disable;
+extern bool boot_hpet_disable;
extern u8 hpet_blockid;
-extern int hpet_force_user;
-extern u8 hpet_msi_disable;
+extern bool hpet_force_user;
+extern bool hpet_msi_disable;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee0382088..3a36ee704c30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1226,10 +1226,8 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2dbc0bf2b9f3..2ea4527e462f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -123,19 +123,27 @@ struct mca_config {
};
struct mce_vendor_flags {
- /*
- * overflow recovery cpuid bit indicates that overflow
- * conditions are not fatal
- */
- __u64 overflow_recov : 1,
-
- /*
- * SUCCOR stands for S/W UnCorrectable error COntainment
- * and Recovery. It indicates support for data poisoning
- * in HW and deferred error interrupts.
- */
- succor : 1,
- __reserved_0 : 62;
+ /*
+ * Indicates that overflow conditions are not fatal, when set.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
};
extern struct mce_vendor_flags mce_flags;
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 9e6278c7140e..34e62b1dcfce 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -27,7 +27,6 @@ struct cpu_signature {
struct device;
enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
-extern bool dis_ucode_ldr;
struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
@@ -55,6 +54,12 @@ struct ucode_cpu_info {
};
extern struct ucode_cpu_info ucode_cpu_info[];
+#ifdef CONFIG_MICROCODE
+int __init microcode_init(void);
+#else
+static inline int __init microcode_init(void) { return 0; };
+#endif
+
#ifdef CONFIG_MICROCODE_INTEL
extern struct microcode_ops * __init init_intel_microcode(void);
#else
@@ -75,7 +80,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
static inline void __exit exit_amd_microcode(void) {}
#endif
-#ifdef CONFIG_MICROCODE_EARLY
#define MAX_UCODE_COUNT 128
#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -150,22 +154,18 @@ static inline unsigned int x86_model(unsigned int sig)
return model;
}
+#ifdef CONFIG_MICROCODE
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
extern int __init save_microcode_in_initrd(void);
void reload_early_microcode(void);
extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
#else
-static inline void __init load_ucode_bsp(void) {}
-static inline void load_ucode_ap(void) {}
-static inline int __init save_microcode_in_initrd(void)
-{
- return 0;
-}
-static inline void reload_early_microcode(void) {}
-static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name)
-{
- return false;
-}
+static inline void __init load_ucode_bsp(void) { }
+static inline void load_ucode_ap(void) { }
+static inline int __init save_microcode_in_initrd(void) { return 0; }
+static inline void reload_early_microcode(void) { }
+static inline bool
+get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index ac6d328977a6..adfc847a395e 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -64,7 +64,7 @@ extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, s
#define PATCH_MAX_SIZE PAGE_SIZE
extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
-#ifdef CONFIG_MICROCODE_AMD_EARLY
+#ifdef CONFIG_MICROCODE_AMD
extern void __init load_ucode_amd_bsp(unsigned int family);
extern void load_ucode_amd_ap(void);
extern int __init save_microcode_in_initrd_amd(void);
@@ -76,4 +76,5 @@ static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
void reload_ucode_amd(void) {}
#endif
+extern bool check_current_patch_level(u32 *rev, bool early);
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 7991c606125d..8559b0102ea1 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -57,7 +57,7 @@ extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev);
extern int microcode_sanity_check(void *mc, int print_err);
extern int find_matching_signature(void *mc, unsigned int csig, int cpf);
-#ifdef CONFIG_MICROCODE_INTEL_EARLY
+#ifdef CONFIG_MICROCODE_INTEL
extern void __init load_ucode_intel_bsp(void);
extern void load_ucode_intel_ap(void);
extern void show_ucode_info_early(void);
@@ -71,13 +71,9 @@ static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL;
static inline void reload_ucode_intel(void) {}
#endif
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+#ifdef CONFIG_HOTPLUG_CPU
extern int save_mc_for_early(u8 *mc);
#else
-static inline int save_mc_for_early(u8 *mc)
-{
- return 0;
-}
+static inline int save_mc_for_early(u8 *mc) { return 0; }
#endif
-
#endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index e4661196994e..ff8b9a17dc4b 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
function. */
#define __HAVE_ARCH_MEMCPY 1
+extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#ifndef CONFIG_KMEMCHECK
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-extern void *memcpy(void *to, const void *from, size_t len);
-#else
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
#define memcpy(dst, src, len) \
({ \
size_t __len = (len); \
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 76880ede9a35..03429da2fa80 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -2,7 +2,7 @@
#define _UAPI_ASM_X86_MCE_H
#include <linux/types.h>
-#include <asm/ioctls.h>
+#include <linux/ioctl.h>
/* Fields are zero when not available */
struct mce {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c60bb162622..4f2821527014 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2547,7 +2547,9 @@ void __init setup_ioapic_dest(void)
mask = apic->target_cpus();
chip = irq_data_get_irq_chip(idata);
- chip->irq_set_affinity(idata, mask, false);
+ /* Might be lapic_chip for irq 0 */
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(idata, mask, false);
}
}
#endif
@@ -2907,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_data *irq_data;
struct mp_chip_data *data;
struct irq_alloc_info *info = arg;
+ unsigned long flags;
if (!info || nr_irqs > 1)
return -EINVAL;
@@ -2939,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
cfg = irqd_cfg(irq_data);
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+
+ local_irq_save(flags);
if (info->ioapic_entry)
mp_setup_entry(cfg, data, info->ioapic_entry);
mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
+ local_irq_restore(flags);
apic_printk(APIC_VERBOSE, KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c6bed2..58031303e304 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index be4febc58b94..e38d338a6447 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -157,7 +157,7 @@ struct _cpuid4_info_regs {
struct amd_northbridge *nb;
};
-unsigned short num_cache_leaves;
+static unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
@@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
*
* @returns: the disabled index if used or negative value if slot free.
*/
-int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
unsigned int reg = 0;
@@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
*
* @return: 0 on success, error status on failure
*/
-int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
- unsigned long index)
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+ unsigned slot, unsigned long index)
{
int ret = 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9d014b82a124..c5b0d562dbf5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1586,6 +1586,8 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
winchip_mcheck_init(c);
return 1;
break;
+ default:
+ return 0;
}
return 0;
@@ -1605,6 +1607,8 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1));
+ mce_flags.smca = !!(ebx & BIT(3));
+
break;
}
@@ -2042,7 +2046,7 @@ int __init mcheck_init(void)
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
-static int mce_disable_error_reporting(void)
+static void mce_disable_error_reporting(void)
{
int i;
@@ -2052,17 +2056,32 @@ static int mce_disable_error_reporting(void)
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
- return 0;
+ return;
+}
+
+static void vendor_disable_error_reporting(void)
+{
+ /*
+ * Don't clear on Intel CPUs. Some of these MSRs are socket-wide.
+ * Disabling them for just a single offlined CPU is bad, since it will
+ * inhibit reporting for all shared resources on the socket like the
+ * last level cache (LLC), the integrated memory controller (iMC), etc.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return;
+
+ mce_disable_error_reporting();
}
static int mce_syscore_suspend(void)
{
- return mce_disable_error_reporting();
+ vendor_disable_error_reporting();
+ return 0;
}
static void mce_syscore_shutdown(void)
{
- mce_disable_error_reporting();
+ vendor_disable_error_reporting();
}
/*
@@ -2342,19 +2361,14 @@ static void mce_device_remove(unsigned int cpu)
static void mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
- int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
- for (i = 0; i < mca_cfg.banks; i++) {
- struct mce_bank *b = &mce_banks[i];
- if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), 0);
- }
+ vendor_disable_error_reporting();
}
static void mce_reenable_cpu(void *h)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1af51b1586d7..2c5aaf8c2e2f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -503,14 +503,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
- /* Check whether a vector already exists */
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG
- "CPU%d: Thermal LVT vector (%#x) already installed\n",
- cpu, (h & APIC_VECTOR_MASK));
- return;
- }
-
/* early Pentium M models use different method for enabling TM2 */
if (cpu_has(c, X86_FEATURE_TM2)) {
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile
index 285c85427c32..220b1a508513 100644
--- a/arch/x86/kernel/cpu/microcode/Makefile
+++ b/arch/x86/kernel/cpu/microcode/Makefile
@@ -2,6 +2,3 @@ microcode-y := core.o
obj-$(CONFIG_MICROCODE) += microcode.o
microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o
microcode-$(CONFIG_MICROCODE_AMD) += amd.o
-obj-$(CONFIG_MICROCODE_EARLY) += core_early.o
-obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o
-obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 12829c3ced3c..2233f8a76615 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -1,5 +1,9 @@
/*
* AMD CPU Microcode Update Driver for Linux
+ *
+ * This driver allows to upgrade microcode on F10h AMD
+ * CPUs and later.
+ *
* Copyright (C) 2008-2011 Advanced Micro Devices Inc.
*
* Author: Peter Oruba <peter.oruba@amd.com>
@@ -7,34 +11,31 @@
* Based on work by:
* Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
*
- * Maintainers:
- * Andreas Herrmann <herrmann.der.user@googlemail.com>
- * Borislav Petkov <bp@alien8.de>
+ * early loader:
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
*
- * This driver allows to upgrade microcode on F10h AMD
- * CPUs and later.
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ * Fixes: Borislav Petkov <bp@suse.de>
*
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
+#define pr_fmt(fmt) "microcode: " fmt
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
+#include <linux/earlycpio.h>
#include <linux/firmware.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/initrd.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/pci.h>
+#include <asm/microcode_amd.h>
#include <asm/microcode.h>
#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/cpu.h>
#include <asm/msr.h>
-#include <asm/microcode_amd.h>
-
-MODULE_DESCRIPTION("AMD Microcode Update Driver");
-MODULE_AUTHOR("Peter Oruba");
-MODULE_LICENSE("GPL v2");
static struct equiv_cpu_entry *equiv_cpu_table;
@@ -47,6 +48,432 @@ struct ucode_patch {
static LIST_HEAD(pcache);
+/*
+ * This points to the current valid container of microcode patches which we will
+ * save from the initrd before jettisoning its contents.
+ */
+static u8 *container;
+static size_t container_size;
+
+static u32 ucode_new_rev;
+u8 amd_ucode_patch[PATCH_MAX_SIZE];
+static u16 this_equiv_id;
+
+static struct cpio_data ucode_cpio;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+ long offset = 0;
+ char *path;
+ void *start;
+ size_t size;
+
+#ifdef CONFIG_X86_32
+ struct boot_params *p;
+
+ /*
+ * On 32-bit, early load occurs before paging is turned on so we need
+ * to use physical addresses.
+ */
+ p = (struct boot_params *)__pa_nodebug(&boot_params);
+ path = (char *)__pa_nodebug(ucode_path);
+ start = (void *)p->hdr.ramdisk_image;
+ size = p->hdr.ramdisk_size;
+#else
+ path = ucode_path;
+ start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+ size = boot_params.hdr.ramdisk_size;
+#endif
+
+ return find_cpio_data(path, start, size, &offset);
+}
+
+static size_t compute_container_size(u8 *data, u32 total_size)
+{
+ size_t size = 0;
+ u32 *header = (u32 *)data;
+
+ if (header[0] != UCODE_MAGIC ||
+ header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+ header[2] == 0) /* size */
+ return size;
+
+ size = header[2] + CONTAINER_HDR_SZ;
+ total_size -= size;
+ data += size;
+
+ while (total_size) {
+ u16 patch_size;
+
+ header = (u32 *)data;
+
+ if (header[0] != UCODE_UCODE_TYPE)
+ break;
+
+ /*
+ * Sanity-check patch size.
+ */
+ patch_size = header[1];
+ if (patch_size > PATCH_MAX_SIZE)
+ break;
+
+ size += patch_size + SECTION_HDR_SIZE;
+ data += patch_size + SECTION_HDR_SIZE;
+ total_size -= patch_size + SECTION_HDR_SIZE;
+ }
+
+ return size;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
+{
+ struct equiv_cpu_entry *eq;
+ size_t *cont_sz;
+ u32 *header;
+ u8 *data, **cont;
+ u8 (*patch)[PATCH_MAX_SIZE];
+ u16 eq_id = 0;
+ int offset, left;
+ u32 rev, eax, ebx, ecx, edx;
+ u32 *new_rev;
+
+#ifdef CONFIG_X86_32
+ new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+ cont_sz = (size_t *)__pa_nodebug(&container_size);
+ cont = (u8 **)__pa_nodebug(&container);
+ patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
+#else
+ new_rev = &ucode_new_rev;
+ cont_sz = &container_size;
+ cont = &container;
+ patch = &amd_ucode_patch;
+#endif
+
+ data = ucode;
+ left = size;
+ header = (u32 *)data;
+
+ /* find equiv cpu table */
+ if (header[0] != UCODE_MAGIC ||
+ header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+ header[2] == 0) /* size */
+ return;
+
+ eax = 0x00000001;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ while (left > 0) {
+ eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+ *cont = data;
+
+ /* Advance past the container header */
+ offset = header[2] + CONTAINER_HDR_SZ;
+ data += offset;
+ left -= offset;
+
+ eq_id = find_equiv_id(eq, eax);
+ if (eq_id) {
+ this_equiv_id = eq_id;
+ *cont_sz = compute_container_size(*cont, left + offset);
+
+ /*
+ * truncate how much we need to iterate over in the
+ * ucode update loop below
+ */
+ left = *cont_sz - offset;
+ break;
+ }
+
+ /*
+ * support multiple container files appended together. if this
+ * one does not have a matching equivalent cpu entry, we fast
+ * forward to the next container file.
+ */
+ while (left > 0) {
+ header = (u32 *)data;
+ if (header[0] == UCODE_MAGIC &&
+ header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+ break;
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+
+ /* mark where the next microcode container file starts */
+ offset = data - (u8 *)ucode;
+ ucode = data;
+ }
+
+ if (!eq_id) {
+ *cont = NULL;
+ *cont_sz = 0;
+ return;
+ }
+
+ if (check_current_patch_level(&rev, true))
+ return;
+
+ while (left > 0) {
+ struct microcode_amd *mc;
+
+ header = (u32 *)data;
+ if (header[0] != UCODE_UCODE_TYPE || /* type */
+ header[1] == 0) /* size */
+ break;
+
+ mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+
+ if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+
+ if (!__apply_microcode_amd(mc)) {
+ rev = mc->hdr.patch_id;
+ *new_rev = rev;
+
+ if (save_patch)
+ memcpy(patch, mc,
+ min_t(u32, header[1], PATCH_MAX_SIZE));
+ }
+ }
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+}
+
+static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
+ unsigned int family)
+{
+#ifdef CONFIG_X86_64
+ char fw_name[36] = "amd-ucode/microcode_amd.bin";
+
+ if (family >= 0x15)
+ snprintf(fw_name, sizeof(fw_name),
+ "amd-ucode/microcode_amd_fam%.2xh.bin", family);
+
+ return get_builtin_firmware(cp, fw_name);
+#else
+ return false;
+#endif
+}
+
+void __init load_ucode_amd_bsp(unsigned int family)
+{
+ struct cpio_data cp;
+ void **data;
+ size_t *size;
+
+#ifdef CONFIG_X86_32
+ data = (void **)__pa_nodebug(&ucode_cpio.data);
+ size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+#else
+ data = &ucode_cpio.data;
+ size = &ucode_cpio.size;
+#endif
+
+ cp = find_ucode_in_initrd();
+ if (!cp.data) {
+ if (!load_builtin_amd_microcode(&cp, family))
+ return;
+ }
+
+ *data = cp.data;
+ *size = cp.size;
+
+ apply_ucode_in_initrd(cp.data, cp.size, true);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
+ * which is used upon resume from suspend.
+ */
+void load_ucode_amd_ap(void)
+{
+ struct microcode_amd *mc;
+ size_t *usize;
+ void **ucode;
+
+ mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
+ if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+ __apply_microcode_amd(mc);
+ return;
+ }
+
+ ucode = (void *)__pa_nodebug(&container);
+ usize = (size_t *)__pa_nodebug(&container_size);
+
+ if (!*ucode || !*usize)
+ return;
+
+ apply_ucode_in_initrd(*ucode, *usize, false);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+
+static void __init get_bsp_sig(void)
+{
+ unsigned int bsp = boot_cpu_data.cpu_index;
+ struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+ if (!uci->cpu_sig.sig)
+ smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+}
+#else
+void load_ucode_amd_ap(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct equiv_cpu_entry *eq;
+ struct microcode_amd *mc;
+ u32 rev, eax;
+ u16 eq_id;
+
+ /* Exit if called on the BSP. */
+ if (!cpu)
+ return;
+
+ if (!container)
+ return;
+
+ /*
+ * 64-bit runs with paging enabled, thus early==false.
+ */
+ if (check_current_patch_level(&rev, false))
+ return;
+
+ eax = cpuid_eax(0x00000001);
+ eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+
+ eq_id = find_equiv_id(eq, eax);
+ if (!eq_id)
+ return;
+
+ if (eq_id == this_equiv_id) {
+ mc = (struct microcode_amd *)amd_ucode_patch;
+
+ if (mc && rev < mc->hdr.patch_id) {
+ if (!__apply_microcode_amd(mc))
+ ucode_new_rev = mc->hdr.patch_id;
+ }
+
+ } else {
+ if (!ucode_cpio.data)
+ return;
+
+ /*
+ * AP has a different equivalence ID than BSP, looks like
+ * mixed-steppings silicon so go through the ucode blob anew.
+ */
+ apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
+ }
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+ unsigned long cont;
+ int retval = 0;
+ enum ucode_state ret;
+ u8 *cont_va;
+ u32 eax;
+
+ if (!container)
+ return -EINVAL;
+
+#ifdef CONFIG_X86_32
+ get_bsp_sig();
+ cont = (unsigned long)container;
+ cont_va = __va(container);
+#else
+ /*
+ * We need the physical address of the container for both bitness since
+ * boot_params.hdr.ramdisk_image is a physical address.
+ */
+ cont = __pa(container);
+ cont_va = container;
+#endif
+
+ /*
+ * Take into account the fact that the ramdisk might get relocated and
+ * therefore we need to recompute the container's position in virtual
+ * memory space.
+ */
+ if (relocated_ramdisk)
+ container = (u8 *)(__va(relocated_ramdisk) +
+ (cont - boot_params.hdr.ramdisk_image));
+ else
+ container = cont_va;
+
+ if (ucode_new_rev)
+ pr_info("microcode: updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
+ eax = cpuid_eax(0x00000001);
+ eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+ ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
+ if (ret != UCODE_OK)
+ retval = -EINVAL;
+
+ /*
+ * This will be freed any msec now, stash patches for the current
+ * family and switch to patch cache for cpu hotplug, etc later.
+ */
+ container = NULL;
+ container_size = 0;
+
+ return retval;
+}
+
+void reload_ucode_amd(void)
+{
+ struct microcode_amd *mc;
+ u32 rev;
+
+ /*
+ * early==false because this is a syscore ->resume path and by
+ * that time paging is long enabled.
+ */
+ if (check_current_patch_level(&rev, false))
+ return;
+
+ mc = (struct microcode_amd *)amd_ucode_patch;
+
+ if (mc && rev < mc->hdr.patch_id) {
+ if (!__apply_microcode_amd(mc)) {
+ ucode_new_rev = mc->hdr.patch_id;
+ pr_info("microcode: reload patch_level=0x%08x\n",
+ ucode_new_rev);
+ }
+ }
+}
static u16 __find_equiv_id(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -177,6 +604,53 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return patch_size;
}
+/*
+ * Those patch levels cannot be updated to newer ones and thus should be final.
+ */
+static u32 final_levels[] = {
+ 0x01000098,
+ 0x0100009f,
+ 0x010000af,
+ 0, /* T-101 terminator */
+};
+
+/*
+ * Check the current patch level on this CPU.
+ *
+ * @rev: Use it to return the patch level. It is set to 0 in the case of
+ * error.
+ *
+ * Returns:
+ * - true: if update should stop
+ * - false: otherwise
+ */
+bool check_current_patch_level(u32 *rev, bool early)
+{
+ u32 lvl, dummy, i;
+ bool ret = false;
+ u32 *levels;
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
+
+ if (IS_ENABLED(CONFIG_X86_32) && early)
+ levels = (u32 *)__pa_nodebug(&final_levels);
+ else
+ levels = final_levels;
+
+ for (i = 0; levels[i]; i++) {
+ if (lvl == levels[i]) {
+ lvl = 0;
+ ret = true;
+ break;
+ }
+ }
+
+ if (rev)
+ *rev = lvl;
+
+ return ret;
+}
+
int __apply_microcode_amd(struct microcode_amd *mc_amd)
{
u32 rev, dummy;
@@ -197,7 +671,7 @@ int apply_microcode_amd(int cpu)
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
- u32 rev, dummy;
+ u32 rev;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -210,7 +684,8 @@ int apply_microcode_amd(int cpu)
mc_amd = p->data;
uci->mc = p->data;
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ if (check_current_patch_level(&rev, false))
+ return -1;
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
@@ -387,7 +862,7 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s
if (ret != UCODE_OK)
cleanup();
-#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+#ifdef CONFIG_X86_32
/* save BSP's matching patch for early load */
if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
struct ucode_patch *p = find_patch(cpu);
@@ -475,7 +950,7 @@ static struct microcode_ops microcode_amd_ops = {
struct microcode_ops * __init init_amd_microcode(void)
{
- struct cpuinfo_x86 *c = &cpu_data(0);
+ struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
deleted file mode 100644
index e8a215a9a345..000000000000
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- * Fixes: Borislav Petkov <bp@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-#include <asm/cpu.h>
-#include <asm/setup.h>
-#include <asm/microcode_amd.h>
-
-/*
- * This points to the current valid container of microcode patches which we will
- * save from the initrd before jettisoning its contents.
- */
-static u8 *container;
-static size_t container_size;
-
-static u32 ucode_new_rev;
-u8 amd_ucode_patch[PATCH_MAX_SIZE];
-static u16 this_equiv_id;
-
-static struct cpio_data ucode_cpio;
-
-/*
- * Microcode patch container file is prepended to the initrd in cpio format.
- * See Documentation/x86/early-microcode.txt
- */
-static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
-
-static struct cpio_data __init find_ucode_in_initrd(void)
-{
- long offset = 0;
- char *path;
- void *start;
- size_t size;
-
-#ifdef CONFIG_X86_32
- struct boot_params *p;
-
- /*
- * On 32-bit, early load occurs before paging is turned on so we need
- * to use physical addresses.
- */
- p = (struct boot_params *)__pa_nodebug(&boot_params);
- path = (char *)__pa_nodebug(ucode_path);
- start = (void *)p->hdr.ramdisk_image;
- size = p->hdr.ramdisk_size;
-#else
- path = ucode_path;
- start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
- size = boot_params.hdr.ramdisk_size;
-#endif
-
- return find_cpio_data(path, start, size, &offset);
-}
-
-static size_t compute_container_size(u8 *data, u32 total_size)
-{
- size_t size = 0;
- u32 *header = (u32 *)data;
-
- if (header[0] != UCODE_MAGIC ||
- header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
- header[2] == 0) /* size */
- return size;
-
- size = header[2] + CONTAINER_HDR_SZ;
- total_size -= size;
- data += size;
-
- while (total_size) {
- u16 patch_size;
-
- header = (u32 *)data;
-
- if (header[0] != UCODE_UCODE_TYPE)
- break;
-
- /*
- * Sanity-check patch size.
- */
- patch_size = header[1];
- if (patch_size > PATCH_MAX_SIZE)
- break;
-
- size += patch_size + SECTION_HDR_SIZE;
- data += patch_size + SECTION_HDR_SIZE;
- total_size -= patch_size + SECTION_HDR_SIZE;
- }
-
- return size;
-}
-
-/*
- * Early load occurs before we can vmalloc(). So we look for the microcode
- * patch container file in initrd, traverse equivalent cpu table, look for a
- * matching microcode patch, and update, all in initrd memory in place.
- * When vmalloc() is available for use later -- on 64-bit during first AP load,
- * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
- * load_microcode_amd() to save equivalent cpu table and microcode patches in
- * kernel heap memory.
- */
-static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
-{
- struct equiv_cpu_entry *eq;
- size_t *cont_sz;
- u32 *header;
- u8 *data, **cont;
- u8 (*patch)[PATCH_MAX_SIZE];
- u16 eq_id = 0;
- int offset, left;
- u32 rev, eax, ebx, ecx, edx;
- u32 *new_rev;
-
-#ifdef CONFIG_X86_32
- new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
- cont_sz = (size_t *)__pa_nodebug(&container_size);
- cont = (u8 **)__pa_nodebug(&container);
- patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
-#else
- new_rev = &ucode_new_rev;
- cont_sz = &container_size;
- cont = &container;
- patch = &amd_ucode_patch;
-#endif
-
- data = ucode;
- left = size;
- header = (u32 *)data;
-
- /* find equiv cpu table */
- if (header[0] != UCODE_MAGIC ||
- header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
- header[2] == 0) /* size */
- return;
-
- eax = 0x00000001;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
-
- while (left > 0) {
- eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
-
- *cont = data;
-
- /* Advance past the container header */
- offset = header[2] + CONTAINER_HDR_SZ;
- data += offset;
- left -= offset;
-
- eq_id = find_equiv_id(eq, eax);
- if (eq_id) {
- this_equiv_id = eq_id;
- *cont_sz = compute_container_size(*cont, left + offset);
-
- /*
- * truncate how much we need to iterate over in the
- * ucode update loop below
- */
- left = *cont_sz - offset;
- break;
- }
-
- /*
- * support multiple container files appended together. if this
- * one does not have a matching equivalent cpu entry, we fast
- * forward to the next container file.
- */
- while (left > 0) {
- header = (u32 *)data;
- if (header[0] == UCODE_MAGIC &&
- header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
- break;
-
- offset = header[1] + SECTION_HDR_SIZE;
- data += offset;
- left -= offset;
- }
-
- /* mark where the next microcode container file starts */
- offset = data - (u8 *)ucode;
- ucode = data;
- }
-
- if (!eq_id) {
- *cont = NULL;
- *cont_sz = 0;
- return;
- }
-
- /* find ucode and update if needed */
-
- native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
- while (left > 0) {
- struct microcode_amd *mc;
-
- header = (u32 *)data;
- if (header[0] != UCODE_UCODE_TYPE || /* type */
- header[1] == 0) /* size */
- break;
-
- mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
-
- if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
-
- if (!__apply_microcode_amd(mc)) {
- rev = mc->hdr.patch_id;
- *new_rev = rev;
-
- if (save_patch)
- memcpy(patch, mc,
- min_t(u32, header[1], PATCH_MAX_SIZE));
- }
- }
-
- offset = header[1] + SECTION_HDR_SIZE;
- data += offset;
- left -= offset;
- }
-}
-
-static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
- unsigned int family)
-{
-#ifdef CONFIG_X86_64
- char fw_name[36] = "amd-ucode/microcode_amd.bin";
-
- if (family >= 0x15)
- snprintf(fw_name, sizeof(fw_name),
- "amd-ucode/microcode_amd_fam%.2xh.bin", family);
-
- return get_builtin_firmware(cp, fw_name);
-#else
- return false;
-#endif
-}
-
-void __init load_ucode_amd_bsp(unsigned int family)
-{
- struct cpio_data cp;
- void **data;
- size_t *size;
-
-#ifdef CONFIG_X86_32
- data = (void **)__pa_nodebug(&ucode_cpio.data);
- size = (size_t *)__pa_nodebug(&ucode_cpio.size);
-#else
- data = &ucode_cpio.data;
- size = &ucode_cpio.size;
-#endif
-
- cp = find_ucode_in_initrd();
- if (!cp.data) {
- if (!load_builtin_amd_microcode(&cp, family))
- return;
- }
-
- *data = cp.data;
- *size = cp.size;
-
- apply_ucode_in_initrd(cp.data, cp.size, true);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * On 32-bit, since AP's early load occurs before paging is turned on, we
- * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
- * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
- * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
- * which is used upon resume from suspend.
- */
-void load_ucode_amd_ap(void)
-{
- struct microcode_amd *mc;
- size_t *usize;
- void **ucode;
-
- mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
- if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
- __apply_microcode_amd(mc);
- return;
- }
-
- ucode = (void *)__pa_nodebug(&container);
- usize = (size_t *)__pa_nodebug(&container_size);
-
- if (!*ucode || !*usize)
- return;
-
- apply_ucode_in_initrd(*ucode, *usize, false);
-}
-
-static void __init collect_cpu_sig_on_bsp(void *arg)
-{
- unsigned int cpu = smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- uci->cpu_sig.sig = cpuid_eax(0x00000001);
-}
-
-static void __init get_bsp_sig(void)
-{
- unsigned int bsp = boot_cpu_data.cpu_index;
- struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
-
- if (!uci->cpu_sig.sig)
- smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
-}
-#else
-void load_ucode_amd_ap(void)
-{
- unsigned int cpu = smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- struct equiv_cpu_entry *eq;
- struct microcode_amd *mc;
- u32 rev, eax;
- u16 eq_id;
-
- /* Exit if called on the BSP. */
- if (!cpu)
- return;
-
- if (!container)
- return;
-
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
- uci->cpu_sig.rev = rev;
- uci->cpu_sig.sig = eax;
-
- eax = cpuid_eax(0x00000001);
- eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
-
- eq_id = find_equiv_id(eq, eax);
- if (!eq_id)
- return;
-
- if (eq_id == this_equiv_id) {
- mc = (struct microcode_amd *)amd_ucode_patch;
-
- if (mc && rev < mc->hdr.patch_id) {
- if (!__apply_microcode_amd(mc))
- ucode_new_rev = mc->hdr.patch_id;
- }
-
- } else {
- if (!ucode_cpio.data)
- return;
-
- /*
- * AP has a different equivalence ID than BSP, looks like
- * mixed-steppings silicon so go through the ucode blob anew.
- */
- apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
- }
-}
-#endif
-
-int __init save_microcode_in_initrd_amd(void)
-{
- unsigned long cont;
- int retval = 0;
- enum ucode_state ret;
- u8 *cont_va;
- u32 eax;
-
- if (!container)
- return -EINVAL;
-
-#ifdef CONFIG_X86_32
- get_bsp_sig();
- cont = (unsigned long)container;
- cont_va = __va(container);
-#else
- /*
- * We need the physical address of the container for both bitness since
- * boot_params.hdr.ramdisk_image is a physical address.
- */
- cont = __pa(container);
- cont_va = container;
-#endif
-
- /*
- * Take into account the fact that the ramdisk might get relocated and
- * therefore we need to recompute the container's position in virtual
- * memory space.
- */
- if (relocated_ramdisk)
- container = (u8 *)(__va(relocated_ramdisk) +
- (cont - boot_params.hdr.ramdisk_image));
- else
- container = cont_va;
-
- if (ucode_new_rev)
- pr_info("microcode: updated early to new patch_level=0x%08x\n",
- ucode_new_rev);
-
- eax = cpuid_eax(0x00000001);
- eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-
- ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
- if (ret != UCODE_OK)
- retval = -EINVAL;
-
- /*
- * This will be freed any msec now, stash patches for the current
- * family and switch to patch cache for cpu hotplug, etc later.
- */
- container = NULL;
- container_size = 0;
-
- return retval;
-}
-
-void reload_ucode_amd(void)
-{
- struct microcode_amd *mc;
- u32 rev, eax;
-
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
- mc = (struct microcode_amd *)amd_ucode_patch;
-
- if (mc && rev < mc->hdr.patch_id) {
- if (!__apply_microcode_amd(mc)) {
- ucode_new_rev = mc->hdr.patch_id;
- pr_info("microcode: reload patch_level=0x%08x\n",
- ucode_new_rev);
- }
- }
-}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 9e3f3c7dd5d7..7fc27f1cca58 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -5,6 +5,12 @@
* 2006 Shaohua Li <shaohua.li@intel.com>
* 2013-2015 Borislav Petkov <bp@alien8.de>
*
+ * X86 CPU microcode early update for Linux:
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ * H Peter Anvin" <hpa@zytor.com>
+ * (C) 2015 Borislav Petkov <bp@alien8.de>
+ *
* This driver allows to upgrade microcode on x86 processors.
*
* This program is free software; you can redistribute it and/or
@@ -13,34 +19,39 @@
* 2 of the License, or (at your option) any later version.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define pr_fmt(fmt) "microcode: " fmt
#include <linux/platform_device.h>
+#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
+#include <linux/firmware.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/mm.h>
-#include <linux/syscore_ops.h>
-#include <asm/microcode.h>
-#include <asm/processor.h>
+#include <asm/microcode_intel.h>
#include <asm/cpu_device_id.h>
+#include <asm/microcode_amd.h>
#include <asm/perf_event.h>
+#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/cmdline.h>
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-#define MICROCODE_VERSION "2.00"
+#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-bool dis_ucode_ldr;
-module_param(dis_ucode_ldr, bool, 0);
+static bool dis_ucode_ldr;
+
+static int __init disable_loader(char *str)
+{
+ dis_ucode_ldr = true;
+ return 1;
+}
+__setup("dis_ucode_ldr", disable_loader);
/*
* Synchronization.
@@ -68,6 +79,150 @@ struct cpu_info_ctx {
int err;
};
+static bool __init check_loader_disabled_bsp(void)
+{
+#ifdef CONFIG_X86_32
+ const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
+ const char *opt = "dis_ucode_ldr";
+ const char *option = (const char *)__pa_nodebug(opt);
+ bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
+
+#else /* CONFIG_X86_64 */
+ const char *cmdline = boot_command_line;
+ const char *option = "dis_ucode_ldr";
+ bool *res = &dis_ucode_ldr;
+#endif
+
+ if (cmdline_find_option_bool(cmdline, option))
+ *res = true;
+
+ return *res;
+}
+
+extern struct builtin_fw __start_builtin_fw[];
+extern struct builtin_fw __end_builtin_fw[];
+
+bool get_builtin_firmware(struct cpio_data *cd, const char *name)
+{
+#ifdef CONFIG_FW_LOADER
+ struct builtin_fw *b_fw;
+
+ for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
+ if (!strcmp(name, b_fw->name)) {
+ cd->size = b_fw->size;
+ cd->data = b_fw->data;
+ return true;
+ }
+ }
+#endif
+ return false;
+}
+
+void __init load_ucode_bsp(void)
+{
+ int vendor;
+ unsigned int family;
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (!have_cpuid_p())
+ return;
+
+ vendor = x86_vendor();
+ family = x86_family();
+
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (family >= 6)
+ load_ucode_intel_bsp();
+ break;
+ case X86_VENDOR_AMD:
+ if (family >= 0x10)
+ load_ucode_amd_bsp(family);
+ break;
+ default:
+ break;
+ }
+}
+
+static bool check_loader_disabled_ap(void)
+{
+#ifdef CONFIG_X86_32
+ return *((bool *)__pa_nodebug(&dis_ucode_ldr));
+#else
+ return dis_ucode_ldr;
+#endif
+}
+
+void load_ucode_ap(void)
+{
+ int vendor, family;
+
+ if (check_loader_disabled_ap())
+ return;
+
+ if (!have_cpuid_p())
+ return;
+
+ vendor = x86_vendor();
+ family = x86_family();
+
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (family >= 6)
+ load_ucode_intel_ap();
+ break;
+ case X86_VENDOR_AMD:
+ if (family >= 0x10)
+ load_ucode_amd_ap();
+ break;
+ default:
+ break;
+ }
+}
+
+int __init save_microcode_in_initrd(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (c->x86 >= 6)
+ save_microcode_in_initrd_intel();
+ break;
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0x10)
+ save_microcode_in_initrd_amd();
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void reload_early_microcode(void)
+{
+ int vendor, family;
+
+ vendor = x86_vendor();
+ family = x86_family();
+
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (family >= 6)
+ reload_ucode_intel();
+ break;
+ case X86_VENDOR_AMD:
+ if (family >= 0x10)
+ reload_ucode_amd();
+ break;
+ default:
+ break;
+ }
+}
+
static void collect_cpu_info_local(void *arg)
{
struct cpu_info_ctx *ctx = arg;
@@ -210,9 +365,6 @@ static void __exit microcode_dev_exit(void)
{
misc_deregister(&microcode_dev);
}
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-MODULE_ALIAS("devname:cpu/microcode");
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while (0)
@@ -463,20 +615,6 @@ static struct notifier_block mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
-#ifdef MODULE
-/* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id __initconst microcode_id[] = {
-#ifdef CONFIG_MICROCODE_INTEL
- { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
-#ifdef CONFIG_MICROCODE_AMD
- { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, microcode_id);
-#endif
-
static struct attribute *cpu_root_microcode_attrs[] = {
&dev_attr_reload.attr,
NULL
@@ -487,9 +625,9 @@ static struct attribute_group cpu_root_microcode_group = {
.attrs = cpu_root_microcode_attrs,
};
-static int __init microcode_init(void)
+int __init microcode_init(void)
{
- struct cpuinfo_x86 *c = &cpu_data(0);
+ struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
if (paravirt_enabled() || dis_ucode_ldr)
@@ -560,35 +698,3 @@ static int __init microcode_init(void)
return error;
}
-module_init(microcode_init);
-
-static void __exit microcode_exit(void)
-{
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- microcode_dev_exit();
-
- unregister_hotcpu_notifier(&mc_cpu_notifier);
- unregister_syscore_ops(&mc_syscore_ops);
-
- sysfs_remove_group(&cpu_subsys.dev_root->kobj,
- &cpu_root_microcode_group);
-
- get_online_cpus();
- mutex_lock(&microcode_mutex);
-
- subsys_interface_unregister(&mc_cpu_interface);
-
- mutex_unlock(&microcode_mutex);
- put_online_cpus();
-
- platform_device_unregister(microcode_pdev);
-
- microcode_ops = NULL;
-
- if (c->x86_vendor == X86_VENDOR_AMD)
- exit_amd_microcode();
-
- pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
-}
-module_exit(microcode_exit);
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
deleted file mode 100644
index 8ebc421d6299..000000000000
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * X86 CPU microcode early update for Linux
- *
- * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- * H Peter Anvin" <hpa@zytor.com>
- * (C) 2015 Borislav Petkov <bp@alien8.de>
- *
- * This driver allows to early upgrade microcode on Intel processors
- * belonging to IA-32 family - PentiumPro, Pentium II,
- * Pentium III, Xeon, Pentium 4, etc.
- *
- * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- * Software Developer's Manual.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <linux/firmware.h>
-#include <asm/microcode.h>
-#include <asm/microcode_intel.h>
-#include <asm/microcode_amd.h>
-#include <asm/processor.h>
-#include <asm/cmdline.h>
-
-static bool __init check_loader_disabled_bsp(void)
-{
-#ifdef CONFIG_X86_32
- const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
- const char *opt = "dis_ucode_ldr";
- const char *option = (const char *)__pa_nodebug(opt);
- bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
-
-#else /* CONFIG_X86_64 */
- const char *cmdline = boot_command_line;
- const char *option = "dis_ucode_ldr";
- bool *res = &dis_ucode_ldr;
-#endif
-
- if (cmdline_find_option_bool(cmdline, option))
- *res = true;
-
- return *res;
-}
-
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
-
-bool get_builtin_firmware(struct cpio_data *cd, const char *name)
-{
-#ifdef CONFIG_FW_LOADER
- struct builtin_fw *b_fw;
-
- for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
- if (!strcmp(name, b_fw->name)) {
- cd->size = b_fw->size;
- cd->data = b_fw->data;
- return true;
- }
- }
-#endif
- return false;
-}
-
-void __init load_ucode_bsp(void)
-{
- int vendor;
- unsigned int family;
-
- if (check_loader_disabled_bsp())
- return;
-
- if (!have_cpuid_p())
- return;
-
- vendor = x86_vendor();
- family = x86_family();
-
- switch (vendor) {
- case X86_VENDOR_INTEL:
- if (family >= 6)
- load_ucode_intel_bsp();
- break;
- case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_bsp(family);
- break;
- default:
- break;
- }
-}
-
-static bool check_loader_disabled_ap(void)
-{
-#ifdef CONFIG_X86_32
- return *((bool *)__pa_nodebug(&dis_ucode_ldr));
-#else
- return dis_ucode_ldr;
-#endif
-}
-
-void load_ucode_ap(void)
-{
- int vendor, family;
-
- if (check_loader_disabled_ap())
- return;
-
- if (!have_cpuid_p())
- return;
-
- vendor = x86_vendor();
- family = x86_family();
-
- switch (vendor) {
- case X86_VENDOR_INTEL:
- if (family >= 6)
- load_ucode_intel_ap();
- break;
- case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_ap();
- break;
- default:
- break;
- }
-}
-
-int __init save_microcode_in_initrd(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_INTEL:
- if (c->x86 >= 6)
- save_microcode_in_initrd_intel();
- break;
- case X86_VENDOR_AMD:
- if (c->x86 >= 0x10)
- save_microcode_in_initrd_amd();
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-void reload_early_microcode(void)
-{
- int vendor, family;
-
- vendor = x86_vendor();
- family = x86_family();
-
- switch (vendor) {
- case X86_VENDOR_INTEL:
- if (family >= 6)
- reload_ucode_intel();
- break;
- case X86_VENDOR_AMD:
- if (family >= 0x10)
- reload_ucode_amd();
- break;
- default:
- break;
- }
-}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 969dc17eb1b4..ce47402eb2f9 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -4,27 +4,804 @@
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
+ * Intel CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ * H Peter Anvin" <hpa@zytor.com>
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/*
+ * This needs to be before all headers so that pr_debug in printk.h doesn't turn
+ * printk calls into no_printk().
+ *
+ *#define DEBUG
+ */
+#define pr_fmt(fmt) "microcode: " fmt
+#include <linux/earlycpio.h>
#include <linux/firmware.h>
#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/initrd.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
#include <asm/microcode_intel.h>
#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
#include <asm/msr.h>
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
+static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+static struct mc_saved_data {
+ unsigned int mc_saved_count;
+ struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state
+load_microcode_early(struct microcode_intel **saved,
+ unsigned int num_saved, struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *ucode_ptr, *new_mc = NULL;
+ struct microcode_header_intel *mc_hdr;
+ int new_rev, ret, i;
+
+ new_rev = uci->cpu_sig.rev;
+
+ for (i = 0; i < num_saved; i++) {
+ ucode_ptr = saved[i];
+ mc_hdr = (struct microcode_header_intel *)ucode_ptr;
+
+ ret = has_newer_microcode(ucode_ptr,
+ uci->cpu_sig.sig,
+ uci->cpu_sig.pf,
+ new_rev);
+ if (!ret)
+ continue;
+
+ new_rev = mc_hdr->rev;
+ new_mc = ucode_ptr;
+ }
+
+ if (!new_mc)
+ return UCODE_NFOUND;
+
+ uci->mc = (struct microcode_intel *)new_mc;
+ return UCODE_OK;
+}
+
+static inline void
+copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
+ unsigned long off, int num_saved)
+{
+ int i;
+
+ for (i = 0; i < num_saved; i++)
+ mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+}
+
+#ifdef CONFIG_X86_32
+static void
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+ struct mc_saved_data *mc_saved_data)
+{
+ int i;
+ struct microcode_intel ***mc_saved;
+
+ mc_saved = (struct microcode_intel ***)
+ __pa_nodebug(&mc_saved_data->mc_saved);
+ for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+ struct microcode_intel *p;
+
+ p = *(struct microcode_intel **)
+ __pa_nodebug(mc_saved_data->mc_saved + i);
+ mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
+ }
+}
+#endif
+
+static enum ucode_state
+load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+ unsigned long initrd_start, struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int count = mc_saved_data->mc_saved_count;
+
+ if (!mc_saved_data->mc_saved) {
+ copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+
+ return load_microcode_early(mc_saved_tmp, count, uci);
+ } else {
+#ifdef CONFIG_X86_32
+ microcode_phys(mc_saved_tmp, mc_saved_data);
+ return load_microcode_early(mc_saved_tmp, count, uci);
+#else
+ return load_microcode_early(mc_saved_data->mc_saved,
+ count, uci);
+#endif
+ }
+}
+
+/*
+ * Given CPU signature and a microcode patch, this function finds if the
+ * microcode patch has matching family and model with the CPU.
+ */
+static enum ucode_state
+matching_model_microcode(struct microcode_header_intel *mc_header,
+ unsigned long sig)
+{
+ unsigned int fam, model;
+ unsigned int fam_ucode, model_ucode;
+ struct extended_sigtable *ext_header;
+ unsigned long total_size = get_totalsize(mc_header);
+ unsigned long data_size = get_datasize(mc_header);
+ int ext_sigcount, i;
+ struct extended_signature *ext_sig;
+
+ fam = __x86_family(sig);
+ model = x86_model(sig);
+
+ fam_ucode = __x86_family(mc_header->sig);
+ model_ucode = x86_model(mc_header->sig);
+
+ if (fam == fam_ucode && model == model_ucode)
+ return UCODE_OK;
+
+ /* Look for ext. headers: */
+ if (total_size <= data_size + MC_HEADER_SIZE)
+ return UCODE_NFOUND;
+
+ ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+
+ for (i = 0; i < ext_sigcount; i++) {
+ fam_ucode = __x86_family(ext_sig->sig);
+ model_ucode = x86_model(ext_sig->sig);
+
+ if (fam == fam_ucode && model == model_ucode)
+ return UCODE_OK;
+
+ ext_sig++;
+ }
+ return UCODE_NFOUND;
+}
+
+static int
+save_microcode(struct mc_saved_data *mc_saved_data,
+ struct microcode_intel **mc_saved_src,
+ unsigned int mc_saved_count)
+{
+ int i, j;
+ struct microcode_intel **saved_ptr;
+ int ret;
+
+ if (!mc_saved_count)
+ return -EINVAL;
+
+ /*
+ * Copy new microcode data.
+ */
+ saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+ if (!saved_ptr)
+ return -ENOMEM;
+
+ for (i = 0; i < mc_saved_count; i++) {
+ struct microcode_header_intel *mc_hdr;
+ struct microcode_intel *mc;
+ unsigned long size;
+
+ if (!mc_saved_src[i]) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ mc = mc_saved_src[i];
+ mc_hdr = &mc->hdr;
+ size = get_totalsize(mc_hdr);
+
+ saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+ if (!saved_ptr[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ memcpy(saved_ptr[i], mc, size);
+ }
+
+ /*
+ * Point to newly saved microcode.
+ */
+ mc_saved_data->mc_saved = saved_ptr;
+ mc_saved_data->mc_saved_count = mc_saved_count;
+
+ return 0;
+
+err:
+ for (j = 0; j <= i; j++)
+ kfree(saved_ptr[j]);
+ kfree(saved_ptr);
+
+ return ret;
+}
+
+/*
+ * A microcode patch in ucode_ptr is saved into mc_saved
+ * - if it has matching signature and newer revision compared to an existing
+ * patch mc_saved.
+ * - or if it is a newly discovered microcode patch.
+ *
+ * The microcode patch should have matching model with CPU.
+ *
+ * Returns: The updated number @num_saved of saved microcode patches.
+ */
+static unsigned int _save_mc(struct microcode_intel **mc_saved,
+ u8 *ucode_ptr, unsigned int num_saved)
+{
+ struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+ unsigned int sig, pf;
+ int found = 0, i;
+
+ mc_hdr = (struct microcode_header_intel *)ucode_ptr;
+
+ for (i = 0; i < num_saved; i++) {
+ mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
+ sig = mc_saved_hdr->sig;
+ pf = mc_saved_hdr->pf;
+
+ if (!find_matching_signature(ucode_ptr, sig, pf))
+ continue;
+
+ found = 1;
+
+ if (mc_hdr->rev <= mc_saved_hdr->rev)
+ continue;
+
+ /*
+ * Found an older ucode saved earlier. Replace it with
+ * this newer one.
+ */
+ mc_saved[i] = (struct microcode_intel *)ucode_ptr;
+ break;
+ }
+
+ /* Newly detected microcode, save it to memory. */
+ if (i >= num_saved && !found)
+ mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
+
+ return num_saved;
+}
+
+/*
+ * Get microcode matching with BSP's model. Only CPUs with the same model as
+ * BSP can stay in the platform.
+ */
+static enum ucode_state __init
+get_matching_model_microcode(int cpu, unsigned long start,
+ void *data, size_t size,
+ struct mc_saved_data *mc_saved_data,
+ unsigned long *mc_saved_in_initrd,
+ struct ucode_cpu_info *uci)
+{
+ u8 *ucode_ptr = data;
+ unsigned int leftover = size;
+ enum ucode_state state = UCODE_OK;
+ unsigned int mc_size;
+ struct microcode_header_intel *mc_header;
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+ int i;
+
+ while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+
+ if (leftover < sizeof(mc_header))
+ break;
+
+ mc_header = (struct microcode_header_intel *)ucode_ptr;
+
+ mc_size = get_totalsize(mc_header);
+ if (!mc_size || mc_size > leftover ||
+ microcode_sanity_check(ucode_ptr, 0) < 0)
+ break;
+
+ leftover -= mc_size;
+
+ /*
+ * Since APs with same family and model as the BSP may boot in
+ * the platform, we need to find and save microcode patches
+ * with the same family and model as the BSP.
+ */
+ if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
+ UCODE_OK) {
+ ucode_ptr += mc_size;
+ continue;
+ }
+
+ mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+
+ ucode_ptr += mc_size;
+ }
+
+ if (leftover) {
+ state = UCODE_ERROR;
+ goto out;
+ }
+
+ if (mc_saved_count == 0) {
+ state = UCODE_NFOUND;
+ goto out;
+ }
+
+ for (i = 0; i < mc_saved_count; i++)
+ mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+ mc_saved_data->mc_saved_count = mc_saved_count;
+out:
+ return state;
+}
+
+static int collect_cpu_info_early(struct ucode_cpu_info *uci)
+{
+ unsigned int val[2];
+ unsigned int family, model;
+ struct cpu_signature csig;
+ unsigned int eax, ebx, ecx, edx;
+
+ csig.sig = 0;
+ csig.pf = 0;
+ csig.rev = 0;
+
+ memset(uci, 0, sizeof(*uci));
+
+ eax = 0x00000001;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ csig.sig = eax;
+
+ family = __x86_family(csig.sig);
+ model = x86_model(csig.sig);
+
+ if ((model >= 5) || (family > 6)) {
+ /* get processor flags from MSR 0x17 */
+ native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+ csig.pf = 1 << ((val[1] >> 18) & 7);
+ }
+ native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+ /* As documented in the SDM: Do a CPUID 1 here */
+ sync_core();
+
+ /* get the current revision from MSR 0x8B */
+ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+ csig.rev = val[1];
+
+ uci->cpu_sig = csig;
+ uci->valid = 1;
+
+ return 0;
+}
+
+static void show_saved_mc(void)
+{
+#ifdef DEBUG
+ int i, j;
+ unsigned int sig, pf, rev, total_size, data_size, date;
+ struct ucode_cpu_info uci;
+
+ if (mc_saved_data.mc_saved_count == 0) {
+ pr_debug("no microcode data saved.\n");
+ return;
+ }
+ pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+
+ collect_cpu_info_early(&uci);
+
+ sig = uci.cpu_sig.sig;
+ pf = uci.cpu_sig.pf;
+ rev = uci.cpu_sig.rev;
+ pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
+
+ for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+ struct microcode_header_intel *mc_saved_header;
+ struct extended_sigtable *ext_header;
+ int ext_sigcount;
+ struct extended_signature *ext_sig;
+
+ mc_saved_header = (struct microcode_header_intel *)
+ mc_saved_data.mc_saved[i];
+ sig = mc_saved_header->sig;
+ pf = mc_saved_header->pf;
+ rev = mc_saved_header->rev;
+ total_size = get_totalsize(mc_saved_header);
+ data_size = get_datasize(mc_saved_header);
+ date = mc_saved_header->date;
+
+ pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+ i, sig, pf, rev, total_size,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
+
+ /* Look for ext. headers: */
+ if (total_size <= data_size + MC_HEADER_SIZE)
+ continue;
+
+ ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+ for (j = 0; j < ext_sigcount; j++) {
+ sig = ext_sig->sig;
+ pf = ext_sig->pf;
+
+ pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
+ j, sig, pf);
+
+ ext_sig++;
+ }
+
+ }
+#endif
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_MUTEX(x86_cpu_microcode_mutex);
+/*
+ * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
+ * hot added or resumes.
+ *
+ * Please make sure this mc should be a valid microcode patch before calling
+ * this function.
+ */
+int save_mc_for_early(u8 *mc)
+{
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int mc_saved_count_init;
+ unsigned int mc_saved_count;
+ struct microcode_intel **mc_saved;
+ int ret = 0;
+ int i;
+
+ /*
+ * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
+ * hotplug.
+ */
+ mutex_lock(&x86_cpu_microcode_mutex);
+
+ mc_saved_count_init = mc_saved_data.mc_saved_count;
+ mc_saved_count = mc_saved_data.mc_saved_count;
+ mc_saved = mc_saved_data.mc_saved;
+
+ if (mc_saved && mc_saved_count)
+ memcpy(mc_saved_tmp, mc_saved,
+ mc_saved_count * sizeof(struct microcode_intel *));
+ /*
+ * Save the microcode patch mc in mc_save_tmp structure if it's a newer
+ * version.
+ */
+ mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+
+ /*
+ * Save the mc_save_tmp in global mc_saved_data.
+ */
+ ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+ if (ret) {
+ pr_err("Cannot save microcode patch.\n");
+ goto out;
+ }
+
+ show_saved_mc();
+
+ /*
+ * Free old saved microcode data.
+ */
+ if (mc_saved) {
+ for (i = 0; i < mc_saved_count_init; i++)
+ kfree(mc_saved[i]);
+ kfree(mc_saved);
+ }
+
+out:
+ mutex_unlock(&x86_cpu_microcode_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(save_mc_for_early);
+#endif
+
+static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
+{
+#ifdef CONFIG_X86_64
+ unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
+ unsigned int family, model, stepping;
+ char name[30];
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ family = __x86_family(eax);
+ model = x86_model(eax);
+ stepping = eax & 0xf;
+
+ sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
+
+ return get_builtin_firmware(cp, name);
+#else
+ return false;
+#endif
+}
+
+static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
+static __init enum ucode_state
+scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+ unsigned long start, unsigned long size,
+ struct ucode_cpu_info *uci)
+{
+ struct cpio_data cd;
+ long offset = 0;
+#ifdef CONFIG_X86_32
+ char *p = (char *)__pa_nodebug(ucode_name);
+#else
+ char *p = ucode_name;
+#endif
+
+ cd.data = NULL;
+ cd.size = 0;
+
+ cd = find_cpio_data(p, (void *)start, size, &offset);
+ if (!cd.data) {
+ if (!load_builtin_intel_microcode(&cd))
+ return UCODE_ERROR;
+ }
+
+ return get_matching_model_microcode(0, start, cd.data, cd.size,
+ mc_saved_data, initrd, uci);
+}
+
+/*
+ * Print ucode update info.
+ */
+static void
+print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
+{
+ int cpu = smp_processor_id();
+
+ pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+ cpu,
+ uci->cpu_sig.rev,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
+}
+
+#ifdef CONFIG_X86_32
+
+static int delay_ucode_info;
+static int current_mc_date;
+
+/*
+ * Print early updated ucode info after printk works. This is delayed info dump.
+ */
+void show_ucode_info_early(void)
+{
+ struct ucode_cpu_info uci;
+
+ if (delay_ucode_info) {
+ collect_cpu_info_early(&uci);
+ print_ucode_info(&uci, current_mc_date);
+ delay_ucode_info = 0;
+ }
+}
+
+/*
+ * At this point, we can not call printk() yet. Keep microcode patch number in
+ * mc_saved_data.mc_saved and delay printing microcode info in
+ * show_ucode_info_early() until printk() works.
+ */
+static void print_ucode(struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_intel;
+ int *delay_ucode_info_p;
+ int *current_mc_date_p;
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return;
+
+ delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
+ current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
+
+ *delay_ucode_info_p = 1;
+ *current_mc_date_p = mc_intel->hdr.date;
+}
+#else
+
+/*
+ * Flush global tlb. We only do this in x86_64 where paging has been enabled
+ * already and PGE should be enabled as well.
+ */
+static inline void flush_tlb_early(void)
+{
+ __native_flush_tlb_global_irq_disabled();
+}
+
+static inline void print_ucode(struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_intel;
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return;
+
+ print_ucode_info(uci, mc_intel->hdr.date);
+}
+#endif
+
+static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
+{
+ struct microcode_intel *mc_intel;
+ unsigned int val[2];
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return 0;
+
+ /* write microcode via MSR 0x79 */
+ native_wrmsr(MSR_IA32_UCODE_WRITE,
+ (unsigned long) mc_intel->bits,
+ (unsigned long) mc_intel->bits >> 16 >> 16);
+ native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+ /* As documented in the SDM: Do a CPUID 1 here */
+ sync_core();
+
+ /* get the current revision from MSR 0x8B */
+ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+ if (val[1] != mc_intel->hdr.rev)
+ return -1;
+
+#ifdef CONFIG_X86_64
+ /* Flush global tlb. This is precaution. */
+ flush_tlb_early();
+#endif
+ uci->cpu_sig.rev = val[1];
+
+ if (early)
+ print_ucode(uci);
+ else
+ print_ucode_info(uci, mc_intel->hdr.date);
+
+ return 0;
+}
+
+/*
+ * This function converts microcode patch offsets previously stored in
+ * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ */
+int __init save_microcode_in_initrd_intel(void)
+{
+ unsigned int count = mc_saved_data.mc_saved_count;
+ struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
+ int ret = 0;
+
+ if (count == 0)
+ return ret;
+
+ copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+ ret = save_microcode(&mc_saved_data, mc_saved, count);
+ if (ret)
+ pr_err("Cannot save microcode patches from initrd.\n");
+
+ show_saved_mc();
+
+ return ret;
+}
+
+static void __init
+_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
+ unsigned long *initrd,
+ unsigned long start, unsigned long size)
+{
+ struct ucode_cpu_info uci;
+ enum ucode_state ret;
+
+ collect_cpu_info_early(&uci);
+
+ ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+ if (ret != UCODE_OK)
+ return;
+
+ ret = load_microcode(mc_saved_data, initrd, start, &uci);
+ if (ret != UCODE_OK)
+ return;
+
+ apply_microcode_early(&uci, true);
+}
+
+void __init load_ucode_intel_bsp(void)
+{
+ u64 start, size;
+#ifdef CONFIG_X86_32
+ struct boot_params *p;
+
+ p = (struct boot_params *)__pa_nodebug(&boot_params);
+ start = p->hdr.ramdisk_image;
+ size = p->hdr.ramdisk_size;
+
+ _load_ucode_intel_bsp(
+ (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+ (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+ start, size);
+#else
+ start = boot_params.hdr.ramdisk_image + PAGE_OFFSET;
+ size = boot_params.hdr.ramdisk_size;
+
+ _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+#endif
+}
+
+void load_ucode_intel_ap(void)
+{
+ struct mc_saved_data *mc_saved_data_p;
+ struct ucode_cpu_info uci;
+ unsigned long *mc_saved_in_initrd_p;
+ unsigned long initrd_start_addr;
+ enum ucode_state ret;
+#ifdef CONFIG_X86_32
+ unsigned long *initrd_start_p;
+
+ mc_saved_in_initrd_p =
+ (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+ mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+ initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+ initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+ mc_saved_data_p = &mc_saved_data;
+ mc_saved_in_initrd_p = mc_saved_in_initrd;
+ initrd_start_addr = initrd_start;
+#endif
+
+ /*
+ * If there is no valid ucode previously saved in memory, no need to
+ * update ucode on this AP.
+ */
+ if (mc_saved_data_p->mc_saved_count == 0)
+ return;
+
+ collect_cpu_info_early(&uci);
+ ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+ initrd_start_addr, &uci);
+
+ if (ret != UCODE_OK)
+ return;
+
+ apply_microcode_early(&uci, true);
+}
+
+void reload_ucode_intel(void)
+{
+ struct ucode_cpu_info uci;
+ enum ucode_state ret;
+
+ if (!mc_saved_data.mc_saved_count)
+ return;
+
+ collect_cpu_info_early(&uci);
+
+ ret = load_microcode_early(mc_saved_data.mc_saved,
+ mc_saved_data.mc_saved_count, &uci);
+ if (ret != UCODE_OK)
+ return;
+
+ apply_microcode_early(&uci, false);
+}
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
@@ -264,7 +1041,7 @@ static struct microcode_ops microcode_intel_ops = {
struct microcode_ops * __init init_intel_microcode(void)
{
- struct cpuinfo_x86 *c = &cpu_data(0);
+ struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
deleted file mode 100644
index 37ea89c11520..000000000000
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ /dev/null
@@ -1,808 +0,0 @@
-/*
- * Intel CPU microcode early update for Linux
- *
- * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- * H Peter Anvin" <hpa@zytor.com>
- *
- * This allows to early upgrade microcode on Intel processors
- * belonging to IA-32 family - PentiumPro, Pentium II,
- * Pentium III, Xeon, Pentium 4, etc.
- *
- * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- * Software Developer's Manual.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/*
- * This needs to be before all headers so that pr_debug in printk.h doesn't turn
- * printk calls into no_printk().
- *
- *#define DEBUG
- */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-#include <linux/cpu.h>
-#include <asm/msr.h>
-#include <asm/microcode_intel.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/setup.h>
-
-#undef pr_fmt
-#define pr_fmt(fmt) "microcode: " fmt
-
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
-static struct mc_saved_data {
- unsigned int mc_saved_count;
- struct microcode_intel **mc_saved;
-} mc_saved_data;
-
-static enum ucode_state
-load_microcode_early(struct microcode_intel **saved,
- unsigned int num_saved, struct ucode_cpu_info *uci)
-{
- struct microcode_intel *ucode_ptr, *new_mc = NULL;
- struct microcode_header_intel *mc_hdr;
- int new_rev, ret, i;
-
- new_rev = uci->cpu_sig.rev;
-
- for (i = 0; i < num_saved; i++) {
- ucode_ptr = saved[i];
- mc_hdr = (struct microcode_header_intel *)ucode_ptr;
-
- ret = has_newer_microcode(ucode_ptr,
- uci->cpu_sig.sig,
- uci->cpu_sig.pf,
- new_rev);
- if (!ret)
- continue;
-
- new_rev = mc_hdr->rev;
- new_mc = ucode_ptr;
- }
-
- if (!new_mc)
- return UCODE_NFOUND;
-
- uci->mc = (struct microcode_intel *)new_mc;
- return UCODE_OK;
-}
-
-static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
- unsigned long off, int num_saved)
-{
- int i;
-
- for (i = 0; i < num_saved; i++)
- mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
-}
-
-#ifdef CONFIG_X86_32
-static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
- struct mc_saved_data *mc_saved_data)
-{
- int i;
- struct microcode_intel ***mc_saved;
-
- mc_saved = (struct microcode_intel ***)
- __pa_nodebug(&mc_saved_data->mc_saved);
- for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
- struct microcode_intel *p;
-
- p = *(struct microcode_intel **)
- __pa_nodebug(mc_saved_data->mc_saved + i);
- mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
- }
-}
-#endif
-
-static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
- unsigned long initrd_start, struct ucode_cpu_info *uci)
-{
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int count = mc_saved_data->mc_saved_count;
-
- if (!mc_saved_data->mc_saved) {
- copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
-
- return load_microcode_early(mc_saved_tmp, count, uci);
- } else {
-#ifdef CONFIG_X86_32
- microcode_phys(mc_saved_tmp, mc_saved_data);
- return load_microcode_early(mc_saved_tmp, count, uci);
-#else
- return load_microcode_early(mc_saved_data->mc_saved,
- count, uci);
-#endif
- }
-}
-
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- */
-static enum ucode_state
-matching_model_microcode(struct microcode_header_intel *mc_header,
- unsigned long sig)
-{
- unsigned int fam, model;
- unsigned int fam_ucode, model_ucode;
- struct extended_sigtable *ext_header;
- unsigned long total_size = get_totalsize(mc_header);
- unsigned long data_size = get_datasize(mc_header);
- int ext_sigcount, i;
- struct extended_signature *ext_sig;
-
- fam = __x86_family(sig);
- model = x86_model(sig);
-
- fam_ucode = __x86_family(mc_header->sig);
- model_ucode = x86_model(mc_header->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return UCODE_OK;
-
- /* Look for ext. headers: */
- if (total_size <= data_size + MC_HEADER_SIZE)
- return UCODE_NFOUND;
-
- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
- ext_sigcount = ext_header->count;
-
- for (i = 0; i < ext_sigcount; i++) {
- fam_ucode = __x86_family(ext_sig->sig);
- model_ucode = x86_model(ext_sig->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return UCODE_OK;
-
- ext_sig++;
- }
- return UCODE_NFOUND;
-}
-
-static int
-save_microcode(struct mc_saved_data *mc_saved_data,
- struct microcode_intel **mc_saved_src,
- unsigned int mc_saved_count)
-{
- int i, j;
- struct microcode_intel **saved_ptr;
- int ret;
-
- if (!mc_saved_count)
- return -EINVAL;
-
- /*
- * Copy new microcode data.
- */
- saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
- if (!saved_ptr)
- return -ENOMEM;
-
- for (i = 0; i < mc_saved_count; i++) {
- struct microcode_header_intel *mc_hdr;
- struct microcode_intel *mc;
- unsigned long size;
-
- if (!mc_saved_src[i]) {
- ret = -EINVAL;
- goto err;
- }
-
- mc = mc_saved_src[i];
- mc_hdr = &mc->hdr;
- size = get_totalsize(mc_hdr);
-
- saved_ptr[i] = kmalloc(size, GFP_KERNEL);
- if (!saved_ptr[i]) {
- ret = -ENOMEM;
- goto err;
- }
-
- memcpy(saved_ptr[i], mc, size);
- }
-
- /*
- * Point to newly saved microcode.
- */
- mc_saved_data->mc_saved = saved_ptr;
- mc_saved_data->mc_saved_count = mc_saved_count;
-
- return 0;
-
-err:
- for (j = 0; j <= i; j++)
- kfree(saved_ptr[j]);
- kfree(saved_ptr);
-
- return ret;
-}
-
-/*
- * A microcode patch in ucode_ptr is saved into mc_saved
- * - if it has matching signature and newer revision compared to an existing
- * patch mc_saved.
- * - or if it is a newly discovered microcode patch.
- *
- * The microcode patch should have matching model with CPU.
- *
- * Returns: The updated number @num_saved of saved microcode patches.
- */
-static unsigned int _save_mc(struct microcode_intel **mc_saved,
- u8 *ucode_ptr, unsigned int num_saved)
-{
- struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
- unsigned int sig, pf;
- int found = 0, i;
-
- mc_hdr = (struct microcode_header_intel *)ucode_ptr;
-
- for (i = 0; i < num_saved; i++) {
- mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
- sig = mc_saved_hdr->sig;
- pf = mc_saved_hdr->pf;
-
- if (!find_matching_signature(ucode_ptr, sig, pf))
- continue;
-
- found = 1;
-
- if (mc_hdr->rev <= mc_saved_hdr->rev)
- continue;
-
- /*
- * Found an older ucode saved earlier. Replace it with
- * this newer one.
- */
- mc_saved[i] = (struct microcode_intel *)ucode_ptr;
- break;
- }
-
- /* Newly detected microcode, save it to memory. */
- if (i >= num_saved && !found)
- mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
-
- return num_saved;
-}
-
-/*
- * Get microcode matching with BSP's model. Only CPUs with the same model as
- * BSP can stay in the platform.
- */
-static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
- void *data, size_t size,
- struct mc_saved_data *mc_saved_data,
- unsigned long *mc_saved_in_initrd,
- struct ucode_cpu_info *uci)
-{
- u8 *ucode_ptr = data;
- unsigned int leftover = size;
- enum ucode_state state = UCODE_OK;
- unsigned int mc_size;
- struct microcode_header_intel *mc_header;
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
- int i;
-
- while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
-
- if (leftover < sizeof(mc_header))
- break;
-
- mc_header = (struct microcode_header_intel *)ucode_ptr;
-
- mc_size = get_totalsize(mc_header);
- if (!mc_size || mc_size > leftover ||
- microcode_sanity_check(ucode_ptr, 0) < 0)
- break;
-
- leftover -= mc_size;
-
- /*
- * Since APs with same family and model as the BSP may boot in
- * the platform, we need to find and save microcode patches
- * with the same family and model as the BSP.
- */
- if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
- UCODE_OK) {
- ucode_ptr += mc_size;
- continue;
- }
-
- mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
-
- ucode_ptr += mc_size;
- }
-
- if (leftover) {
- state = UCODE_ERROR;
- goto out;
- }
-
- if (mc_saved_count == 0) {
- state = UCODE_NFOUND;
- goto out;
- }
-
- for (i = 0; i < mc_saved_count; i++)
- mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
-
- mc_saved_data->mc_saved_count = mc_saved_count;
-out:
- return state;
-}
-
-static int collect_cpu_info_early(struct ucode_cpu_info *uci)
-{
- unsigned int val[2];
- unsigned int family, model;
- struct cpu_signature csig;
- unsigned int eax, ebx, ecx, edx;
-
- csig.sig = 0;
- csig.pf = 0;
- csig.rev = 0;
-
- memset(uci, 0, sizeof(*uci));
-
- eax = 0x00000001;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- csig.sig = eax;
-
- family = __x86_family(csig.sig);
- model = x86_model(csig.sig);
-
- if ((model >= 5) || (family > 6)) {
- /* get processor flags from MSR 0x17 */
- native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- csig.pf = 1 << ((val[1] >> 18) & 7);
- }
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
- /* As documented in the SDM: Do a CPUID 1 here */
- sync_core();
-
- /* get the current revision from MSR 0x8B */
- native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
- csig.rev = val[1];
-
- uci->cpu_sig = csig;
- uci->valid = 1;
-
- return 0;
-}
-
-#ifdef DEBUG
-static void show_saved_mc(void)
-{
- int i, j;
- unsigned int sig, pf, rev, total_size, data_size, date;
- struct ucode_cpu_info uci;
-
- if (mc_saved_data.mc_saved_count == 0) {
- pr_debug("no microcode data saved.\n");
- return;
- }
- pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
-
- collect_cpu_info_early(&uci);
-
- sig = uci.cpu_sig.sig;
- pf = uci.cpu_sig.pf;
- rev = uci.cpu_sig.rev;
- pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
-
- for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
- struct microcode_header_intel *mc_saved_header;
- struct extended_sigtable *ext_header;
- int ext_sigcount;
- struct extended_signature *ext_sig;
-
- mc_saved_header = (struct microcode_header_intel *)
- mc_saved_data.mc_saved[i];
- sig = mc_saved_header->sig;
- pf = mc_saved_header->pf;
- rev = mc_saved_header->rev;
- total_size = get_totalsize(mc_saved_header);
- data_size = get_datasize(mc_saved_header);
- date = mc_saved_header->date;
-
- pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
- i, sig, pf, rev, total_size,
- date & 0xffff,
- date >> 24,
- (date >> 16) & 0xff);
-
- /* Look for ext. headers: */
- if (total_size <= data_size + MC_HEADER_SIZE)
- continue;
-
- ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
- ext_sigcount = ext_header->count;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
- for (j = 0; j < ext_sigcount; j++) {
- sig = ext_sig->sig;
- pf = ext_sig->pf;
-
- pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
- j, sig, pf);
-
- ext_sig++;
- }
-
- }
-}
-#else
-static inline void show_saved_mc(void)
-{
-}
-#endif
-
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
-static DEFINE_MUTEX(x86_cpu_microcode_mutex);
-/*
- * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
- * hot added or resumes.
- *
- * Please make sure this mc should be a valid microcode patch before calling
- * this function.
- */
-int save_mc_for_early(u8 *mc)
-{
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int mc_saved_count_init;
- unsigned int mc_saved_count;
- struct microcode_intel **mc_saved;
- int ret = 0;
- int i;
-
- /*
- * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
- * hotplug.
- */
- mutex_lock(&x86_cpu_microcode_mutex);
-
- mc_saved_count_init = mc_saved_data.mc_saved_count;
- mc_saved_count = mc_saved_data.mc_saved_count;
- mc_saved = mc_saved_data.mc_saved;
-
- if (mc_saved && mc_saved_count)
- memcpy(mc_saved_tmp, mc_saved,
- mc_saved_count * sizeof(struct microcode_intel *));
- /*
- * Save the microcode patch mc in mc_save_tmp structure if it's a newer
- * version.
- */
- mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
-
- /*
- * Save the mc_save_tmp in global mc_saved_data.
- */
- ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
- if (ret) {
- pr_err("Cannot save microcode patch.\n");
- goto out;
- }
-
- show_saved_mc();
-
- /*
- * Free old saved microcode data.
- */
- if (mc_saved) {
- for (i = 0; i < mc_saved_count_init; i++)
- kfree(mc_saved[i]);
- kfree(mc_saved);
- }
-
-out:
- mutex_unlock(&x86_cpu_microcode_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(save_mc_for_early);
-#endif
-
-static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
-{
-#ifdef CONFIG_X86_64
- unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
- unsigned int family, model, stepping;
- char name[30];
-
- native_cpuid(&eax, &ebx, &ecx, &edx);
-
- family = __x86_family(eax);
- model = x86_model(eax);
- stepping = eax & 0xf;
-
- sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
-
- return get_builtin_firmware(cp, name);
-#else
- return false;
-#endif
-}
-
-static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
-static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
- unsigned long start, unsigned long size,
- struct ucode_cpu_info *uci)
-{
- struct cpio_data cd;
- long offset = 0;
-#ifdef CONFIG_X86_32
- char *p = (char *)__pa_nodebug(ucode_name);
-#else
- char *p = ucode_name;
-#endif
-
- cd.data = NULL;
- cd.size = 0;
-
- cd = find_cpio_data(p, (void *)start, size, &offset);
- if (!cd.data) {
- if (!load_builtin_intel_microcode(&cd))
- return UCODE_ERROR;
- }
-
- return get_matching_model_microcode(0, start, cd.data, cd.size,
- mc_saved_data, initrd, uci);
-}
-
-/*
- * Print ucode update info.
- */
-static void
-print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
-{
- int cpu = smp_processor_id();
-
- pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu,
- uci->cpu_sig.rev,
- date & 0xffff,
- date >> 24,
- (date >> 16) & 0xff);
-}
-
-#ifdef CONFIG_X86_32
-
-static int delay_ucode_info;
-static int current_mc_date;
-
-/*
- * Print early updated ucode info after printk works. This is delayed info dump.
- */
-void show_ucode_info_early(void)
-{
- struct ucode_cpu_info uci;
-
- if (delay_ucode_info) {
- collect_cpu_info_early(&uci);
- print_ucode_info(&uci, current_mc_date);
- delay_ucode_info = 0;
- }
-}
-
-/*
- * At this point, we can not call printk() yet. Keep microcode patch number in
- * mc_saved_data.mc_saved and delay printing microcode info in
- * show_ucode_info_early() until printk() works.
- */
-static void print_ucode(struct ucode_cpu_info *uci)
-{
- struct microcode_intel *mc_intel;
- int *delay_ucode_info_p;
- int *current_mc_date_p;
-
- mc_intel = uci->mc;
- if (mc_intel == NULL)
- return;
-
- delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
- current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
-
- *delay_ucode_info_p = 1;
- *current_mc_date_p = mc_intel->hdr.date;
-}
-#else
-
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
- __native_flush_tlb_global_irq_disabled();
-}
-
-static inline void print_ucode(struct ucode_cpu_info *uci)
-{
- struct microcode_intel *mc_intel;
-
- mc_intel = uci->mc;
- if (mc_intel == NULL)
- return;
-
- print_ucode_info(uci, mc_intel->hdr.date);
-}
-#endif
-
-static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
-{
- struct microcode_intel *mc_intel;
- unsigned int val[2];
-
- mc_intel = uci->mc;
- if (mc_intel == NULL)
- return 0;
-
- /* write microcode via MSR 0x79 */
- native_wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
- /* As documented in the SDM: Do a CPUID 1 here */
- sync_core();
-
- /* get the current revision from MSR 0x8B */
- native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev)
- return -1;
-
-#ifdef CONFIG_X86_64
- /* Flush global tlb. This is precaution. */
- flush_tlb_early();
-#endif
- uci->cpu_sig.rev = val[1];
-
- if (early)
- print_ucode(uci);
- else
- print_ucode_info(uci, mc_intel->hdr.date);
-
- return 0;
-}
-
-/*
- * This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
- */
-int __init save_microcode_in_initrd_intel(void)
-{
- unsigned int count = mc_saved_data.mc_saved_count;
- struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
- int ret = 0;
-
- if (count == 0)
- return ret;
-
- copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
- ret = save_microcode(&mc_saved_data, mc_saved, count);
- if (ret)
- pr_err("Cannot save microcode patches from initrd.\n");
-
- show_saved_mc();
-
- return ret;
-}
-
-static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
- unsigned long *initrd,
- unsigned long start, unsigned long size)
-{
- struct ucode_cpu_info uci;
- enum ucode_state ret;
-
- collect_cpu_info_early(&uci);
-
- ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
- if (ret != UCODE_OK)
- return;
-
- ret = load_microcode(mc_saved_data, initrd, start, &uci);
- if (ret != UCODE_OK)
- return;
-
- apply_microcode_early(&uci, true);
-}
-
-void __init load_ucode_intel_bsp(void)
-{
- u64 start, size;
-#ifdef CONFIG_X86_32
- struct boot_params *p;
-
- p = (struct boot_params *)__pa_nodebug(&boot_params);
- start = p->hdr.ramdisk_image;
- size = p->hdr.ramdisk_size;
-
- _load_ucode_intel_bsp(
- (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
- (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
- start, size);
-#else
- start = boot_params.hdr.ramdisk_image + PAGE_OFFSET;
- size = boot_params.hdr.ramdisk_size;
-
- _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
-#endif
-}
-
-void load_ucode_intel_ap(void)
-{
- struct mc_saved_data *mc_saved_data_p;
- struct ucode_cpu_info uci;
- unsigned long *mc_saved_in_initrd_p;
- unsigned long initrd_start_addr;
- enum ucode_state ret;
-#ifdef CONFIG_X86_32
- unsigned long *initrd_start_p;
-
- mc_saved_in_initrd_p =
- (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
- initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
- initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
-#else
- mc_saved_data_p = &mc_saved_data;
- mc_saved_in_initrd_p = mc_saved_in_initrd;
- initrd_start_addr = initrd_start;
-#endif
-
- /*
- * If there is no valid ucode previously saved in memory, no need to
- * update ucode on this AP.
- */
- if (mc_saved_data_p->mc_saved_count == 0)
- return;
-
- collect_cpu_info_early(&uci);
- ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
- initrd_start_addr, &uci);
-
- if (ret != UCODE_OK)
- return;
-
- apply_microcode_early(&uci, true);
-}
-
-void reload_ucode_intel(void)
-{
- struct ucode_cpu_info uci;
- enum ucode_state ret;
-
- if (!mc_saved_data.mc_saved_count)
- return;
-
- collect_cpu_info_early(&uci);
-
- ret = load_microcode_early(mc_saved_data.mc_saved,
- mc_saved_data.mc_saved_count, &uci);
- if (ret != UCODE_OK)
- return;
-
- apply_microcode_early(&uci, false);
-}
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index 1883d252ff7d..b96896bcbdaf 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -25,7 +25,6 @@
#include <linux/firmware.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <asm/microcode_intel.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66dd3fe99b82..4562cf070c27 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN)
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* XXX assumes any ->del() called during a TXN will only be on
* an event added during that same TXN.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN)
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
return;
/*
@@ -1748,11 +1748,22 @@ static inline void x86_pmu_read(struct perf_event *event)
* Start group events scheduling transaction
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
*/
-static void x86_pmu_start_txn(struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
+
+ cpuc->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
perf_pmu_disable(pmu);
- __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
__this_cpu_write(cpu_hw_events.n_txn, 0);
}
@@ -1763,7 +1774,16 @@ static void x86_pmu_start_txn(struct pmu *pmu)
*/
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
- __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
+ unsigned int txn_flags;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuc->txn_flags;
+ cpuc->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
/*
* Truncate collected array by the number of events added in this
* transaction. See x86_pmu_add() and x86_pmu_*_txn().
@@ -1786,6 +1806,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
int assign[X86_PMC_IDX_MAX];
int n, ret;
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuc->txn_flags = 0;
+ return 0;
+ }
+
n = cpuc->n_events;
if (!x86_pmu_initialized())
@@ -1801,7 +1828,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- cpuc->group_flag &= ~PERF_EVENT_TXN;
+ cpuc->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 165be83a7fa4..499f533dd3cc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -196,7 +196,7 @@ struct cpu_hw_events {
int n_excl; /* the number of exclusive events */
- unsigned int group_flag;
+ unsigned int txn_flags;
int is_fake;
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index d1c0f254afbe..2cad71d1b14c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -495,6 +495,19 @@ static int bts_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
+ /*
+ * BTS leaks kernel addresses even when CPL0 tracing is
+ * disabled, so disallow intel_bts driver for unprivileged
+ * users on paranoid systems since it provides trace data
+ * to the user in a zero-copy fashion.
+ *
+ * Note that the default paranoia setting permits unprivileged
+ * users to profile the kernel.
+ */
+ if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+ !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
ret = x86_reserve_hardware();
if (ret) {
x86_del_exclusive(x86_lbr_exclusive_bts);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
new file mode 100644
index 000000000000..75a38b5a2e26
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file export cstate related free running (read-only) counters
+ * for perf. These counters may be use simultaneously by other tools,
+ * such as turbostat. However, it still make sense to implement them
+ * in perf. Because we can conveniently collect them together with
+ * other events, and allow to use them from tools without special MSR
+ * access code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ * - 'cstate_core': The counter is available for each physical core.
+ * The counters include CORE_C*_RESIDENCY.
+ * - 'cstate_pkg': The counter is available for each physical package.
+ * The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer.s Manual Vol3b.
+ *
+ * Model specific counters:
+ * MSR_CORE_C1_RES: CORE C1 Residency Counter
+ * perf code: 0x00
+ * Available model: SLM,AMT
+ * Scope: Core (each processor core has a MSR)
+ * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ * perf code: 0x03
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Core
+ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
+ * perf code: 0x00
+ * Available model: SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
+ * perf code: 0x01
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
+ * perf code: 0x02
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
+ * perf code: 0x03
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Scope: Package (physical package)
+ * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
+ * perf code: 0x04
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
+ * perf code: 0x05
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ * perf code: 0x06
+ * Available model: HSW ULT only
+ * Scope: Package (physical package)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+struct perf_cstate_msr {
+ u64 msr;
+ struct perf_pmu_events_attr *attr;
+ bool (*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+ /*
+ * cstate_core events
+ */
+ PERF_CSTATE_CORE_C1_RES = 0,
+ PERF_CSTATE_CORE_C3_RES,
+ PERF_CSTATE_CORE_C6_RES,
+ PERF_CSTATE_CORE_C7_RES,
+
+ PERF_CSTATE_CORE_EVENT_MAX,
+};
+
+bool test_core(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C1_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+ .name = "events",
+ .attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+ &format_attr_core_event.attr,
+ NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+ .name = "format",
+ .attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+ &core_events_attr_group,
+ &core_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+ /*
+ * cstate_pkg events
+ */
+ PERF_CSTATE_PKG_C2_RES = 0,
+ PERF_CSTATE_PKG_C3_RES,
+ PERF_CSTATE_PKG_C6_RES,
+ PERF_CSTATE_PKG_C7_RES,
+ PERF_CSTATE_PKG_C8_RES,
+ PERF_CSTATE_PKG_C9_RES,
+ PERF_CSTATE_PKG_C10_RES,
+
+ PERF_CSTATE_PKG_EVENT_MAX,
+};
+
+bool test_pkg(int idx)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6)
+ return false;
+
+ switch (boot_cpu_data.x86_model) {
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
+
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
+ if (idx == PERF_CSTATE_CORE_C3_RES ||
+ idx == PERF_CSTATE_CORE_C6_RES ||
+ idx == PERF_CSTATE_CORE_C7_RES)
+ return true;
+ break;
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
+
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+ case 61: /* 14nm Broadwell Core-M */
+ case 86: /* 14nm Broadwell Xeon D */
+ case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+ case 79: /* 14nm Broadwell Server */
+
+ case 78: /* 14nm Skylake Mobile */
+ case 94: /* 14nm Skylake Desktop */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES)
+ return true;
+ break;
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case 76: /* 14nm Atom "Airmont" */
+ if (idx == PERF_CSTATE_CORE_C6_RES)
+ return true;
+ break;
+ case 69: /* 22nm Haswell ULT */
+ if (idx == PERF_CSTATE_PKG_C2_RES ||
+ idx == PERF_CSTATE_PKG_C3_RES ||
+ idx == PERF_CSTATE_PKG_C6_RES ||
+ idx == PERF_CSTATE_PKG_C7_RES ||
+ idx == PERF_CSTATE_PKG_C8_RES ||
+ idx == PERF_CSTATE_PKG_C9_RES ||
+ idx == PERF_CSTATE_PKG_C10_RES)
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+ NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+ .name = "events",
+ .attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+ &format_attr_pkg_event.attr,
+ NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+ .name = "format",
+ .attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+ &pkg_events_attr_group,
+ &pkg_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+/* cstate_pkg PMU end*/
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (pmu == &cstate_core_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+ else if (pmu == &cstate_pkg_pmu)
+ return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+ else
+ return 0;
+}
+
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+ int ret = 0;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
+ if (event->pmu == &cstate_core_pmu) {
+ if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+ return -EINVAL;
+ if (!core_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = core_msr[cfg].msr;
+ } else if (event->pmu == &cstate_pkg_pmu) {
+ if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+ return -EINVAL;
+ if (!pkg_msr[cfg].attr)
+ return -EINVAL;
+ event->hw.event_base = pkg_msr[cfg].msr;
+ } else
+ return -ENOENT;
+
+ /* must be done before validate_group */
+ event->hw.config = cfg;
+ event->hw.idx = -1;
+
+ return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+ u64 val;
+
+ rdmsrl(event->hw.event_base, val);
+ return val;
+}
+
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_raw_count, new_raw_count;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = cstate_pmu_read_counter(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ local64_add(new_raw_count - prev_raw_count, &event->count);
+}
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+ local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+ cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+ if (mode & PERF_EF_START)
+ cstate_pmu_event_start(event, mode);
+
+ return 0;
+}
+
+static void cstate_cpu_exit(int cpu)
+{
+ int i, id, target;
+
+ /* cpu exit for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_core_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_core_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
+
+ /* cpu exit for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ target = -1;
+
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (id == topology_physical_package_id(i)) {
+ target = i;
+ break;
+ }
+ }
+ if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+ WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+ if (target >= 0)
+ perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
+}
+
+static void cstate_cpu_init(int cpu)
+{
+ int i, id;
+
+ /* cpu init for cstate core */
+ if (has_cstate_core) {
+ id = topology_core_id(cpu);
+ for_each_cpu(i, &cstate_core_cpu_mask) {
+ if (id == topology_core_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+ }
+
+ /* cpu init for cstate pkg */
+ if (has_cstate_pkg) {
+ id = topology_physical_package_id(cpu);
+ for_each_cpu(i, &cstate_pkg_cpu_mask) {
+ if (id == topology_physical_package_id(i))
+ break;
+ }
+ if (i >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+ }
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ break;
+ case CPU_STARTING:
+ cstate_cpu_init(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DYING:
+ break;
+ case CPU_ONLINE:
+ case CPU_DEAD:
+ break;
+ case CPU_DOWN_PREPARE:
+ cstate_cpu_exit(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there is no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+ struct attribute **events_attrs,
+ int max_event_nr)
+{
+ int i, j = 0;
+ u64 val;
+
+ /* Probe the cstate events. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+ msr[i].attr = NULL;
+ }
+
+ /* List remaining events in the sysfs attrs. */
+ for (i = 0; i < max_event_nr; i++) {
+ if (msr[i].attr)
+ events_attrs[j++] = &msr[i].attr->attr.attr;
+ }
+ events_attrs[j] = NULL;
+
+ return (j > 0) ? true : false;
+}
+
+static int __init cstate_init(void)
+{
+ /* SLM has different MSR for PKG C6 */
+ switch (boot_cpu_data.x86_model) {
+ case 55:
+ case 76:
+ case 77:
+ pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+ }
+
+ if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+ has_cstate_core = true;
+
+ if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+ has_cstate_pkg = true;
+
+ return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+ int cpu;
+
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(cpu)
+ cstate_cpu_init(cpu);
+
+ __perf_cpu_notifier(cstate_cpu_notifier);
+
+ cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+ .attr_groups = core_attr_groups,
+ .name = "cstate_core",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+ .attr_groups = pkg_attr_groups,
+ .name = "cstate_pkg",
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cstate_pmu_event_init,
+ .add = cstate_pmu_event_add, /* must have */
+ .del = cstate_pmu_event_del, /* must have */
+ .start = cstate_pmu_event_start,
+ .stop = cstate_pmu_event_stop,
+ .read = cstate_pmu_event_update,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+ int err;
+
+ if (has_cstate_core) {
+ err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_core_pmu.name, err);
+ }
+
+ if (has_cstate_pkg) {
+ err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (WARN_ON(err))
+ pr_info("Failed to register PMU %s error %d\n",
+ cstate_pkg_pmu.name, err);
+ }
+}
+
+static int __init cstate_pmu_init(void)
+{
+ int err;
+
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
+ err = cstate_init();
+ if (err)
+ return err;
+
+ cstate_cpumask_init();
+
+ cstate_pmus_register();
+
+ return 0;
+}
+
+device_initcall(cstate_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 84f236ab96b0..5db1c7755548 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -510,10 +510,11 @@ int intel_pmu_drain_bts_buffer(void)
u64 flags;
};
struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
- struct bts_record *at, *top;
+ struct bts_record *at, *base, *top;
struct perf_output_handle handle;
struct perf_event_header header;
struct perf_sample_data data;
+ unsigned long skip = 0;
struct pt_regs regs;
if (!event)
@@ -522,10 +523,10 @@ int intel_pmu_drain_bts_buffer(void)
if (!x86_pmu.bts_active)
return 0;
- at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
- top = (struct bts_record *)(unsigned long)ds->bts_index;
+ base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+ top = (struct bts_record *)(unsigned long)ds->bts_index;
- if (top <= at)
+ if (top <= base)
return 0;
memset(&regs, 0, sizeof(regs));
@@ -535,16 +536,43 @@ int intel_pmu_drain_bts_buffer(void)
perf_sample_data_init(&data, 0, event->hw.last_period);
/*
+ * BTS leaks kernel addresses in branches across the cpl boundary,
+ * such as traps or system calls, so unless the user is asking for
+ * kernel tracing (and right now it's not possible), we'd need to
+ * filter them out. But first we need to count how many of those we
+ * have in the current batch. This is an extra O(n) pass, however,
+ * it's much faster than the other one especially considering that
+ * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+ * alloc_bts_buffer()).
+ */
+ for (at = base; at < top; at++) {
+ /*
+ * Note that right now *this* BTS code only works if
+ * attr::exclude_kernel is set, but let's keep this extra
+ * check here in case that changes.
+ */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ skip++;
+ }
+
+ /*
* Prepare a generic sample, i.e. fill in the invariant fields.
* We will overwrite the from and to address before we output
* the sample.
*/
perf_prepare_sample(&header, &data, event, &regs);
- if (perf_output_begin(&handle, event, header.size * (top - at)))
+ if (perf_output_begin(&handle, event, header.size *
+ (top - base - skip)))
return 1;
- for (; at < top; at++) {
+ for (at = base; at < top; at++) {
+ /* Filter out any records that contain kernel addresses. */
+ if (event->attr.exclude_kernel &&
+ (kernel_ip(at->from) || kernel_ip(at->to)))
+ continue;
+
data.ip = at->from;
data.addr = at->to;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index b2c9475b7ff2..bfd0b717e944 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -151,10 +151,10 @@ static void __intel_pmu_lbr_enable(bool pmi)
* No need to reprogram LBR_SELECT in a PMI, as it
* did not change.
*/
- if (cpuc->lbr_sel && !pmi) {
+ if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config;
+ if (!pmi)
wrmsrl(MSR_LBR_SELECT, lbr_select);
- }
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
orig_debugctl = debugctl;
@@ -555,6 +555,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
mask |= X86_BR_IND_JMP;
+ if (br_type & PERF_SAMPLE_BRANCH_CALL)
+ mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
@@ -890,6 +892,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
};
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -905,6 +908,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
| LBR_RETURN | LBR_CALL_STACK,
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
};
/* core */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 42169283448b..868e1194337f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -139,9 +139,6 @@ static int __init pt_pmu_hw_init(void)
long i;
attrs = NULL;
- ret = -ENODEV;
- if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
- goto fail;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
@@ -1130,6 +1127,10 @@ static __init int pt_init(void)
int ret, cpu, prior_warn = 0;
BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+ if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+ return -ENODEV;
+
get_online_cpus();
for_each_online_cpu(cpu) {
u64 ctl;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 560e5255b15e..61215a69b03d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
-int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
static DEFINE_RAW_SPINLOCK(uncore_box_lock);
@@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+ struct pci2phy_map *map;
+ int phys_id = -1;
+
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == pci_domain_nr(bus)) {
+ phys_id = map->pbus_to_physid[bus->number];
+ break;
+ }
+ }
+ raw_spin_unlock(&pci2phy_map_lock);
+
+ return phys_id;
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+ struct pci2phy_map *map, *alloc = NULL;
+ int i;
+
+ lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == segment)
+ goto end;
+ }
+
+ if (!alloc) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+ raw_spin_lock(&pci2phy_map_lock);
+
+ if (!alloc)
+ return NULL;
+
+ goto lookup;
+ }
+
+ map = alloc;
+ alloc = NULL;
+ map->segment = segment;
+ for (i = 0; i < 256; i++)
+ map->pbus_to_physid[i] = -1;
+ list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+ kfree(alloc);
+ return map;
+}
+
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
int phys_id;
bool first_box = false;
- phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
@@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+ int i, cpu, phys_id;
bool last_box = false;
+ phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 72c54c2e5b1a..2f0a4a98e16b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -117,6 +117,15 @@ struct uncore_event_desc {
const char *config;
};
+struct pci2phy_map {
+ struct list_head list;
+ int segment;
+ int pbus_to_physid[256];
+};
+
+int uncore_pcibus_to_physid(struct pci_bus *bus);
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
@@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
extern struct pci_driver *uncore_pci_driver;
-extern int uncore_pcibus_to_physid[256];
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
extern struct event_constraint uncore_constraint_empty;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index f78574b3cb55..845256158a10 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
static int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
- int bus;
+ struct pci2phy_map *map;
+ int bus, segment;
dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
if (!dev)
return -ENOTTY;
bus = dev->bus->number;
-
- uncore_pcibus_to_physid[bus] = 0;
+ segment = pci_domain_nr(dev->bus);
+
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ pci_dev_put(dev);
+ return -ENOMEM;
+ }
+ map->pbus_to_physid[bus] = 0;
+ raw_spin_unlock(&pci2phy_map_lock);
pci_dev_put(dev);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 694510a887dc..f0f4fcba252e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
static int snbep_pci2phy_map_init(int devid)
{
struct pci_dev *ubox_dev = NULL;
- int i, bus, nodeid;
+ int i, bus, nodeid, segment;
+ struct pci2phy_map *map;
int err = 0;
u32 config = 0;
@@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid)
err = pci_read_config_dword(ubox_dev, 0x54, &config);
if (err)
break;
+
+ segment = pci_domain_nr(ubox_dev->bus);
+ raw_spin_lock(&pci2phy_map_lock);
+ map = __find_pci2phy_map(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ err = -ENOMEM;
+ break;
+ }
+
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
*/
for (i = 0; i < 8; i++) {
if (nodeid == ((config >> (3 * i)) & 0x7)) {
- uncore_pcibus_to_physid[bus] = i;
+ map->pbus_to_physid[bus] = i;
break;
}
}
+ raw_spin_unlock(&pci2phy_map_lock);
}
if (!err) {
@@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid)
* For PCI bus with no UBOX device, find the next bus
* that has UBOX device and use its mapping.
*/
- i = -1;
- for (bus = 255; bus >= 0; bus--) {
- if (uncore_pcibus_to_physid[bus] >= 0)
- i = uncore_pcibus_to_physid[bus];
- else
- uncore_pcibus_to_physid[bus] = i;
+ raw_spin_lock(&pci2phy_map_lock);
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ i = -1;
+ for (bus = 255; bus >= 0; bus--) {
+ if (map->pbus_to_physid[bus] >= 0)
+ i = map->pbus_to_physid[bus];
+ else
+ map->pbus_to_physid[bus] = i;
+ }
}
+ raw_spin_unlock(&pci2phy_map_lock);
}
pci_dev_put(ubox_dev);
@@ -2444,7 +2460,7 @@ static struct intel_uncore_type *bdx_pci_uncores[] = {
NULL,
};
-static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f9cc682e561..db9a675e751b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -584,7 +584,7 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
static void __init force_disable_hpet(int num, int slot, int func)
{
#ifdef CONFIG_HPET_TIMER
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
#endif
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 0e2d96ffd158..6bc9ae24b6d2 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -152,7 +152,7 @@ ENTRY(startup_32)
movl %eax, pa(olpc_ofw_pgd)
#endif
-#ifdef CONFIG_MICROCODE_EARLY
+#ifdef CONFIG_MICROCODE
/* Early load ucode on BSP. */
call load_ucode_bsp
#endif
@@ -311,12 +311,11 @@ ENTRY(startup_32_smp)
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
-#ifdef CONFIG_MICROCODE_EARLY
+#ifdef CONFIG_MICROCODE
/* Early load ucode on AP. */
call load_ucode_ap
#endif
-
default_entry:
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 88b4da373081..b8e6ff5cd5d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -37,10 +37,10 @@
*/
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
-u8 hpet_msi_disable;
+bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned long hpet_num_timers;
+static unsigned int hpet_num_timers;
#endif
static void __iomem *hpet_virt_address;
@@ -86,9 +86,9 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-int boot_hpet_disable;
-int hpet_force_user;
-static int hpet_verbose;
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
static int __init hpet_setup(char *str)
{
@@ -98,11 +98,11 @@ static int __init hpet_setup(char *str)
if (next)
*next++ = 0;
if (!strncmp("disable", str, 7))
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
if (!strncmp("force", str, 5))
- hpet_force_user = 1;
+ hpet_force_user = true;
if (!strncmp("verbose", str, 7))
- hpet_verbose = 1;
+ hpet_verbose = true;
str = next;
}
return 1;
@@ -111,7 +111,7 @@ __setup("hpet=", hpet_setup);
static int __init disable_hpet(char *str)
{
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
return 1;
}
__setup("nohpet", disable_hpet);
@@ -124,7 +124,7 @@ static inline int is_hpet_capable(void)
/*
* HPET timer interrupt enable / disable
*/
-static int hpet_legacy_int_enabled;
+static bool hpet_legacy_int_enabled;
/**
* is_hpet_enabled - check whether the hpet timer interrupt is enabled
@@ -230,7 +230,7 @@ static struct clock_event_device hpet_clockevent;
static void hpet_stop_counter(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ u32 cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -272,7 +272,7 @@ static void hpet_enable_legacy_int(void)
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
- hpet_legacy_int_enabled = 1;
+ hpet_legacy_int_enabled = true;
}
static void hpet_legacy_clockevent_register(void)
@@ -983,7 +983,7 @@ void hpet_disable(void)
cfg = *hpet_boot_cfg;
else if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = 0;
+ hpet_legacy_int_enabled = false;
}
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
@@ -1121,8 +1121,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
- unsigned long cfg;
- cfg = hpet_readl(HPET_T1_CFG);
+ u32 cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1b55de1267cf..cd99433b8ba1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
+ if (!*dev)
+ *dev = &x86_dma_fallback_dev;
+
*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
- if (!*dev)
- *dev = &x86_dma_fallback_dev;
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 39e585a554b7..9f7c21c22477 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+ dst->thread.vm86 = NULL;
+#endif
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
@@ -550,14 +553,14 @@ unsigned long get_wchan(struct task_struct *p)
if (sp < bottom || sp > top)
return 0;
- fp = READ_ONCE(*(unsigned long *)sp);
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
do {
if (fp < bottom || fp > top)
return 0;
- ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long)));
+ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
if (!in_sched_functions(ip))
return ip;
- fp = READ_ONCE(*(unsigned long *)fp);
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 176a0f99d4da..cc457ff818ad 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -524,7 +524,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
*/
static void force_disable_hpet_msi(struct pci_dev *unused)
{
- hpet_msi_disable = 1;
+ hpet_msi_disable = true;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fdb7f2a2d328..a1e4da98c8f0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -111,6 +111,7 @@
#include <asm/mce.h>
#include <asm/alternative.h>
#include <asm/prom.h>
+#include <asm/microcode.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -480,34 +481,34 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#ifdef CONFIG_KEXEC_CORE
+/* 16M alignment for crash kernel regions */
+#define CRASH_ALIGN (16 << 20)
+
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
* On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
-# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
+# define CRASH_ADDR_LOW_MAX (512 << 20)
+# define CRASH_ADDR_HIGH_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
-# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
+# define CRASH_ADDR_LOW_MAX (896UL << 20)
+# define CRASH_ADDR_HIGH_MAX MAXMEM
#endif
-static void __init reserve_crashkernel_low(void)
+static int __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
- const unsigned long long alignment = 16<<20; /* 16M */
- unsigned long long low_base = 0, low_size = 0;
+ unsigned long long base, low_base = 0, low_size = 0;
unsigned long total_low_mem;
- unsigned long long base;
- bool auto_set = false;
int ret;
- total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
+
/* crashkernel=Y,low */
- ret = parse_crashkernel_low(boot_command_line, total_low_mem,
- &low_size, &base);
- if (ret != 0) {
+ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
+ if (ret) {
/*
* two parts from lib/swiotlb.c:
* -swiotlb size: user-specified with swiotlb= or default.
@@ -517,52 +518,52 @@ static void __init reserve_crashkernel_low(void)
* make sure we allocate enough extra low memory so that we
* don't run out of DMA buffers for 32-bit devices.
*/
- low_size = max(swiotlb_size_or_default() + (8UL<<20), 256UL<<20);
- auto_set = true;
+ low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
} else {
/* passed with crashkernel=0,low ? */
if (!low_size)
- return;
+ return 0;
}
- low_base = memblock_find_in_range(low_size, (1ULL<<32),
- low_size, alignment);
-
+ low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN);
if (!low_base) {
- if (!auto_set)
- pr_info("crashkernel low reservation failed - No suitable area found.\n");
+ pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
+ (unsigned long)(low_size >> 20));
+ return -ENOMEM;
+ }
- return;
+ ret = memblock_reserve(low_base, low_size);
+ if (ret) {
+ pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
+ return ret;
}
- memblock_reserve(low_base, low_size);
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
- (unsigned long)(low_size >> 20),
- (unsigned long)(low_base >> 20),
- (unsigned long)(total_low_mem >> 20));
+ (unsigned long)(low_size >> 20),
+ (unsigned long)(low_base >> 20),
+ (unsigned long)(total_low_mem >> 20));
+
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
insert_resource(&iomem_resource, &crashk_low_res);
#endif
+ return 0;
}
static void __init reserve_crashkernel(void)
{
- const unsigned long long alignment = 16<<20; /* 16M */
- unsigned long long total_mem;
- unsigned long long crash_size, crash_base;
+ unsigned long long crash_size, crash_base, total_mem;
bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
/* crashkernel=XM */
- ret = parse_crashkernel(boot_command_line, total_mem,
- &crash_size, &crash_base);
+ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0) {
/* crashkernel=X,high */
ret = parse_crashkernel_high(boot_command_line, total_mem,
- &crash_size, &crash_base);
+ &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0)
return;
high = true;
@@ -573,11 +574,10 @@ static void __init reserve_crashkernel(void)
/*
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
- crash_base = memblock_find_in_range(alignment,
- high ? CRASH_KERNEL_ADDR_HIGH_MAX :
- CRASH_KERNEL_ADDR_LOW_MAX,
- crash_size, alignment);
-
+ crash_base = memblock_find_in_range(CRASH_ALIGN,
+ high ? CRASH_ADDR_HIGH_MAX
+ : CRASH_ADDR_LOW_MAX,
+ crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
@@ -587,26 +587,32 @@ static void __init reserve_crashkernel(void)
unsigned long long start;
start = memblock_find_in_range(crash_base,
- crash_base + crash_size, crash_size, 1<<20);
+ crash_base + crash_size,
+ crash_size, 1 << 20);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
- memblock_reserve(crash_base, crash_size);
+ ret = memblock_reserve(crash_base, crash_size);
+ if (ret) {
+ pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
+ return;
+ }
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(total_mem >> 20));
+ if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
+ memblock_free(crash_base, crash_size);
+ return;
+ }
+
+ pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
-
- if (crash_base >= (1ULL<<32))
- reserve_crashkernel_low();
}
#else
static void __init reserve_crashkernel(void)
@@ -1079,8 +1085,10 @@ void __init setup_arch(char **cmdline_p)
memblock_set_current_limit(ISA_END_ADDRESS);
memblock_x86_fill();
- if (efi_enabled(EFI_BOOT))
+ if (efi_enabled(EFI_BOOT)) {
+ efi_fake_memmap();
efi_find_mirror();
+ }
/*
* The EFI specification says that boot service code won't be called
@@ -1173,6 +1181,14 @@ void __init setup_arch(char **cmdline_p)
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
#endif
tboot_probe();
@@ -1234,6 +1250,8 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_apply_memmap_quirks();
#endif
+
+ microcode_init();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e5f920..892ee2e5ecbc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
*/
#define UDELAY_10MS_DEFAULT 10000
-static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+static unsigned int init_udelay = INT_MAX;
static int __init cpu_init_udelay(char *str)
{
@@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
- if (init_udelay != UDELAY_10MS_DEFAULT)
+ if (init_udelay != INT_MAX)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
init_udelay = 0;
+
+ /* else, use legacy delay */
+ init_udelay = UDELAY_10MS_DEFAULT;
}
/*
@@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(300);
pr_debug("Startup point 1\n");
@@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c3f7602cd038..c7c4d9c51e99 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -168,21 +168,20 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
- * into a shift.
+ * into a shift. The larger SC is, the more accurate the conversion, but
+ * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
+ * (64-bit result) can be used.
*
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * We can use khz divisor instead of mhz to keep a better precision.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
data->cyc2ns_mul = 0;
- data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+ data->cyc2ns_shift = 0;
data->cyc2ns_offset = 0;
data->__count = 0;
}
@@ -216,14 +215,14 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
if (likely(data == tail)) {
ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
} else {
data->__count++;
barrier();
ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
barrier();
@@ -257,12 +256,22 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- data->cyc2ns_mul =
- DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
- cpu_khz);
- data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+ clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+ NSEC_PER_MSEC, 0);
+
+ /*
+ * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
+ * not expected to be greater than 31 due to the original published
+ * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
+ * value) - refer perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (data->cyc2ns_shift == 32) {
+ data->cyc2ns_shift = 31;
+ data->cyc2ns_mul >>= 1;
+ }
+
data->cyc2ns_offset = ns_now -
- mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+ mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
cyc2ns_write_end(cpu, data);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a7557c16..9da95b9daf8d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
u64 val, cr0, cr4;
u32 base3;
u16 selector;
- int i;
+ int i, r;
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return X86EMUL_CONTINUE;
}
static int em_rsm(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06ef4908ba61..6a8bc64566ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
goto out;
- kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (r)
goto out;
@@ -4140,17 +4136,12 @@ static int alloc_identity_pagetable(struct kvm *kvm)
{
/* Called with kvm->slots_lock held. */
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
BUG_ON(kvm->arch.ept_identity_pagetable_done);
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ kvm->arch.ept_identity_map_addr, PAGE_SIZE);
return r;
}
@@ -4949,14 +4940,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
- struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
- .guest_phys_addr = addr,
- .memory_size = PAGE_SIZE * 3,
- .flags = 0,
- };
- ret = x86_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92511d4b7236..9a9a19830321 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6453,6 +6453,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
return 1;
}
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted);
+}
+
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@@ -6461,8 +6467,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
for (;;) {
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
+ if (kvm_vcpu_running(vcpu))
r = vcpu_enter_guest(vcpu);
else
r = vcpu_block(kvm, vcpu);
@@ -7474,34 +7479,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
+ unsigned long hva;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ return -EINVAL;
+
+ slot = id_to_memslot(slots, id);
+ if (size) {
+ if (WARN_ON(slot->npages))
+ return -EEXIST;
+
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *)hva))
+ return PTR_ERR((void *)hva);
+ } else {
+ if (!slot->npages)
+ return 0;
+ hva = 0;
+ }
+
+ old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m = *mem;
+ struct kvm_userspace_memory_region m;
- m.slot |= i << 16;
+ m.slot = id | (i << 16);
+ m.flags = 0;
+ m.guest_phys_addr = gpa;
+ m.userspace_addr = hva;
+ m.memory_size = size;
r = __kvm_set_memory_region(kvm, &m);
if (r < 0)
return r;
}
+ if (!size) {
+ r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ WARN_ON(r < 0);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int r;
mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, mem);
+ r = __x86_set_memory_region(kvm, id, gpa, size);
mutex_unlock(&kvm->slots_lock);
return r;
@@ -7516,16 +7553,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the the memory map has changed due to process exit
* or fd copying.
*/
- struct kvm_userspace_memory_region mem;
- memset(&mem, 0, sizeof(mem));
- mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = TSS_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7628,27 +7658,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /*
- * Only private memory slots need to be mapped here since
- * KVM_SET_MEMORY_REGION ioctl is no longer supported.
- */
- if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
- unsigned long userspace_addr;
-
- /*
- * MAP_SHARED to prevent internal slot pages from being moved
- * by fork()/COW.
- */
- userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, 0);
-
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
-
- memslot->userspace_addr = userspace_addr;
- }
-
return 0;
}
@@ -7710,17 +7719,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int nr_mmu_pages = 0;
- if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
- int ret;
-
- ret = vm_munmap(old->userspace_addr,
- old->npages * PAGE_SIZE);
- if (ret < 0)
- printk(KERN_WARNING
- "kvm_vm_ioctl_set_memory_region: "
- "failed to munmap memory\n");
- }
-
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -7769,19 +7767,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_mmu_invalidate_zap_all_pages(kvm);
}
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+ if (kvm_apic_has_events(vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
+ return true;
+
+ if (atomic_read(&vcpu->arch.nmi_queued))
+ return true;
+
+ if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ return true;
+
+ if (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_has_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
kvm_x86_ops->check_nested_events(vcpu, false);
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
- || !list_empty_careful(&vcpu->async_pf.done)
- || kvm_apic_has_events(vcpu)
- || vcpu->arch.pv.pv_unhalted
- || atomic_read(&vcpu->arch.nmi_queued) ||
- (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 816488c0b97e..d388de72eaca 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -353,8 +353,12 @@ AVXcode: 1
17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
18: Grp16 (1A)
19:
-1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
-1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c:
1d:
1e:
@@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
# 0x0f 0x38 0xc0-0xff
+c8: sha1nexte Vdq,Wdq
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq
+cb: sha256rnds2 Vdq,Wdq
+cc: sha256msg1 Vdq,Wdq
+cd: sha256msg2 Vdq,Wdq
db: VAESIMC Vdq,Wdq (66),(v1)
dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -790,6 +800,7 @@ AVXcode: 3
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+cc: sha1rnds4 Vdq,Wdq,Ib
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
f0: RORX Gy,Ey,Ib (F2),(v)
EndTable
@@ -874,7 +885,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5:
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -888,6 +899,9 @@ EndTable
GrpTable: Grp9
1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
@@ -932,8 +946,8 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE
5: XRSTOR | lfence (11B)
-6: XSAVEOPT | mfence (11B)
-7: clflush | sfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
EndTable
GrpTable: Grp16
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d8a83df153a..1f37cb2b56a9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -693,14 +693,12 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
-#ifdef CONFIG_MICROCODE_EARLY
/*
* Remember, initrd memory may contain microcode or other useful things.
* Before we lose initrd mem, we need to find a place to hold them
* now that normal virtual memory is enabled.
*/
save_microcode_in_initrd();
-#endif
/*
* end could be not aligned, and We can not align that,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2c44c0792301..050a092b8d9a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -647,9 +647,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
- if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
- PFN_DOWN(__pa(address)) + 1))
- split_page_count(level);
+ if (virt_addr_valid(address)) {
+ unsigned long pfn = PFN_DOWN(__pa(address));
+
+ if (pfn_range_is_mapped(pfn, pfn + 1))
+ split_page_count(level);
+ }
/*
* Install the new, split up pagetable.
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d7f997f7c26d..ea48449b2e63 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -50,11 +50,16 @@ void __init efi_bgrt_init(void)
bgrt_tab->version);
return;
}
- if (bgrt_tab->status != 1) {
- pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+ if (bgrt_tab->status & 0xfe) {
+ pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n",
bgrt_tab->status);
return;
}
+ if (bgrt_tab->status != 1) {
+ pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n",
+ bgrt_tab->status);
+ return;
+ }
if (bgrt_tab->image_type != 0) {
pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
bgrt_tab->image_type);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 6a28ded74211..ad285404ea7f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -194,7 +194,7 @@ static void __init do_add_efi_memmap(void)
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
- unsigned long pmap;
+ phys_addr_t pmap;
if (efi_enabled(EFI_PARAVIRT))
return 0;
@@ -209,7 +209,7 @@ int __init efi_memblock_x86_reserve_range(void)
#else
pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif
- memmap.phys_map = (void *)pmap;
+ memmap.phys_map = pmap;
memmap.nr_map = e->efi_memmap_size /
e->efi_memdesc_size;
memmap.desc_size = e->efi_memdesc_size;
@@ -222,7 +222,7 @@ int __init efi_memblock_x86_reserve_range(void)
return 0;
}
-static void __init print_efi_memmap(void)
+void __init efi_print_memmap(void)
{
#ifdef EFI_DEBUG
efi_memory_desc_t *md;
@@ -524,7 +524,7 @@ void __init efi_init(void)
return;
if (efi_enabled(EFI_DBG))
- print_efi_memmap();
+ efi_print_memmap();
efi_esrt_init();
}
@@ -1017,24 +1017,6 @@ u32 efi_mem_type(unsigned long phys_addr)
return 0;
}
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- if (!efi_enabled(EFI_MEMMAP))
- return 0;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) &&
- (phys_addr < (md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT))))
- return md->attribute;
- }
- return 0;
-}
-
static int __init arch_parse_efi_cmdline(char *str)
{
if (!str) {
@@ -1044,8 +1026,6 @@ static int __init arch_parse_efi_cmdline(char *str)
if (parse_option_str(str, "old_map"))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
- if (parse_option_str(str, "debug"))
- set_bit(EFI_DBG, &efi.flags);
return 0;
}
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index 10fea5fc821e..df280da34825 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,11 +1,9 @@
config AMD_MCE_INJ
tristate "Simple MCE injection interface for AMD processors"
- depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
+ depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
default n
help
This is a simple debugfs interface to inject MCEs and test different
aspects of the MCE handling code.
WARNING: Do not even assume this interface is staying stable!
-
-
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 17e35b5bf779..55d38cfa46c2 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -17,7 +17,11 @@
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/pci.h>
+
#include <asm/mce.h>
+#include <asm/amd_nb.h>
+#include <asm/irq_vectors.h>
#include "../kernel/cpu/mcheck/mce-internal.h"
@@ -30,16 +34,21 @@ static struct dentry *dfs_inj;
static u8 n_banks;
#define MAX_FLAG_OPT_SIZE 3
+#define NBCFG 0x44
enum injection_type {
SW_INJ = 0, /* SW injection, simply decode the error */
HW_INJ, /* Trigger a #MC */
+ DFR_INT_INJ, /* Trigger Deferred error interrupt */
+ THR_INT_INJ, /* Trigger threshold interrupt */
N_INJ_TYPES,
};
static const char * const flags_options[] = {
[SW_INJ] = "sw",
[HW_INJ] = "hw",
+ [DFR_INT_INJ] = "df",
+ [THR_INT_INJ] = "th",
NULL
};
@@ -129,12 +138,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
{
char buf[MAX_FLAG_OPT_SIZE], *__buf;
int err;
- size_t ret;
if (cnt > MAX_FLAG_OPT_SIZE)
- cnt = MAX_FLAG_OPT_SIZE;
-
- ret = cnt;
+ return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
@@ -150,9 +156,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
return err;
}
- *ppos += ret;
+ *ppos += cnt;
- return ret;
+ return cnt;
}
static const struct file_operations flags_fops = {
@@ -185,6 +191,55 @@ static void trigger_mce(void *info)
asm volatile("int $18");
}
+static void trigger_dfr_int(void *info)
+{
+ asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
+}
+
+static void trigger_thr_int(void *info)
+{
+ asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
+}
+
+static u32 get_nbc_for_node(int node_id)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u32 cores_per_node;
+
+ cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket();
+
+ return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ u32 val;
+ int err;
+
+ if (!F3)
+ return;
+
+ err = pci_read_config_dword(F3, NBCFG, &val);
+ if (err) {
+ pr_err("%s: Error reading F%dx%03x.\n",
+ __func__, PCI_FUNC(F3->devfn), NBCFG);
+ return;
+ }
+
+ if (val & BIT(27))
+ return;
+
+ pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
+ __func__);
+
+ val |= BIT(27);
+ err = pci_write_config_dword(F3, NBCFG, val);
+ if (err)
+ pr_err("%s: Error writing F%dx%03x.\n",
+ __func__, PCI_FUNC(F3->devfn), NBCFG);
+}
+
static void do_inject(void)
{
u64 mcg_status = 0;
@@ -205,6 +260,26 @@ static void do_inject(void)
if (!(i_mce.status & MCI_STATUS_PCC))
mcg_status |= MCG_STATUS_RIPV;
+ /*
+ * Ensure necessary status bits for deferred errors:
+ * - MCx_STATUS[Deferred]: make sure it is a deferred error
+ * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
+ */
+ if (inj_type == DFR_INT_INJ) {
+ i_mce.status |= MCI_STATUS_DEFERRED;
+ i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
+ }
+
+ /*
+ * For multi node CPUs, logging and reporting of bank 4 errors happens
+ * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
+ * Fam10h and later BKDGs.
+ */
+ if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+ toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+ cpu = get_nbc_for_node(amd_get_nb_id(cpu));
+ }
+
get_online_cpus();
if (!cpu_online(cpu))
goto err;
@@ -225,7 +300,16 @@ static void do_inject(void)
toggle_hw_mce_inject(cpu, false);
- smp_call_function_single(cpu, trigger_mce, NULL, 0);
+ switch (inj_type) {
+ case DFR_INT_INJ:
+ smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
+ break;
+ case THR_INT_INJ:
+ smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
+ break;
+ default:
+ smp_call_function_single(cpu, trigger_mce, NULL, 0);
+ }
err:
put_online_cpus();
@@ -290,6 +374,11 @@ static const char readme_msg[] =
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
+"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
+"\t error APIC interrupt handler to handle the error if the feature is \n"
+"\t is present in hardware. \n"
+"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
+"\t APIC interrupt handler to handle the error. \n"
"\n";
static ssize_t
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 9701a4fd7bf2..836a1eb5df43 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -12,7 +12,10 @@
#include <skas.h>
#include <sysdep/tls.h>
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
+{
+ return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
static long write_ldt_entry(struct mm_id *mm_idp, int func,
struct user_desc *desc, void **addr, int done)