summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2021-03-12 12:32:54 +0100
committerBorislav Petkov <bp@suse.de>2021-03-15 16:24:59 +0100
commita89dfde3dc3c2dbf56910af75e2d8b11ec5308f6 (patch)
tree1f5d63a4ea45af7d513ce044362993174ad52694 /arch/x86/kernel
parent59eca2fa1934de42d8aa44d3bef655c92ea69703 (diff)
x86: Remove dynamic NOP selection
This ensures that a NOP is a NOP and not a random other instruction that is also a NOP. It allows simplification of dynamic code patching that wants to verify existing code before writing new instructions (ftrace, jump_label, static_call, etc..). Differentiating on NOPs is not a feature. This pessimises 32bit (DONTCARE) and 32bit on 64bit CPUs (CARELESS). 32bit is not a performance target. Everything x86_64 since AMD K10 (2007) and Intel IvyBridge (2012) is fine with using NOPL (as opposed to prefix NOP). And per FEATURE_NOPL being required for x86_64, all x86_64 CPUs can use NOPL. So stop caring about NOPs, simplify things and get on with life. [ The problem seems to be that some uarchs can only decode NOPL on a single front-end port while others have severe decode penalties for excessive prefixes. All modern uarchs can handle both, except Atom, which has prefix penalties. ] [ Also, much doubt you can actually measure any of this on normal workloads. ] After this, FEATURE_NOPL is unused except for required-features for x86_64. FEATURE_K8 is only used for PTI. [ bp: Kernel build measurements showed ~0.3s slowdown on Sandybridge which is hardly a slowdown. Get rid of X86_FEATURE_K7, while at it. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Acked-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> # bpf Acked-by: Linus Torvalds <torvalds@linuxfoundation.org> Link: https://lkml.kernel.org/r/20210312115749.065275711@infradead.org
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/alternative.c198
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/jump_label.c32
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/static_call.c4
7 files changed, 32 insertions, 214 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d..fcac875c3d79 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -74,186 +74,30 @@ do { \
} \
} while (0)
-/*
- * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
- * that correspond to that nop. Getting from one nop to the next, we
- * add to the array the offset that is equal to the sum of all sizes of
- * nops preceding the one we are after.
- *
- * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
- * nice symmetry of sizes of the previous nops.
- */
-#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
-static const unsigned char intelnops[] =
+const unsigned char x86nops[] =
{
- GENERIC_NOP1,
- GENERIC_NOP2,
- GENERIC_NOP3,
- GENERIC_NOP4,
- GENERIC_NOP5,
- GENERIC_NOP6,
- GENERIC_NOP7,
- GENERIC_NOP8,
- GENERIC_NOP5_ATOMIC
-};
-static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
-{
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ BYTES_NOP1,
+ BYTES_NOP2,
+ BYTES_NOP3,
+ BYTES_NOP4,
+ BYTES_NOP5,
+ BYTES_NOP6,
+ BYTES_NOP7,
+ BYTES_NOP8,
};
-#endif
-#ifdef K8_NOP1
-static const unsigned char k8nops[] =
-{
- K8_NOP1,
- K8_NOP2,
- K8_NOP3,
- K8_NOP4,
- K8_NOP5,
- K8_NOP6,
- K8_NOP7,
- K8_NOP8,
- K8_NOP5_ATOMIC
-};
-static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
-{
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
-#endif
-
-#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
-static const unsigned char k7nops[] =
-{
- K7_NOP1,
- K7_NOP2,
- K7_NOP3,
- K7_NOP4,
- K7_NOP5,
- K7_NOP6,
- K7_NOP7,
- K7_NOP8,
- K7_NOP5_ATOMIC
-};
-static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
-{
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
-#endif
-
-#ifdef P6_NOP1
-static const unsigned char p6nops[] =
-{
- P6_NOP1,
- P6_NOP2,
- P6_NOP3,
- P6_NOP4,
- P6_NOP5,
- P6_NOP6,
- P6_NOP7,
- P6_NOP8,
- P6_NOP5_ATOMIC
-};
-static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
+const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
{
NULL,
- p6nops,
- p6nops + 1,
- p6nops + 1 + 2,
- p6nops + 1 + 2 + 3,
- p6nops + 1 + 2 + 3 + 4,
- p6nops + 1 + 2 + 3 + 4 + 5,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ x86nops,
+ x86nops + 1,
+ x86nops + 1 + 2,
+ x86nops + 1 + 2 + 3,
+ x86nops + 1 + 2 + 3 + 4,
+ x86nops + 1 + 2 + 3 + 4 + 5,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
-#endif
-
-/* Initialize these to a safe default */
-#ifdef CONFIG_X86_64
-const unsigned char * const *ideal_nops = p6_nops;
-#else
-const unsigned char * const *ideal_nops = intel_nops;
-#endif
-
-void __init arch_init_ideal_nops(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- /*
- * Due to a decoder implementation quirk, some
- * specific Intel CPUs actually perform better with
- * the "k8_nops" than with the SDM-recommended NOPs.
- */
- if (boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model >= 0x0f &&
- boot_cpu_data.x86_model != 0x1c &&
- boot_cpu_data.x86_model != 0x26 &&
- boot_cpu_data.x86_model != 0x27 &&
- boot_cpu_data.x86_model < 0x30) {
- ideal_nops = k8_nops;
- } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
- ideal_nops = p6_nops;
- } else {
-#ifdef CONFIG_X86_64
- ideal_nops = k8_nops;
-#else
- ideal_nops = intel_nops;
-#endif
- }
- break;
-
- case X86_VENDOR_HYGON:
- ideal_nops = p6_nops;
- return;
-
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 > 0xf) {
- ideal_nops = p6_nops;
- return;
- }
-
- fallthrough;
-
- default:
-#ifdef CONFIG_X86_64
- ideal_nops = k8_nops;
-#else
- if (boot_cpu_has(X86_FEATURE_K8))
- ideal_nops = k8_nops;
- else if (boot_cpu_has(X86_FEATURE_K7))
- ideal_nops = k7_nops;
- else
- ideal_nops = intel_nops;
-#endif
- }
-}
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
@@ -262,7 +106,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
unsigned int noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
- memcpy(insns, ideal_nops[noplen], noplen);
+ memcpy(insns, x86_nops[noplen], noplen);
insns += noplen;
len -= noplen;
}
@@ -1302,13 +1146,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
default: /* assume NOP */
switch (len) {
case 2: /* NOP2 -- emulate as JMP8+0 */
- BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP8_INSN_OPCODE;
tp->rel32 = 0;
break;
case 5: /* NOP5 -- emulate as JMP32+0 */
- BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP32_INSN_OPCODE;
tp->rel32 = 0;
break;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 347a956f71ca..2d11384dc9ab 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -628,11 +628,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_init_amd_mc(c);
-#ifdef CONFIG_X86_32
- if (c->x86 == 6)
- set_cpu_cap(c, X86_FEATURE_K7);
-#endif
-
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7edbd5ee5ed4..1b3ce3b4a2a2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -66,7 +66,7 @@ int ftrace_arch_code_modify_post_process(void)
static const char *ftrace_nop_replace(void)
{
- return ideal_nops[NOP_ATOMIC5];
+ return x86_nops[5];
}
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
@@ -377,7 +377,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + (jmp_offset - start_offset);
if (WARN_ON(*(char *)ip != 0x75))
goto fail;
- ret = copy_from_kernel_nofault(ip, ideal_nops[2], 2);
+ ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
if (ret < 0)
goto fail;
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 5ba8477c2cb7..6a2eb62c85e6 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -28,10 +28,8 @@ static void bug_at(const void *ip, int line)
}
static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
+__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
{
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
const void *expect, *code;
const void *addr, *dest;
int line;
@@ -41,10 +39,8 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
- if (init) {
- expect = default_nop; line = __LINE__;
- } else if (type == JUMP_LABEL_JMP) {
- expect = ideal_nop; line = __LINE__;
+ if (type == JUMP_LABEL_JMP) {
+ expect = x86_nops[5]; line = __LINE__;
} else {
expect = code; line = __LINE__;
}
@@ -53,7 +49,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
bug_at(addr, line);
if (type == JUMP_LABEL_NOP)
- code = ideal_nop;
+ code = x86_nops[5];
return code;
}
@@ -62,7 +58,7 @@ static inline void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
- const void *opcode = __jump_label_set_jump_code(entry, type, init);
+ const void *opcode = __jump_label_set_jump_code(entry, type);
/*
* As long as only a single processor is running and the code is still
@@ -113,7 +109,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
}
mutex_lock(&text_mutex);
- opcode = __jump_label_set_jump_code(entry, type, 0);
+ opcode = __jump_label_set_jump_code(entry, type);
text_poke_queue((void *)jump_entry_code(entry),
opcode, JUMP_LABEL_NOP_SIZE, NULL);
mutex_unlock(&text_mutex);
@@ -136,22 +132,6 @@ static enum {
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- /*
- * This function is called at boot up and when modules are
- * first loaded. Check if the default nop, the one that is
- * inserted at compile time, is the ideal nop. If it is, then
- * we do not need to update the nop, and we can leave it as is.
- * If it is not, then we need to update the nop to the ideal nop.
- */
- if (jlstate == JL_STATE_START) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-
- if (memcmp(ideal_nop, default_nop, 5) != 0)
- jlstate = JL_STATE_UPDATE;
- else
- jlstate = JL_STATE_NO_UPDATE;
- }
if (jlstate == JL_STATE_UPDATE)
jump_label_transform(entry, type, 1);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index df776cdca327..6356834928b9 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -229,7 +229,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
return 0UL;
if (faddr)
- memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
+ memcpy(buf, x86_nops[5], 5);
else
buf[0] = kp->opcode;
return (unsigned long)buf;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d883176ef2ce..3b4b9b281545 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -822,7 +822,6 @@ void __init setup_arch(char **cmdline_p)
idt_setup_early_traps();
early_cpu_init();
- arch_init_ideal_nops();
jump_label_init();
static_call_init();
early_ioremap_init();
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 9442c4136c38..ea028e736831 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -34,7 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
break;
case NOP:
- code = ideal_nops[NOP_ATOMIC5];
+ code = x86_nops[5];
break;
case JMP:
@@ -66,7 +66,7 @@ static void __static_call_validate(void *insn, bool tail)
return;
} else {
if (opcode == CALL_INSN_OPCODE ||
- !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
+ !memcmp(insn, x86_nops[5], 5) ||
!memcmp(insn, xor5rax, 5))
return;
}