diff options
author | Peter Zijlstra <peterz@infradead.org> | 2021-03-12 12:32:54 +0100 |
---|---|---|
committer | Borislav Petkov <bp@suse.de> | 2021-03-15 16:24:59 +0100 |
commit | a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6 (patch) | |
tree | 1f5d63a4ea45af7d513ce044362993174ad52694 /arch/x86/kernel | |
parent | 59eca2fa1934de42d8aa44d3bef655c92ea69703 (diff) |
x86: Remove dynamic NOP selection
This ensures that a NOP is a NOP and not a random other instruction that
is also a NOP. It allows simplification of dynamic code patching that
wants to verify existing code before writing new instructions (ftrace,
jump_label, static_call, etc..).
Differentiating on NOPs is not a feature.
This pessimises 32bit (DONTCARE) and 32bit on 64bit CPUs (CARELESS).
32bit is not a performance target.
Everything x86_64 since AMD K10 (2007) and Intel IvyBridge (2012) is
fine with using NOPL (as opposed to prefix NOP). And per FEATURE_NOPL
being required for x86_64, all x86_64 CPUs can use NOPL. So stop
caring about NOPs, simplify things and get on with life.
[ The problem seems to be that some uarchs can only decode NOPL on a
single front-end port while others have severe decode penalties for
excessive prefixes. All modern uarchs can handle both, except Atom,
which has prefix penalties. ]
[ Also, much doubt you can actually measure any of this on normal
workloads. ]
After this, FEATURE_NOPL is unused except for required-features for
x86_64. FEATURE_K8 is only used for PTI.
[ bp: Kernel build measurements showed ~0.3s slowdown on Sandybridge
which is hardly a slowdown. Get rid of X86_FEATURE_K7, while at it. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> # bpf
Acked-by: Linus Torvalds <torvalds@linuxfoundation.org>
Link: https://lkml.kernel.org/r/20210312115749.065275711@infradead.org
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/alternative.c | 198 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/jump_label.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/static_call.c | 4 |
7 files changed, 32 insertions, 214 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 8d778e46725d..fcac875c3d79 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -74,186 +74,30 @@ do { \ } \ } while (0) -/* - * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes - * that correspond to that nop. Getting from one nop to the next, we - * add to the array the offset that is equal to the sum of all sizes of - * nops preceding the one we are after. - * - * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the - * nice symmetry of sizes of the previous nops. - */ -#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char intelnops[] = +const unsigned char x86nops[] = { - GENERIC_NOP1, - GENERIC_NOP2, - GENERIC_NOP3, - GENERIC_NOP4, - GENERIC_NOP5, - GENERIC_NOP6, - GENERIC_NOP7, - GENERIC_NOP8, - GENERIC_NOP5_ATOMIC -}; -static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = -{ - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, + BYTES_NOP1, + BYTES_NOP2, + BYTES_NOP3, + BYTES_NOP4, + BYTES_NOP5, + BYTES_NOP6, + BYTES_NOP7, + BYTES_NOP8, }; -#endif -#ifdef K8_NOP1 -static const unsigned char k8nops[] = -{ - K8_NOP1, - K8_NOP2, - K8_NOP3, - K8_NOP4, - K8_NOP5, - K8_NOP6, - K8_NOP7, - K8_NOP8, - K8_NOP5_ATOMIC -}; -static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = -{ - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#if defined(K7_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char k7nops[] = -{ - K7_NOP1, - K7_NOP2, - K7_NOP3, - K7_NOP4, - K7_NOP5, - K7_NOP6, - K7_NOP7, - K7_NOP8, - K7_NOP5_ATOMIC -}; -static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = -{ - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#ifdef P6_NOP1 -static const unsigned char p6nops[] = -{ - P6_NOP1, - P6_NOP2, - P6_NOP3, - P6_NOP4, - P6_NOP5, - P6_NOP6, - P6_NOP7, - P6_NOP8, - P6_NOP5_ATOMIC -}; -static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = +const unsigned char * const x86_nops[ASM_NOP_MAX+1] = { NULL, - p6nops, - p6nops + 1, - p6nops + 1 + 2, - p6nops + 1 + 2 + 3, - p6nops + 1 + 2 + 3 + 4, - p6nops + 1 + 2 + 3 + 4 + 5, - p6nops + 1 + 2 + 3 + 4 + 5 + 6, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, + x86nops, + x86nops + 1, + x86nops + 1 + 2, + x86nops + 1 + 2 + 3, + x86nops + 1 + 2 + 3 + 4, + x86nops + 1 + 2 + 3 + 4 + 5, + x86nops + 1 + 2 + 3 + 4 + 5 + 6, + x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; -#endif - -/* Initialize these to a safe default */ -#ifdef CONFIG_X86_64 -const unsigned char * const *ideal_nops = p6_nops; -#else -const unsigned char * const *ideal_nops = intel_nops; -#endif - -void __init arch_init_ideal_nops(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - /* - * Due to a decoder implementation quirk, some - * specific Intel CPUs actually perform better with - * the "k8_nops" than with the SDM-recommended NOPs. - */ - if (boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model >= 0x0f && - boot_cpu_data.x86_model != 0x1c && - boot_cpu_data.x86_model != 0x26 && - boot_cpu_data.x86_model != 0x27 && - boot_cpu_data.x86_model < 0x30) { - ideal_nops = k8_nops; - } else if (boot_cpu_has(X86_FEATURE_NOPL)) { - ideal_nops = p6_nops; - } else { -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - ideal_nops = intel_nops; -#endif - } - break; - - case X86_VENDOR_HYGON: - ideal_nops = p6_nops; - return; - - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 > 0xf) { - ideal_nops = p6_nops; - return; - } - - fallthrough; - - default: -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - if (boot_cpu_has(X86_FEATURE_K8)) - ideal_nops = k8_nops; - else if (boot_cpu_has(X86_FEATURE_K7)) - ideal_nops = k7_nops; - else - ideal_nops = intel_nops; -#endif - } -} /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void __init_or_module add_nops(void *insns, unsigned int len) @@ -262,7 +106,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - memcpy(insns, ideal_nops[noplen], noplen); + memcpy(insns, x86_nops[noplen], noplen); insns += noplen; len -= noplen; } @@ -1302,13 +1146,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, default: /* assume NOP */ switch (len) { case 2: /* NOP2 -- emulate as JMP8+0 */ - BUG_ON(memcmp(emulate, ideal_nops[len], len)); + BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP8_INSN_OPCODE; tp->rel32 = 0; break; case 5: /* NOP5 -- emulate as JMP32+0 */ - BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + BUG_ON(memcmp(emulate, x86_nops[len], len)); tp->opcode = JMP32_INSN_OPCODE; tp->rel32 = 0; break; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 347a956f71ca..2d11384dc9ab 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -628,11 +628,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_init_amd_mc(c); -#ifdef CONFIG_X86_32 - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_K7); -#endif - if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7edbd5ee5ed4..1b3ce3b4a2a2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -66,7 +66,7 @@ int ftrace_arch_code_modify_post_process(void) static const char *ftrace_nop_replace(void) { - return ideal_nops[NOP_ATOMIC5]; + return x86_nops[5]; } static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) @@ -377,7 +377,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - ret = copy_from_kernel_nofault(ip, ideal_nops[2], 2); + ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); if (ret < 0) goto fail; } diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 5ba8477c2cb7..6a2eb62c85e6 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -28,10 +28,8 @@ static void bug_at(const void *ip, int line) } static const void * -__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init) +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) { - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; - const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; const void *expect, *code; const void *addr, *dest; int line; @@ -41,10 +39,8 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); - if (init) { - expect = default_nop; line = __LINE__; - } else if (type == JUMP_LABEL_JMP) { - expect = ideal_nop; line = __LINE__; + if (type == JUMP_LABEL_JMP) { + expect = x86_nops[5]; line = __LINE__; } else { expect = code; line = __LINE__; } @@ -53,7 +49,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, bug_at(addr, line); if (type == JUMP_LABEL_NOP) - code = ideal_nop; + code = x86_nops[5]; return code; } @@ -62,7 +58,7 @@ static inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { - const void *opcode = __jump_label_set_jump_code(entry, type, init); + const void *opcode = __jump_label_set_jump_code(entry, type); /* * As long as only a single processor is running and the code is still @@ -113,7 +109,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } mutex_lock(&text_mutex); - opcode = __jump_label_set_jump_code(entry, type, 0); + opcode = __jump_label_set_jump_code(entry, type); text_poke_queue((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); mutex_unlock(&text_mutex); @@ -136,22 +132,6 @@ static enum { __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - /* - * This function is called at boot up and when modules are - * first loaded. Check if the default nop, the one that is - * inserted at compile time, is the ideal nop. If it is, then - * we do not need to update the nop, and we can leave it as is. - * If it is not, then we need to update the nop to the ideal nop. - */ - if (jlstate == JL_STATE_START) { - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; - const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - - if (memcmp(ideal_nop, default_nop, 5) != 0) - jlstate = JL_STATE_UPDATE; - else - jlstate = JL_STATE_NO_UPDATE; - } if (jlstate == JL_STATE_UPDATE) jump_label_transform(entry, type, 1); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index df776cdca327..6356834928b9 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -229,7 +229,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) return 0UL; if (faddr) - memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); + memcpy(buf, x86_nops[5], 5); else buf[0] = kp->opcode; return (unsigned long)buf; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d883176ef2ce..3b4b9b281545 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -822,7 +822,6 @@ void __init setup_arch(char **cmdline_p) idt_setup_early_traps(); early_cpu_init(); - arch_init_ideal_nops(); jump_label_init(); static_call_init(); early_ioremap_init(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 9442c4136c38..ea028e736831 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -34,7 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case NOP: - code = ideal_nops[NOP_ATOMIC5]; + code = x86_nops[5]; break; case JMP: @@ -66,7 +66,7 @@ static void __static_call_validate(void *insn, bool tail) return; } else { if (opcode == CALL_INSN_OPCODE || - !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) || + !memcmp(insn, x86_nops[5], 5) || !memcmp(insn, xor5rax, 5)) return; } |