author    | Nicolas Pitre <nicolas.pitre@linaro.org>   | 2015-12-12 02:49:21 +0100
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-12-17 10:29:01 +0000
commit    | 42f25bddd0a226d2431e057b9e01c5cc61067e12
tree      | 33e3bf9c4669caba06f72303e5d333e2acd15e78 /arch/arm
parent    | 38fc2f6c98262913388de338d5b0cda67e3f78cd
ARM: 8477/1: runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()
The ARM compiler inserts calls to __aeabi_idiv() and
__aeabi_uidiv() when it needs to perform division on signed and
unsigned integers. If a processor has support for the sdiv and
udiv instructions, the kernel may overwrite the beginning of those
functions with those instructions and a "bx lr" to get better
performance.
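
As a rough illustration only (not part of the patch), here is a small user-space C sketch
of that patching step. The function and buffer names are hypothetical, the buffer merely
stands in for the writable start of __aeabi_uidiv(), and the ARM-state encodings are the
same ones used below; in the kernel the real work is done by patch_aeabi_idiv() together
with flush_icache_range().

    #include <stdint.h>
    #include <string.h>

    /* Overwrite the first eight bytes of "fn" with "udiv r0, r0, r1; bx lr"
     * and flush that range so the CPU would refetch the new instructions. */
    static void patch_udiv(void *fn)
    {
            static const uint32_t insns[2] = {
                    0xe730f110,     /* udiv r0, r0, r1 */
                    0xe12fff1e,     /* bx lr */
            };

            memcpy(fn, insns, sizeof(insns));
            __builtin___clear_cache((char *)fn, (char *)fn + sizeof(insns));
    }

    int main(void)
    {
            uint32_t buf[2] = { 0, 0 };     /* stand-in for the function body */

            patch_udiv(buf);
            return 0;
    }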
To ensure that those functions are aligned to a 32-bit word for easier
patching (which might not always be the case in Thumb mode) and that
the two patched instructions end up in the same cache line, an 8-byte
alignment is enforced when ARM_PATCH_IDIV is selected.
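
As a quick check of that reasoning (a standalone sketch; the 64-byte cache line size is
an assumption used only for the example), two consecutive 32-bit words can straddle a
line boundary when their start is merely 4-byte aligned, but never when it is 8-byte
aligned:

    #include <stdint.h>
    #include <stdio.h>

    /* Do two consecutive 32-bit words starting at "start" share one line? */
    static int same_line(uintptr_t start, uintptr_t line)
    {
            return start / line == (start + 7) / line;
    }

    int main(void)
    {
            const uintptr_t line = 64;      /* assumed L1 line size */

            /* 0x3c is 4-byte aligned: the words at 0x3c and 0x40 split. */
            printf("start 0x3c: %s\n", same_line(0x3c, line) ? "same line" : "split");

            /* 0x38 is 8-byte aligned: both words stay in one line. */
            printf("start 0x38: %s\n", same_line(0x38, line) ? "same line" : "split");
            return 0;
    }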
This was heavily inspired by a previous patch from Stephen Boyd.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/Kconfig         | 18
-rw-r--r-- | arch/arm/kernel/setup.c  | 67
-rw-r--r-- | arch/arm/lib/lib1funcs.S |  8
3 files changed, 93 insertions, 0 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9f1eeb2e7d75..6e644fd68ad2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1603,6 +1603,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
 config ARM_ASM_UNIFIED
 	bool
 
+config ARM_PATCH_IDIV
+	bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
+	depends on CPU_32v7 && !XIP_KERNEL
+	default y
+	help
+	  The ARM compiler inserts calls to __aeabi_idiv() and
+	  __aeabi_uidiv() when it needs to perform division on signed
+	  and unsigned integers. Some v7 CPUs have support for the sdiv
+	  and udiv instructions that can be used to implement those
+	  functions.
+
+	  Enabling this option allows the kernel to modify itself to
+	  replace the first two instructions of these library functions
+	  with the sdiv or udiv plus "bx lr" instructions when the CPU
+	  it is running on supports them. Typically this will be faster
+	  and less power intensive than running the original library
+	  code to do integer division.
+
 config AEABI
 	bool "Use the ARM EABI to compile the kernel"
 	help
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 20edd349d379..e07f567487cd 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -375,6 +375,72 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+#ifdef CONFIG_ARM_PATCH_IDIV
+
+static inline u32 __attribute_const__ sdiv_instruction(void)
+{
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+		/* "sdiv r0, r0, r1" */
+		u32 insn = __opcode_thumb32_compose(0xfb90, 0xf0f1);
+		return __opcode_to_mem_thumb32(insn);
+	}
+
+	/* "sdiv r0, r0, r1" */
+	return __opcode_to_mem_arm(0xe710f110);
+}
+
+static inline u32 __attribute_const__ udiv_instruction(void)
+{
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+		/* "udiv r0, r0, r1" */
+		u32 insn = __opcode_thumb32_compose(0xfbb0, 0xf0f1);
+		return __opcode_to_mem_thumb32(insn);
+	}
+
+	/* "udiv r0, r0, r1" */
+	return __opcode_to_mem_arm(0xe730f110);
+}
+
+static inline u32 __attribute_const__ bx_lr_instruction(void)
+{
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+		/* "bx lr; nop" */
+		u32 insn = __opcode_thumb32_compose(0x4770, 0x46c0);
+		return __opcode_to_mem_thumb32(insn);
+	}
+
+	/* "bx lr" */
+	return __opcode_to_mem_arm(0xe12fff1e);
+}
+
+static void __init patch_aeabi_idiv(void)
+{
+	extern void __aeabi_uidiv(void);
+	extern void __aeabi_idiv(void);
+	uintptr_t fn_addr;
+	unsigned int mask;
+
+	mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA;
+	if (!(elf_hwcap & mask))
+		return;
+
+	pr_info("CPU: div instructions available: patching division code\n");
+
+	fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1;
+	((u32 *)fn_addr)[0] = udiv_instruction();
+	((u32 *)fn_addr)[1] = bx_lr_instruction();
+	flush_icache_range(fn_addr, fn_addr + 8);
+
+	fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1;
+	((u32 *)fn_addr)[0] = sdiv_instruction();
+	((u32 *)fn_addr)[1] = bx_lr_instruction();
+	flush_icache_range(fn_addr, fn_addr + 8);
+}
+
+#else
+static inline void patch_aeabi_idiv(void) { }
+#endif
+
 static void __init cpuid_init_hwcaps(void)
 {
 	int block;
@@ -642,6 +708,7 @@ static void __init setup_processor(void)
 	elf_hwcap = list->elf_hwcap;
 
 	cpuid_init_hwcaps();
+	patch_aeabi_idiv();
 
 #ifndef CONFIG_ARM_THUMB
 	elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index af2267f6a529..9397b2e532af 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA.  */
 
 .endm
 
+#ifdef CONFIG_ARM_PATCH_IDIV
+	.align 3
+#endif
+
 ENTRY(__udivsi3)
 ENTRY(__aeabi_uidiv)
 UNWIND(.fnstart)
@@ -253,6 +257,10 @@ UNWIND(.fnstart)
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
 
+#ifdef CONFIG_ARM_PATCH_IDIV
+	.align 3
+#endif
+
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
 UNWIND(.fnstart)