diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-04 19:18:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-04 19:18:29 -0700 |
commit | 886d7de631da71e30909980fdbf318f7caade262 (patch) | |
tree | d79c741b609098ec8c8a3643ba52f26e6a2cd4d1 /tools | |
parent | 5bfea2d9b17f1034a68147a8b03b9789af5700f9 (diff) | |
parent | 469cbd016157d28c27fda8da6ddc76b856f4e1b9 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton:
- More MM work. 100ish more to go. Mike Rapoport's "mm: remove
__ARCH_HAS_5LEVEL_HACK" series should fix the current ppc issue
- Various other little subsystems
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (127 commits)
lib/ubsan.c: fix gcc-10 warnings
tools/testing/selftests/vm: remove duplicate headers
selftests: vm: pkeys: fix multilib builds for x86
selftests: vm: pkeys: use the correct page size on powerpc
selftests/vm/pkeys: override access right definitions on powerpc
selftests/vm/pkeys: test correct behaviour of pkey-0
selftests/vm/pkeys: introduce a sub-page allocator
selftests/vm/pkeys: detect write violation on a mapped access-denied-key page
selftests/vm/pkeys: associate key on a mapped page and detect write violation
selftests/vm/pkeys: associate key on a mapped page and detect access violation
selftests/vm/pkeys: improve checks to determine pkey support
selftests/vm/pkeys: fix assertion in test_pkey_alloc_exhaust()
selftests/vm/pkeys: fix number of reserved powerpc pkeys
selftests/vm/pkeys: introduce powerpc support
selftests/vm/pkeys: introduce generic pkey abstractions
selftests: vm: pkeys: use the correct huge page size
selftests/vm/pkeys: fix alloc_random_pkey() to make it really random
selftests/vm/pkeys: fix assertion in pkey_disable_set/clear()
selftests/vm/pkeys: fix pkey_disable_clear()
selftests: vm: pkeys: add helpers for pkey bits
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/lib/config | 1 | ||||
-rw-r--r-- | tools/testing/selftests/vm/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/vm/Makefile | 73 | ||||
-rw-r--r-- | tools/testing/selftests/vm/mremap_dontunmap.c | 1 | ||||
-rw-r--r-- | tools/testing/selftests/vm/pkey-helpers.h | 225 | ||||
-rw-r--r-- | tools/testing/selftests/vm/pkey-powerpc.h | 133 | ||||
-rw-r--r-- | tools/testing/selftests/vm/pkey-x86.h | 181 | ||||
-rw-r--r-- | tools/testing/selftests/vm/protection_keys.c (renamed from tools/testing/selftests/x86/protection_keys.c) | 696 | ||||
-rw-r--r-- | tools/testing/selftests/x86/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/x86/pkey-helpers.h | 219 |
11 files changed, 1000 insertions, 533 deletions
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 14a77ea4a8da..b80ee3f6e265 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -2,3 +2,4 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m CONFIG_PRIME_NUMBERS=m CONFIG_TEST_STRSCPY=m +CONFIG_TEST_BITOPS=m diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 8df6a074e370..849e8226395a 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -10,6 +10,7 @@ mlock2-tests mremap_dontunmap on-fault-limit transhuge-stress +protection_keys userfaultfd mlock-intersect-test mlock-random-test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9f18440080ef..a9026706d597 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -22,6 +22,30 @@ TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += khugepaged +ifeq ($(ARCH),x86_64) +CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) + +TARGETS := protection_keys +BINARIES_32 := $(TARGETS:%=%_32) +BINARIES_64 := $(TARGETS:%=%_64) + +ifeq ($(CAN_BUILD_WITH_NOPIE),1) +CFLAGS += -no-pie +endif + +ifeq ($(CAN_BUILD_I386),1) +TEST_GEN_FILES += $(BINARIES_32) +endif + +ifeq ($(CAN_BUILD_X86_64),1) +TEST_GEN_FILES += $(BINARIES_64) +endif +else +TEST_GEN_FILES += protection_keys +endif + ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range @@ -37,6 +61,55 @@ include ../lib.mk $(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread +ifeq ($(ARCH),x86_64) +BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) +BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) + +define gen-target-rule-32 +$(1) $(1)_32: $(OUTPUT)/$(1)_32 +.PHONY: $(1) $(1)_32 +endef + +define gen-target-rule-64 +$(1) $(1)_64: $(OUTPUT)/$(1)_64 +.PHONY: $(1) $(1)_64 +endef + +ifeq ($(CAN_BUILD_I386),1) +$(BINARIES_32): CFLAGS += -m32 +$(BINARIES_32): LDLIBS += -lrt -ldl -lm +$(BINARIES_32): %_32: %.c + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ +$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) +endif + +ifeq ($(CAN_BUILD_X86_64),1) +$(BINARIES_64): CFLAGS += -m64 +$(BINARIES_64): LDLIBS += -lrt -ldl +$(BINARIES_64): %_64: %.c + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ +$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) +endif + +# x86_64 users should be encouraged to install 32-bit libraries +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) +all: warn_32bit_failure + +warn_32bit_failure: + @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ + echo "environment. This will reduce test coverage of 64-bit" 2>&1; \ + echo "kernels. If you are using a Debian-like distribution," 2>&1; \ + echo "try:"; 2>&1; \ + echo ""; \ + echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ + echo ""; \ + echo "If you are using a Fedora-like distribution, try:"; \ + echo ""; \ + echo " yum install glibc-devel.*i686"; \ + exit 0; +endif +endif + $(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c index ee06cb0b9efb..3a7b5ef0b0c6 100644 --- a/tools/testing/selftests/vm/mremap_dontunmap.c +++ b/tools/testing/selftests/vm/mremap_dontunmap.c @@ -11,7 +11,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <stdlib.h> #include <unistd.h> #include "../kselftest.h" diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h new file mode 100644 index 000000000000..622a85848f61 --- /dev/null +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PKEYS_HELPER_H +#define _PKEYS_HELPER_H +#define _GNU_SOURCE +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> + +/* Define some kernel-like types */ +#define u8 __u8 +#define u16 __u16 +#define u32 __u32 +#define u64 __u64 + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 +extern int dprint_in_signal; +extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern int test_nr; +extern int iteration_nr; + +#ifdef __GNUC__ +__attribute__((format(printf, 1, 2))) +#endif +static inline void sigsafe_printf(const char *format, ...) +{ + va_list ap; + + if (!dprint_in_signal) { + va_start(ap, format); + vprintf(format, ap); + va_end(ap); + } else { + int ret; + /* + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. + */ + ret = write(1, format, strlen(format)); + if (ret < 0) + exit(1); + } +} +#define dprintf_level(level, args...) do { \ + if (level <= DEBUG_LEVEL) \ + sigsafe_printf(args); \ +} while (0) +#define dprintf0(args...) dprintf_level(0, args) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + exit(__LINE__); \ + } \ +} while (0) + +__attribute__((noinline)) int read_ptr(int *ptr); +void expected_pkey_fault(int pkey); +int sys_pkey_alloc(unsigned long flags, unsigned long init_val); +int sys_pkey_free(unsigned long pkey); +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey); +void record_pkey_malloc(void *ptr, long size, int prot); + +#if defined(__i386__) || defined(__x86_64__) /* arch */ +#include "pkey-x86.h" +#elif defined(__powerpc64__) /* arch */ +#include "pkey-powerpc.h" +#else /* arch */ +#error Architecture not supported +#endif /* arch */ + +#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + +static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + u32 shift = pkey_bit_position(pkey); + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + /* OR in new bits for pkey */ + reg |= (flags & PKEY_MASK) << shift; + return reg; +} + +static inline u64 get_pkey_bits(u64 reg, int pkey) +{ + u32 shift = pkey_bit_position(pkey); + /* + * shift down the relevant bits to the lowest two, then + * mask off all the other higher bits + */ + return ((reg >> shift) & PKEY_MASK); +} + +extern u64 shadow_pkey_reg; + +static inline u64 _read_pkey_reg(int line) +{ + u64 pkey_reg = __read_pkey_reg(); + + dprintf4("read_pkey_reg(line=%d) pkey_reg: %016llx" + " shadow: %016llx\n", + line, pkey_reg, shadow_pkey_reg); + assert(pkey_reg == shadow_pkey_reg); + + return pkey_reg; +} + +#define read_pkey_reg() _read_pkey_reg(__LINE__) + +static inline void write_pkey_reg(u64 pkey_reg) +{ + dprintf4("%s() changing %016llx to %016llx\n", __func__, + __read_pkey_reg(), pkey_reg); + /* will do the shadow check for us: */ + read_pkey_reg(); + __write_pkey_reg(pkey_reg); + shadow_pkey_reg = pkey_reg; + dprintf4("%s(%016llx) pkey_reg: %016llx\n", __func__, + pkey_reg, __read_pkey_reg()); +} + +/* + * These are technically racy. since something could + * change PKEY register between the read and the write. + */ +static inline void __pkey_access_allow(int pkey, int do_allow) +{ + u64 pkey_reg = read_pkey_reg(); + int bit = pkey * 2; + + if (do_allow) + pkey_reg &= (1<<bit); + else + pkey_reg |= (1<<bit); + + dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); + write_pkey_reg(pkey_reg); +} + +static inline void __pkey_write_allow(int pkey, int do_allow_write) +{ + u64 pkey_reg = read_pkey_reg(); + int bit = pkey * 2 + 1; + + if (do_allow_write) + pkey_reg &= (1<<bit); + else + pkey_reg |= (1<<bit); + + write_pkey_reg(pkey_reg); + dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); +} + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) \ + ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) \ + ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) +{ +#ifdef si_pkey + return &si->si_pkey; +#else + return (u32 *)(((u8 *)si) + si_pkey_offset); +#endif +} + +static inline int kernel_has_pkeys(void) +{ + /* try allocating a key and see if it succeeds */ + int ret = sys_pkey_alloc(0, 0); + if (ret <= 0) { + return 0; + } + sys_pkey_free(ret); + return 1; +} + +static inline int is_pkeys_supported(void) +{ + /* check if the cpu supports pkeys */ + if (!cpu_has_pkeys()) { + dprintf1("SKIP: %s: no CPU support\n", __func__); + return 0; + } + + /* check if the kernel supports pkeys */ + if (!kernel_has_pkeys()) { + dprintf1("SKIP: %s: no kernel support\n", __func__); + return 0; + } + + return 1; +} + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h new file mode 100644 index 000000000000..1ebb586b2fbc --- /dev/null +++ b/tools/testing/selftests/vm/pkey-powerpc.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _PKEYS_POWERPC_H +#define _PKEYS_POWERPC_H + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 386 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 384 +# define SYS_pkey_free 385 +#endif +#define REG_IP_IDX PT_NIP +#define REG_TRAPNO PT_TRAP +#define gregs gp_regs +#define fpregs fp_regs +#define si_pkey_offset 0x20 + +#undef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */ + +#undef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 + +#define NR_PKEYS 32 +#define NR_RESERVED_PKEYS_4K 27 /* pkey-0, pkey-1, exec-only-pkey + and 24 other keys that cannot be + represented in the PTE */ +#define NR_RESERVED_PKEYS_64K_3KEYS 3 /* PowerNV and KVM: pkey-0, + pkey-1 and exec-only key */ +#define NR_RESERVED_PKEYS_64K_4KEYS 4 /* PowerVM: pkey-0, pkey-1, + pkey-31 and exec-only key */ +#define PKEY_BITS_PER_PKEY 2 +#define HPAGE_SIZE (1UL << 24) +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +static inline u32 pkey_bit_position(int pkey) +{ + return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; +} + +static inline u64 __read_pkey_reg(void) +{ + u64 pkey_reg; + + asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + u64 amr = pkey_reg; + + dprintf4("%s() changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); + + asm volatile("isync; mtspr 0xd, %0; isync" + : : "r" ((unsigned long)(amr)) : "memory"); + + dprintf4("%s() pkey register after changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); +} + +static inline int cpu_has_pkeys(void) +{ + /* No simple way to determine this */ + return 1; +} + +static inline bool arch_is_powervm() +{ + struct stat buf; + + if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) && + (stat("/sys/firmware/devicetree/base/hmc-managed?", &buf) == 0) && + (stat("/sys/firmware/devicetree/base/chosen/qemu,graphic-width", &buf) == -1) ) + return true; + + return false; +} + +static inline int get_arch_reserved_keys(void) +{ + if (sysconf(_SC_PAGESIZE) == 4096) + return NR_RESERVED_PKEYS_4K; + else + if (arch_is_powervm()) + return NR_RESERVED_PKEYS_64K_4KEYS; + else + return NR_RESERVED_PKEYS_64K_3KEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ + /* + * powerpc does not allow userspace to change permissions of exec-only + * keys since those keys are not allocated by userspace. The signal + * handler wont be able to reset the permissions, which means the code + * will infinitely continue to segfault here. + */ + return; +} + +/* 4-byte instructions * 16384 = 64K page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + + ret = syscall(__NR_subpage_prot, ptr, size, NULL); + if (ret) { + perror("subpage_perm"); + return PTR_ERR_ENOTSUP; + } + + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size, prot); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + +#endif /* _PKEYS_POWERPC_H */ diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h new file mode 100644 index 000000000000..3be20f5d5275 --- /dev/null +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _PKEYS_X86_H +#define _PKEYS_X86_H + +#ifdef __i386__ + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 + +#else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 + +#endif + +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +#define NR_PKEYS 16 +#define NR_RESERVED_PKEYS 2 /* pkey-0 and exec-only-pkey */ +#define PKEY_BITS_PER_PKEY 2 +#define HPAGE_SIZE (1UL<<21) +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +static inline u64 __read_pkey_reg(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned pkey_reg; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkey_reg = eax; + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + unsigned int eax = pkey_reg; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %016llx to %016llx\n", __func__, + __read_pkey_reg(), pkey_reg); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkey_reg == __read_pkey_reg()); +} + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pkeys(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +#define XSTATE_PKEY_BIT (9) +#define XSTATE_PKEY 0x200 + +int pkey_reg_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKEY is set in XCR0 */ + leaf = XSTATE_PKEY_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKEY_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKEY in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +static inline int get_arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ + int ptr_contents; + + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pkey_fault(pkey); +} + +void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + +#endif /* _PKEYS_X86_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 480995bceefa..fc19addcb5c8 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst) + * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt) * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant + * * how to set/clear bits in pkey registers (the rights register) + * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * * Things to add: @@ -22,8 +22,10 @@ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ #include <errno.h> #include <linux/futex.h> +#include <time.h> #include <sys/time.h> #include <sys/syscall.h> #include <string.h> @@ -48,34 +50,10 @@ int iteration_nr = 1; int test_nr; -unsigned int shadow_pkru; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - +u64 shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - exit(__LINE__); \ - } \ -} while (0) - void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); @@ -158,12 +136,6 @@ void abort_hooks(void) #endif } -static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - /* * This attempts to have roughly a page of instructions followed by a few * instructions that do a write, and another page of instructions. That @@ -174,7 +146,12 @@ static inline void __page_o_noops(void) * will then fault, which makes sure that the fault code handles * execute-only memory properly. */ +#ifdef __powerpc64__ +/* This way, both 4K and 64K alignment are maintained */ +__attribute__((__aligned__(65536))) +#else __attribute__((__aligned__(PAGE_SIZE))) +#endif void lots_o_noops_around_write(int *write_to_me) { dprintf3("running %s()\n", __func__); @@ -186,51 +163,134 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); } -/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t +void dump_mem(void *dumpme, int len_bytes) +{ + char *c = (void *)dumpme; + int i; -#ifdef __i386__ + for (i = 0; i < len_bytes; i += sizeof(u64)) { + u64 *ptr = (u64 *)(c + i); + dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr); + } +} -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif +static u32 hw_pkey_get(int pkey, unsigned long flags) +{ + u64 pkey_reg = __read_pkey_reg(); -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif + dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", + __func__, pkey, flags, 0, 0); + dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg); -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + return (u32) get_pkey_bits(pkey_reg, pkey); +} -#else +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u64 old_pkey_reg = __read_pkey_reg(); + u64 new_pkey_reg; -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif + /* make sure that 'rights' only contains the bits we expect: */ + assert(!(rights & ~mask)); -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif + /* modify bits accordingly in old pkey_reg and assign it */ + new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights); -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + __write_pkey_reg(new_pkey_reg); -#endif + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" + " pkey_reg now: %016llx old_pkey_reg: %016llx\n", + __func__, pkey, rights, flags, 0, __read_pkey_reg(), + old_pkey_reg); + return 0; +} -void dump_mem(void *dumpme, int len_bytes) +void pkey_disable_set(int pkey, int flags) { - char *c = (void *)dumpme; - int i; + unsigned long syscall_flags = 0; + int ret; + int pkey_rights; + u64 orig_pkey_reg = read_pkey_reg(); - for (i = 0; i < len_bytes; i += sizeof(u64)) { - u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); - } + dprintf1("START->%s(%d, 0x%x)\n", __func__, + pkey, flags); + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); + assert(!ret); + /* pkey_reg and flags have the same format */ + shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); + dprintf1("%s(%d) shadow: 0x%016llx\n", + __func__, pkey, shadow_pkey_reg); + + pkey_assert(ret >= 0); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkey_reg: 0x%016llx\n", + __func__, pkey, read_pkey_reg()); + if (flags) + pkey_assert(read_pkey_reg() >= orig_pkey_reg); + dprintf1("END<---%s(%d, 0x%x)\n", __func__, + pkey, flags); +} + +void pkey_disable_clear(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights = hw_pkey_get(pkey, syscall_flags); + u64 orig_pkey_reg = read_pkey_reg(); + + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights &= ~flags; + + ret = hw_pkey_set(pkey, pkey_rights, 0); + shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); + pkey_assert(ret >= 0); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, + pkey, read_pkey_reg()); + if (flags) + assert(read_pkey_reg() <= orig_pkey_reg); +} + +void pkey_write_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); +} +void pkey_write_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_WRITE); +} +void pkey_access_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); +} +void pkey_access_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } /* Failed address bound checks: */ @@ -255,7 +315,7 @@ static char *si_code_str(int si_code) return "UNKNOWN"; } -int pkru_faults; +int pkey_faults; int last_si_pkey = -1; void signal_handler(int signum, siginfo_t *si, void *vucontext) { @@ -263,24 +323,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkru_ptr; +#if defined(__i386__) || defined(__x86_64__) /* arch */ + u32 *pkey_reg_ptr; + int pkey_reg_offset; +#endif /* arch */ u64 siginfo_pkey; u32 *si_pkey_ptr; - int pkru_offset; - fpregset_t fpregset; dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", + __func__, __LINE__, + __read_pkey_reg(), shadow_pkey_reg); trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; + fpregs = (char *) uctxt->uc_mcontext.fpregs; - dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); + dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", + __func__, trapno, ip, si_code_str(si->si_code), + si->si_code); + +#if defined(__i386__) || defined(__x86_64__) /* arch */ #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -288,20 +352,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) * state. We just assume that it is here. */ fpregs += 0x70; -#endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); +#endif /* i386 */ + pkey_reg_offset = pkey_reg_xstate_offset(); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); - dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * If we got a PKEY fault, we *HAVE* to have at least one bit set in * here. */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); + dump_mem(pkey_reg_ptr - 128, 256); + pkey_assert(*pkey_reg_ptr); +#endif /* arch */ + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -310,20 +376,29 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + si_pkey_ptr = siginfo_get_pkey_ptr(si); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); dump_mem((u8 *)si_pkey_ptr - 8, 24); siginfo_pkey = *si_pkey_ptr; pkey_assert(siginfo_pkey < NR_PKEYS); last_si_pkey = siginfo_pkey; - dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + dprintf1("signal pkey_reg from pkey_reg: %016llx\n", + __read_pkey_reg()); + dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey); +#if defined(__i386__) || defined(__x86_64__) /* arch */ + dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + *(u64 *)pkey_reg_ptr = 0x00000000; + dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n"); +#elif defined(__powerpc64__) /* arch */ + /* restore access and let the faulting instruction continue */ + pkey_access_allow(siginfo_pkey); +#endif /* arch */ + pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; } @@ -391,143 +466,6 @@ pid_t fork_lazy_child(void) return forkret; } -#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - -static u32 hw_pkey_get(int pkey, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; - - dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", - __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); - - shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); - /* - * shift down the relevant bits to the lowest two, then - * mask off all the other high bits. - */ - return masked_pkru; -} - -static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; - - /* make sure that 'rights' only contains the bits we expect: */ - assert(!(rights & ~mask)); - - /* copy old pkru */ - new_pkru = old_pkru; - /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); - /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); - - __wrpkru(new_pkru); - - dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); - return 0; -} - -void pkey_disable_set(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights; - u32 orig_pkru = rdpkru(); - - dprintf1("START->%s(%d, 0x%x)\n", __func__, - pkey, flags); - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); - assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); - - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - pkey_assert(rdpkru() > orig_pkru); - dprintf1("END<---%s(%d, 0x%x)\n", __func__, - pkey, flags); -} - -void pkey_disable_clear(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); - - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - assert(rdpkru() > orig_pkru); -} - -void pkey_write_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); -} -void pkey_write_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_WRITE); -} -void pkey_access_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); -} -void pkey_access_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); -} - int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey) { @@ -561,33 +499,44 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0; - dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", + __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: + * pkey_alloc() sets PKEY register, so we need to reflect it in + * shadow_pkey_reg: */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, + ~PKEY_MASK); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, + __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); /* * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) + * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkru |= (init_val << (ret * 2)); + shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, + init_val); } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); + dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + read_pkey_reg(); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); return ret; } @@ -612,10 +561,10 @@ int alloc_random_pkey(void) int nr_alloced = 0; int random_index; memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + srand((unsigned int)time(NULL)); /* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey(); if (new_pkey < 0) @@ -638,8 +587,9 @@ int alloc_random_pkey(void) free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", __func__, + __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; } @@ -657,11 +607,15 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, if (nr_iterations-- < 0) break; - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); } pkey_assert(pkey < NR_PKEYS); @@ -669,8 +623,9 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", __func__, + __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; } @@ -752,7 +707,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) void *ptr; int ret; - rdpkru(); + read_pkey_reg(); dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, size, prot, pkey); pkey_assert(pkey < NR_PKEYS); @@ -761,7 +716,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); record_pkey_malloc(ptr, size, prot); - rdpkru(); + read_pkey_reg(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); return ptr; @@ -798,12 +753,15 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) } int hugetlb_setup_ok; +#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" #define GET_NR_HUGE_PAGES 10 void setup_hugetlbfs(void) { int err; int fd; - char buf[] = "123"; + char buf[256]; + long hpagesz_kb; + long hpagesz_mb; if (geteuid() != 0) { fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); @@ -814,11 +772,16 @@ void setup_hugetlbfs(void) /* * Now go make sure that we got the pages and that they - * are 2M pages. Someone might have made 1G the default. + * are PMD-level pages. Someone might have made PUD-level + * pages the default. */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + hpagesz_kb = HPAGE_SIZE / 1024; + hpagesz_mb = hpagesz_kb / 1024; + sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb); + fd = open(buf, O_RDONLY); if (fd < 0) { - perror("opening sysfs 2M hugetlb config"); + fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n", + hpagesz_mb, strerror(errno)); return; } @@ -826,13 +789,14 @@ void setup_hugetlbfs(void) err = read(fd, buf, sizeof(buf)-1); close(fd); if (err <= 0) { - perror("reading sysfs 2M hugetlb config"); + fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n", + hpagesz_mb, strerror(errno)); return; } if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", - buf, GET_NR_HUGE_PAGES); + fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n", + hpagesz_mb, buf, GET_NR_HUGE_PAGES); return; } @@ -886,6 +850,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { malloc_pkey_with_mprotect, + malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb /* can not do direct with the pkey_mprotect() API: @@ -924,14 +889,14 @@ void *malloc_pkey(long size, int prot, u16 pkey) return ret; } -int last_pkru_faults; +int last_pkey_faults; #define UNKNOWN_PKEY -2 -void expected_pk_fault(int pkey) +void expected_pkey_fault(int pkey) { - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", + __func__, last_pkey_faults, pkey_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_pkey_faults + 1 == pkey_faults); /* * For exec-only memory, we do not know the pkey in @@ -940,24 +905,28 @@ void expected_pk_fault(int pkey) if (pkey != UNKNOWN_PKEY) pkey_assert(last_si_pkey == pkey); +#if defined(__i386__) || defined(__x86_64__) /* arch */ /* - * The signal handler shold have cleared out PKRU to let the + * The signal handler shold have cleared out PKEY register to let the * test program continue. We now have to restore it. */ - if (__rdpkru() != 0) + if (__read_pkey_reg() != 0) +#else /* arch */ + if (__read_pkey_reg() != shadow_pkey_reg) +#endif /* arch */ pkey_assert(0); - __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; + __write_pkey_reg(shadow_pkey_reg); + dprintf1("%s() set pkey_reg=%016llx to restore state after signal " + "nuked it\n", __func__, shadow_pkey_reg); + last_pkey_faults = pkey_faults; last_si_pkey = -1; } -#define do_not_expect_pk_fault(msg) do { \ - if (last_pkru_faults != pkru_faults) \ - dprintf0("unexpected PK fault: %s\n", msg); \ - pkey_assert(last_pkru_faults == pkru_faults); \ +#define do_not_expect_pkey_fault(msg) do { \ + if (last_pkey_faults != pkey_faults) \ + dprintf0("unexpected PKey fault: %s\n", msg); \ + pkey_assert(last_pkey_faults == pkey_faults); \ } while (0) int test_fds[10] = { -1 }; @@ -1000,6 +969,58 @@ __attribute__((noinline)) int read_ptr(int *ptr) return *ptr; } +void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +{ + int i, err; + int max_nr_pkey_allocs; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + long size; + + pkey_assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + + /* allocate every possible key and make sure key-0 never got allocated */ + max_nr_pkey_allocs = NR_PKEYS; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + pkey_assert(new_pkey != 0); + + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + /* free all the allocated keys */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + + /* attach key-0 in various modes */ + err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); + pkey_assert(!err); +} + void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1015,26 +1036,67 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) int ptr_contents; dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); + read_pkey_reg(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pkey_fault(pkey); +} + +void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); + ptr_contents = read_ptr(ptr); + dprintf1("reading ptr before disabling the read : %d\n", + ptr_contents); + read_pkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } + +void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling write access; after accessing the page, " + "to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } + +void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling access; after accessing the page, " + " to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; @@ -1160,9 +1222,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ + dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx" + " shadow: 0x%016llx\n", + __func__, __LINE__, err, __read_pkey_reg(), + shadow_pkey_reg); + read_pkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", @@ -1188,6 +1252,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) dprintf3("%s()::%d\n", __func__, __LINE__); /* + * On x86: * There are 16 pkeys supported in hardware. Three are * allocated by the time we get here: * 1. The default key (0) @@ -1195,13 +1260,21 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) * 3. One allocated by the test code and passed in via * 'pkey' to this function. * Ensure that we can allocate at least another 13 (16-3). + * + * On powerpc: + * There are either 5, 28, 29 or 32 pkeys supported in + * hardware depending on the page size (4K or 64K) and + * platform (powernv or powervm). Four are allocated by + * the time we get here. These include pkey-0, pkey-1, + * exec-only pkey and the one allocated by the test code. + * Ensure that we can allocate the remaining. */ - pkey_assert(i >= NR_PKEYS-3); + pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1)); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); pkey_assert(!err); - rdpkru(); /* for shadow checking */ + read_pkey_reg(); /* for shadow checking */ } } @@ -1287,7 +1360,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect an exception: */ peek_result = read_ptr(ptr); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); /* * Try to access the NON-pkey-protected "plain_ptr" via ptrace: @@ -1297,7 +1370,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault("read plain pointer after ptrace"); + do_not_expect_pkey_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1347,17 +1420,15 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); pkey_access_deny(pkey); - dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); /* * Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); + do_not_expect_pkey_fault("executing on PROT_EXEC memory"); + expect_fault_on_read_execonly_key(p1, pkey); } void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1378,15 +1449,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); pkey_assert(!ret); - dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); /* Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(UNKNOWN_PKEY); + do_not_expect_pkey_fault("executing on PROT_EXEC memory"); + expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY); /* * Put the memory back to non-PROT_EXEC. Should clear the @@ -1400,7 +1469,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); pkey_assert(!ret); ptr_contents = read_ptr(p1); - do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); + do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) @@ -1408,7 +1477,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) int size = PAGE_SIZE; int sret; - if (cpu_has_pku()) { + if (cpu_has_pkeys()) { dprintf1("SKIP: %s: no CPU support\n", __func__); return; } @@ -1420,8 +1489,11 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, + test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, + test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, + test_write_of_access_disabled_region_with_page_already_mapped, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region, test_kernel_gup_of_access_disabled_region, @@ -1433,6 +1505,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, + test_pkey_alloc_free_attach_pkey0, }; void run_tests_once(void) @@ -1442,7 +1515,7 @@ void run_tests_once(void) for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { int pkey; - int orig_pkru_faults = pkru_faults; + int orig_pkey_faults = pkey_faults; dprintf1("======================\n"); dprintf1("test %d preparing...\n", test_nr); @@ -1457,8 +1530,8 @@ void run_tests_once(void) free_pkey_malloc(ptr); sys_pkey_free(pkey); - dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + dprintf1("pkey_faults: %d\n", pkey_faults); + dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults); tracing_off(); close_test_fds(); @@ -1471,18 +1544,19 @@ void run_tests_once(void) void pkey_setup_shadow(void) { - shadow_pkru = __rdpkru(); + shadow_pkey_reg = __read_pkey_reg(); } int main(void) { int nr_iterations = 22; + int pkeys_supported = is_pkeys_supported(); setup_handlers(); - printf("has pku: %d\n", cpu_has_pku()); + printf("has pkeys: %d\n", pkeys_supported); - if (!cpu_has_pku()) { + if (!pkeys_supported) { int size = PAGE_SIZE; int *ptr; @@ -1495,7 +1569,7 @@ int main(void) } pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); + printf("startup pkey_reg: %016llx\n", read_pkey_reg()); setup_hugetlbfs(); while (nr_iterations-- > 0) diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore index 022a1f3b64ef..1aaef5bf119a 100644 --- a/tools/testing/selftests/x86/.gitignore +++ b/tools/testing/selftests/x86/.gitignore @@ -12,5 +12,4 @@ ldt_gdt iopl mpx-mini-test ioperm -protection_keys test_vdso diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d49bfec1e9a..5f16821c7f63 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - protection_keys test_vdso test_vsyscall mov_ss_trap \ + test_vdso test_vsyscall mov_ss_trap \ syscall_arg_fault TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h deleted file mode 100644 index 254e5436bdd9..000000000000 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PKEYS_HELPER_H -#define _PKEYS_HELPER_H -#define _GNU_SOURCE -#include <string.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> - -#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 -extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -static inline void sigsafe_printf(const char *format, ...) -{ - va_list ap; - - if (!dprint_in_signal) { - va_start(ap, format); - vprintf(format, ap); - va_end(ap); - } else { - int ret; - /* - * No printf() functions are signal-safe. - * They deadlock easily. Write the format - * string to get some output, even if - * incomplete. - */ - ret = write(1, format, strlen(format)); - if (ret < 0) - exit(1); - } -} -#define dprintf_level(level, args...) do { \ - if (level <= DEBUG_LEVEL) \ - sigsafe_printf(args); \ -} while (0) -#define dprintf0(args...) dprintf_level(0, args) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) - -extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkru; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkru = eax; - return pkru; -} - -static inline unsigned int _rdpkru(int line) -{ - unsigned int pkru = __rdpkru(); - - dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); - - return pkru; -} - -#define rdpkru() _rdpkru(__LINE__) - -static inline void __wrpkru(unsigned int pkru) -{ - unsigned int eax = pkru; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); -} - -static inline void wrpkru(unsigned int pkru) -{ - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); -} - -/* - * These are technically racy. since something could - * change PKRU between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - unsigned int pkru = rdpkru(); - int bit = pkey * 2; - - if (do_allow) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - long pkru = rdpkru(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); -} - -#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - -#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 - -int pkru_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKRU_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); - return 0; - } - - return xstate_offset; -} - -#endif /* _PKEYS_HELPER_H */ |