From 6baf4bec02dbc41645c3a5130ee15a8e1d62b80f Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 6 Apr 2018 13:55:07 -0700 Subject: x86/espfix: Document use of _PAGE_GLOBAL The "normal" kernel page table creation mechanisms using PAGE_KERNEL_* page protections will never set _PAGE_GLOBAL with PTI. The few places in the kernel that always want _PAGE_GLOBAL must avoid using PAGE_KERNEL_*. Document that we want it here and its use is not accidental. Signed-off-by: Dave Hansen Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180406205507.BCF4D4F0@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/espfix_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index e5ec3cafa72e..aebd0d5bc086 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -195,6 +195,10 @@ void init_espfix_ap(int cpu) pte_p = pte_offset_kernel(&pmd, addr); stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); + /* + * __PAGE_KERNEL_* includes _PAGE_GLOBAL, which we want since + * this is mapped to userspace. + */ pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); -- cgit v1.2.3 From fb43d6cb91ef57d9e58d5f69b423784ff4a4c374 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 6 Apr 2018 13:55:09 -0700 Subject: x86/mm: Do not auto-massage page protections A PTE is constructed from a physical address and a pgprotval_t. __PAGE_KERNEL, for instance, is a pgprot_t and must be converted into a pgprotval_t before it can be used to create a PTE. This is done implicitly within functions like pfn_pte() by massage_pgprot(). However, this makes it very challenging to set bits (and keep them set) if your bit is being filtered out by massage_pgprot(). This moves the bit filtering out of pfn_pte() and friends. For users of PAGE_KERNEL*, filtering will be done automatically inside those macros but for users of __PAGE_KERNEL*, they need to do their own filtering now. Note that we also just move pfn_pte/pmd/pud() over to check_pgprot() instead of massage_pgprot(). This way, we still *look* for unsupported bits and properly warn about them if we find them. This might happen if an unfiltered __PAGE_KERNEL* value was passed in, for instance. - printk format warning fix from: Arnd Bergmann - boot crash fix from: Tom Lendacky - crash bisected by: Mike Galbraith Signed-off-by: Dave Hansen Reported-and-fixed-by: Arnd Bergmann Fixed-by: Tom Lendacky Bisected-by: Mike Galbraith Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180406205509.77E1D7F6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/ldt.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0c855deee165..0c408f8c4ed4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -195,6 +195,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pud[i + 1] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; + /* Filter out unsupported __PAGE_KERNEL_* bits: */ + pmd_entry &= __supported_pte_mask; pmd_entry += sme_get_me_mask(); pmd_entry += physaddr; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 26d713ecad34..d41d896481b8 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -145,6 +145,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; + pgprot_t pte_prot; pte_t pte, *ptep; va = (unsigned long)ldt_slot_va(slot) + offset; @@ -163,7 +164,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) * target via some kernel interface which misses a * permission check. */ - pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); + pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); + /* Filter out unsuppored __PAGE_KERNEL* bits: */ + pgprot_val(pte_prot) |= __supported_pte_mask; + pte = pfn_pte(pfn, pte_prot); set_pte_at(mm, va, ptep, pte); pte_unmap_unlock(ptep, ptl); } -- cgit v1.2.3 From 430d4005b8b41c19966dd3bfdb33004bdb2de01c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 6 Apr 2018 13:55:13 -0700 Subject: x86/mm: Comment _PAGE_GLOBAL mystery I was mystified as to where the _PAGE_GLOBAL in the kernel page tables for kernel text came from. I audited all the places I could find, but I missed one: head_64.S. The page tables that we create in here live for a long time, and they also have _PAGE_GLOBAL set, despite whether the processor supports it or not. It's harmless, and we got *lucky* that the pageattr code accidentally clears it when we wipe it out of __supported_pte_mask and then later try to mark kernel text read-only. Comment some of these properties to make it easier to find and understand in the future. Signed-off-by: Dave Hansen Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180406205513.079BB265@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 48385c1074a5..8344dd2f310a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -399,8 +399,13 @@ NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .fill 511, 8, 0 NEXT_PAGE(level2_ident_pgt) - /* Since I easily can, map the first 1G. + /* + * Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #else @@ -431,6 +436,10 @@ NEXT_PAGE(level2_kernel_pgt) * (NOTE: at +512MB starts the module area, see MODULES_VADDR. * If you want to increase this then increase MODULES_VADDR * too.) + * + * This table is eventually used by the kernel during normal + * runtime. Care must be taken to clear out undesired bits + * later, like _PAGE_RW or _PAGE_GLOBAL in some cases. */ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) -- cgit v1.2.3