From b00f80fcfaa098f987dde99585e73e8ed7edae51 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 10 Feb 2020 11:39:51 +0800 Subject: PCI: hv: Move hypercall related definitions into tlfs header Currently HVCALL_RETARGET_INTERRUPT and HV_PARTITION_ID_SELF are defined in pci-hyperv.c. However, similar to other hypercall related definitions, it makes more sense to put them in the tlfs header file. Besides, these definitions are arch-dependent, so for the support of virtual PCI on non-x86 archs in the future, move them into the arch-specific tlfs header file. Signed-off-by: Boqun Feng (Microsoft) Signed-off-by: Lorenzo Pieralisi Reviewed-by: Andrew Murray Reviewed-by: Dexuan Cui --- arch/x86/include/asm/hyperv-tlfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 92abc1e42bfc..dffed0e10a68 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -376,6 +376,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 @@ -405,6 +406,8 @@ enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_ALL, }; +#define HV_PARTITION_ID_SELF ((u64)-1) + #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 -- cgit v1.2.3 From 61bfd920abbf2c8c9c3b10bb335475e707247573 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 10 Feb 2020 11:39:52 +0800 Subject: PCI: hv: Move retarget related structures into tlfs header Currently, retarget_msi_interrupt and other structures it relies on are defined in pci-hyperv.c. However, those structures are actually defined in the Hypervisor Top-Level Functional Specification [1] and may differ in field sizes or layout from architecture to architecture. Let's move those definitions into x86's tlfs header file to support virtual PCI on non-x86 architectures in the future. Note that the "__packed" attribute is added to these structures during the movement for the same reason as we use the attribute for other TLFS structures in the header file: make sure the structures meet the specification and avoid anything unexpected from the compilers. Additionally, rename struct retarget_msi_interrupt to hv_retarget_device_interrupt for a consistent naming convention, also mirroring the name in TLFS.
[1]: https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs Signed-off-by: Boqun Feng (Microsoft) Signed-off-by: Lorenzo Pieralisi Reviewed-by: Dexuan Cui --- arch/x86/include/asm/hyperv-tlfs.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index dffed0e10a68..a0b6a88d2f05 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -912,4 +912,35 @@ struct hv_tlb_flush_ex { struct hv_partition_assist_pg { u32 tlb_lock_count; }; + +struct hv_interrupt_entry { + u32 source; /* 1 for MSI(-X) */ + u32 reserved1; + u32 address; + u32 data; +} __packed; + +/* + * flags for hv_device_interrupt_target.flags + */ +#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 +#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 + +struct hv_device_interrupt_target { + u32 vector; + u32 flags; + union { + u64 vp_mask; + struct hv_vpset vp_set; + }; +} __packed; + +/* HvRetargetDeviceInterrupt hypercall */ +struct hv_retarget_device_interrupt { + u64 partition_id; /* use "self" */ + u64 device_id; + struct hv_interrupt_entry int_entry; + u64 reserved2; + struct hv_device_interrupt_target int_target; +} __packed __aligned(8); #endif -- cgit v1.2.3 From 1cf106d93245f436c10e73cd3d4b885067d4bbcc Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 10 Feb 2020 11:39:53 +0800 Subject: PCI: hv: Introduce hv_msi_entry Add a new structure (hv_msi_entry), which is also defined in the TLFS, to describe the msi entry for HVCALL_RETARGET_INTERRUPT. The structure is needed because its layout may be different from architecture to architecture. Also add a new generic interface hv_set_msi_entry_from_desc() to allow different archs to set the msi entry from msi_desc. No functional change, only preparation for the future support of virtual PCI on non-x86 architectures. 
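Taken together, the three patches in this series let an architecture-neutral caller build the retarget hypercall argument purely from these header definitions. The following is a minimal, illustrative sketch only: the helper name and its device_id/vector/cpu parameters are hypothetical, and the real pci-hyperv.c code builds the argument in a per-CPU hypercall input page under a lock (a stack variable cannot be handed to the hypervisor with CONFIG_VMAP_STACK) and derives device_id from the VMBus channel.

/* Hypothetical example; not part of the patches above. */
#include <linux/msi.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>

static u64 example_retarget_msi(u64 device_id, struct msi_desc *desc,
				u32 vector, int cpu)
{
	struct hv_retarget_device_interrupt params = {};

	params.partition_id = HV_PARTITION_ID_SELF;
	params.device_id = device_id;
	params.int_entry.source = 1;	/* MSI(-X), per the TLFS */
	hv_set_msi_entry_from_desc(&params.int_entry.msi_entry, desc);

	params.int_target.vector = vector;
	/* flags == 0: target a plain VP bitmask rather than a vpset */
	params.int_target.vp_mask = 1ULL << hv_cpu_number_to_vp_number(cpu);

	return hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, &params, NULL);
}

When targeting more than 64 VPs, the real driver switches int_target.flags to HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET and encodes a variable-header size into the hypercall control word via HV_HYPERCALL_VARHEAD_OFFSET; that detail is omitted from the sketch.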
Signed-off-by: Boqun Feng (Microsoft) Signed-off-by: Lorenzo Pieralisi Reviewed-by: Dexuan Cui --- arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++++-- arch/x86/include/asm/mshyperv.h | 8 ++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index a0b6a88d2f05..29336574d0bc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -913,11 +913,18 @@ struct hv_partition_assist_pg { u32 tlb_lock_count; }; +union hv_msi_entry { + u64 as_uint64; + struct { + u32 address; + u32 data; + } __packed; +}; + struct hv_interrupt_entry { u32 source; /* 1 for MSI(-X) */ u32 reserved1; - u32 address; - u32 data; + union hv_msi_entry msi_entry; } __packed; /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6b79515abb82..81fc30240122 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -4,6 +4,7 @@ #include #include +#include <linux/msi.h> #include #include #include @@ -240,6 +241,13 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif +static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + struct msi_desc *msi_desc) +{ + msi_entry->address = msi_desc->msg.address_lo; + msi_entry->data = msi_desc->msg.data; +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} -- cgit v1.2.3 From 5a281062af1d43d3f3956a6b429c2d727bc92603 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 6 Apr 2020 20:05:33 -0700 Subject: userfaultfd: wp: add WP pagetable tracking to x86 Accurate userfaultfd WP tracking is possible by tracking exactly which virtual memory ranges were writeprotected by userland. We can't rely only on the RW bit of the mapped pagetable because that information is destroyed by fork() or KSM or swap. If we were to rely on that, we'd need to stay on the safe side and generate false positive wp faults for every swapped out page. [peterx@redhat.com: append _PAGE_UFFD_WP to _PAGE_CHG_MASK] Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A .
Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-4-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 52 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/pgtable_64.h | 8 +++++- arch/x86/include/asm/pgtable_types.h | 12 ++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index afda66a6d325..c37e1649fb7e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,7 @@ #include #include #include +#include extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); @@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); @@ -1374,6 +1409,23 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0b6c4042942a..df1373415f11 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. 
* + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 65c2ecd730c5..b6606fe6cfdf 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -32,6 +32,7 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 @@ -100,6 +101,14 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) @@ -118,7 +127,8 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_UFFD_WP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* -- cgit v1.2.3 From 2e3d5dc508cf001c4fb2d15515ebe6f30df88f76 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Apr 2020 20:05:57 -0700 Subject: userfaultfd: wp: add pmd_swp_*uffd_wp() helpers Adding these missing helpers for uffd-wp operations with pmd swap/migration entries. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . 
Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-10-peterx@redhat.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c37e1649fb7e..28838d790191 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1424,6 +1424,21 @@ static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); +} + +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define PKRU_AD_BIT 0x1 -- cgit v1.2.3 From 418d6e295e4375e1097c511da11795e592aedbeb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Mar 2020 17:00:50 +0900 Subject: x86: remove unneeded defined(__ASSEMBLY__) check from asm/dwarf2.h This header file has the following check at the top: #ifndef __ASSEMBLY__ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif So, we expect defined(__ASSEMBLY__) is always true. Signed-off-by: Masahiro Yamada Reviewed-by: Jason A. Donenfeld Reviewed-by: Nick Desaulniers Acked-by: Ingo Molnar --- arch/x86/include/asm/dwarf2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index f71a0cce9373..87054cc8272e 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -36,7 +36,7 @@ #define CFI_SIGNAL_FRAME #endif -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) +#if defined(CONFIG_AS_CFI_SECTIONS) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. -- cgit v1.2.3 From 0f2661c4b935e5aa7d309f6c5072d4c49465137f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Mar 2020 17:00:51 +0900 Subject: x86: remove always-defined CONFIG_AS_CFI CONFIG_AS_CFI was introduced by commit e2414910f212 ("[PATCH] x86: Detect CFI support in the assembler at runtime"), and extended by commit f0f12d85af85 ("x86_64: Check for .cfi_rel_offset in CFI probe"). We raise the minimal supported binutils version from time to time. The last bump was commit 1fb12b35e5ff ("kbuild: Raise the minimum required binutils version to 2.21"). I confirmed the code in $(call as-instr,...) can be assembled by the binutils 2.21 assembler and also by LLVM integrated assembler. Remove CONFIG_AS_CFI, which is always defined. Signed-off-by: Masahiro Yamada Reviewed-by: Jason A. Donenfeld Reviewed-by: Nick Desaulniers Acked-by: Ingo Molnar --- arch/x86/include/asm/dwarf2.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 87054cc8272e..76cc07ff1002 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,15 +6,6 @@ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif -/* - * Macros for dwarf2 CFI unwind table entries. 
- * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -55,31 +46,4 @@ #endif #endif -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. - */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif - #endif /* _ASM_X86_DWARF2_H */ -- cgit v1.2.3 From 46427f658e90eda40b85313b5cff05bdc56c1988 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Mar 2020 17:00:52 +0900 Subject: x86: remove unneeded (CONFIG_AS_)CFI_SIGNAL_FRAME Commit 131484c8da97 ("x86/debug: Remove perpetually broken, unmaintainable dwarf annotations") removes all the users of CFI_SIGNAL_FRAME. Remove the CFI_SIGNAL_FRAME and CONFIG_AS_CFI_SIGNAL_FRAME. Signed-off-by: Masahiro Yamada Reviewed-by: Jason A. Donenfeld Reviewed-by: Nick Desaulniers Acked-by: Ingo Molnar --- arch/x86/include/asm/dwarf2.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 76cc07ff1002..882db09d5dfb 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -21,12 +21,6 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - #if defined(CONFIG_AS_CFI_SECTIONS) #ifndef BUILD_VDSO /* -- cgit v1.2.3 From 48e24723d0b54a2c9e53dd96c8864cc0afc7b0ee Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Mar 2020 17:00:53 +0900 Subject: x86: remove always-defined CONFIG_AS_CFI_SECTIONS CONFIG_AS_CFI_SECTIONS was introduced by commit 9e565292270a ("x86: Use .cfi_sections for assembly code"). We raise the minimal supported binutils version from time to time. The last bump was commit 1fb12b35e5ff ("kbuild: Raise the minimum required binutils version to 2.21"). I confirmed the code in $(call as-instr,...) can be assembled by the binutils 2.21 assembler and also by LLVM integrated assembler. Remove CONFIG_AS_CFI_SECTIONS, which is always defined. Signed-off-by: Masahiro Yamada Reviewed-by: Jason A. Donenfeld Reviewed-by: Nick Desaulniers Acked-by: Ingo Molnar --- arch/x86/include/asm/dwarf2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 882db09d5dfb..430fca13bb56 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -21,7 +21,6 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#if defined(CONFIG_AS_CFI_SECTIONS) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. 
@@ -38,6 +37,5 @@ */ .cfi_sections .eh_frame, .debug_frame #endif -#endif #endif /* _ASM_X86_DWARF2_H */ -- cgit v1.2.3 From 42251572c4687813d8e7b1d363a23d0f9201e69f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Mar 2020 17:00:55 +0900 Subject: x86: remove always-defined CONFIG_AS_AVX CONFIG_AS_AVX was introduced by commit ea4d26ae24e5 ("raid5: add AVX optimized RAID5 checksumming"). We raise the minimal supported binutils version from time to time. The last bump was commit 1fb12b35e5ff ("kbuild: Raise the minimum required binutils version to 2.21"). I confirmed the code in $(call as-instr,...) can be assembled by the binutils 2.21 assembler and also by LLVM integrated assembler. Remove CONFIG_AS_AVX, which is always defined. Signed-off-by: Masahiro Yamada Reviewed-by: Jason A. Donenfeld Acked-by: Ingo Molnar --- arch/x86/include/asm/xor_avx.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index d61ddf3d052b..0c4e5b5e3852 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -11,8 +11,6 @@ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines */ -#ifdef CONFIG_AS_AVX - #include #include @@ -170,11 +168,4 @@ do { \ #define AVX_SELECT(FASTEST) \ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) -#else - -#define AVX_XOR_SPEED {} - -#define AVX_SELECT(FASTEST) (FASTEST) - -#endif #endif -- cgit v1.2.3 From c97078bd219cbe1a878b24bb4e61d312f19ece1f Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:58 -0700 Subject: mm: define pte_index as macro for x86 pte_index() is either defined as a macro (e.g. sparc64) or as an inlined function (e.g. x86). vm_insert_pages() depends on pte_index but it is not defined on all platforms (e.g. m68k). To fix compilation of vm_insert_pages() on architectures not providing pte_index(), we perform the following fix: 0. For platforms where it is meaningful, and defined as a macro, no change is needed. 1. For platforms where it is meaningful and defined as an inlined function, and we want to use it with vm_insert_pages(), we define a degenerate macro of the form: #define pte_index pte_index 2. vm_insert_pages() checks for the existence of a pte_index macro definition. If found, it implements a batched insert. If not found, it devolves to calling vm_insert_page() in a loop. This patch implements step 1 for x86. v3 of this patch fixes a compilation warning for an unused method. v2 of this patch moved a macro definition to a more readable location. Signed-off-by: Arjun Roy Signed-off-by: Andrew Morton Cc: David Miller Cc: Eric Dumazet Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Soheil Hassas Yeganeh Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200228054714.204424-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 28838d790191..abad0da0973a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -860,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. 
 */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -- cgit v1.2.3 From c62da0c35d58518ddb26ff641d2485596567fd96 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:05 -0700 Subject: mm/vma: define a default value for VM_DATA_DEFAULT_FLAGS There are many platforms with the exact same value for VM_DATA_DEFAULT_FLAGS. This creates a default value for VM_DATA_DEFAULT_FLAGS in line with the existing VM_STACK_DEFAULT_FLAGS. While here, also define some more macros with standard VMA access flag combinations that are used frequently across many platforms. Apart from simplification, this reduces code duplication as well. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Ley Foon Tan Cc: Jonas Bonn Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Walmsley Cc: Heiko Carstens Cc: Rich Felker Cc: "David S. Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Jeff Dike Cc: Chris Zankel Link: http://lkml.kernel.org/r/1583391014-8170-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..e27aa6be6320 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) -- cgit v1.2.3 From c164fbb40c43f8041f4d05ec9996d8ee343c92b1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:24 -0700 Subject: x86/mm: thread pgprot_t through init_memory_mapping() In preparation for supporting a pgprot_t argument for arch_add_memory(). It's required to move the prototype of init_memory_mapping() since the original location came before the definition of pgprot_t. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H.
Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-4-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 3 --- arch/x86/include/asm/pgtable.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e27aa6be6320..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -71,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index abad0da0973a..4d02e64af1b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1081,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else -- cgit v1.2.3 From 30796e18c29942c4d64bf89c4135c975393ec1ad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:28 -0700 Subject: x86/mm: introduce __set_memory_prot() For use in the 32bit arch_add_memory() to set the pgprot type of the memory to add. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-5-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/set_memory.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 950532ccbc4a..ec2c0a094b5d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); -- cgit v1.2.3 From d7e94dbdac1a40924626b0efc7ff530c8baf5e4a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Apr 2020 13:54:00 +0200 Subject: x86/split_lock: Provide handle_guest_split_lock() Without at least minimal handling for split lock detection induced #AC, VMX will just run into the same problem as the VMWare hypervisor, which was reported by Kenneth. It will inject the #AC blindly into the guest whether the guest is prepared or not. Provide a function for guest mode which acts depending on the host SLD mode. If mode == sld_warn, treat it like user space, i.e. emit a warning, disable SLD and mark the task accordingly. 
Otherwise force SIGBUS. [ bp: Add a !CPU_SUP_INTEL stub for handle_guest_split_lock(). ] Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Paolo Bonzini Link: https://lkml.kernel.org/r/20200410115516.978037132@linutronix.de Link: https://lkml.kernel.org/r/20200402123258.895628824@linutronix.de --- arch/x86/include/asm/cpu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index ff6f3ca649b3..dd17c2da1af5 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int sig); extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); #else static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} static inline void switch_to_sld(unsigned long tifn) {} @@ -51,5 +52,10 @@ static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { return false; } + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} #endif #endif /* _ASM_X86_CPU_H */ -- cgit v1.2.3 From a088b858f16af85e3db359b6c6aaa92dd3bc0921 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Apr 2020 09:43:20 +0200 Subject: efi/x86: Revert struct layout change to fix kexec boot regression Commit 0a67361dcdaa29 ("efi/x86: Remove runtime table address from kexec EFI setup data") removed the code that retrieves the non-remapped UEFI runtime services pointer from the data structure provided by kexec, as it was never really needed on the kexec boot path: mapping the runtime services table at its non-remapped address is only needed when calling SetVirtualAddressMap(), which never happens during a kexec boot in the first place. However, dropping the 'runtime' member from struct efi_setup_data was a mistake. That struct is shared ABI between the kernel and the kexec tooling for x86, and so we cannot simply change its layout. So let's put back the removed field, but call it 'unused' to reflect the fact that we never look at its contents. While at it, add a comment to remind our future selves that the layout is external ABI. Fixes: 0a67361dcdaa29 ("efi/x86: Remove runtime table address from kexec EFI setup data") Reported-by: Theodore Ts'o Tested-by: Theodore Ts'o Reviewed-by: Dave Young Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cdcf48d52a12..8391c115c0ec 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -178,8 +178,10 @@ extern void efi_free_boot_services(void); extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); +/* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; + u64 __unused; u64 tables; u64 smbios; u64 reserved[8]; -- cgit v1.2.3 From bdf89df3c54518eed879d8fac7577fcfb220c67e Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 9 Apr 2020 10:34:29 -0500 Subject: x86/microcode/AMD: Increase microcode PATCH_MAX_SIZE Future AMD CPUs will have microcode patches that exceed the default 4K patch size. Raise our limit. Signed-off-by: John Allen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v4.14.. 
Link: https://lkml.kernel.org/r/20200409152931.GA685273@mojo.amd.com --- arch/x86/include/asm/microcode_amd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 6685e1218959..7063b5a43220 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); -- cgit v1.2.3 From 637543a8d61c6afe4e9be64bfb43c78701a83375 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 7 Apr 2020 01:13:09 -0500 Subject: KVM: x86: Fixes posted interrupt check for IRQs delivery modes Current logic incorrectly uses the enum ioapic_irq_destination_types to check the posted interrupt destination types. However, the value was set using APIC_DM_XXX macros, which are left-shifted by 8 bits. Fix this by using APIC_DM_FIXED and APIC_DM_LOWEST instead. Fixes: fdcf75621375 ("KVM: x86: Disable posted interrupts for non-standard IRQs delivery modes") Cc: Alexander Graf Signed-off-by: Suravee Suthikulpanit Message-Id: <1586239989-58305-1-git-send-email-suravee.suthikulpanit@amd.com> Reviewed-by: Maxim Levitsky Tested-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 42a2d0d3984a..0dea9f122bb9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 4d5523cfd5d298c58743eb31c003886cfc856709 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 May 2020 07:33:20 -0400 Subject: KVM: x86: fix DR6 delivery for various cases of #DB injection Go through kvm_queue_exception_p so that the payload is correctly delivered through the exit qualification, and add a kvm_update_dr6 call to kvm_deliver_exception_payload that is needed on AMD.
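For illustration, the injection pattern this fix establishes looks roughly like the sketch below. The wrapper function is hypothetical and the actual call sites live in arch/x86/kvm/x86.c (not in this header diff); kvm_queue_exception_p(), DB_VECTOR and DR6_BS are real kernel symbols, the latter two being the architectural #DB vector number and the DR6 single-step bit.

/* Illustrative sketch only; real call sites are in arch/x86/kvm/x86.c. */
static void example_singlestep_db(struct kvm_vcpu *vcpu)
{
	/*
	 * The DR6.BS bit travels with the exception as a payload: on Intel
	 * it is delivered through the exit qualification, and
	 * kvm_deliver_exception_payload() merges it into the guest-visible
	 * DR6 (after this patch, also refreshed on AMD via kvm_update_dr6).
	 */
	kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
}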
Reported-by: Peter Xu Reviewed-by: Peter Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0dea9f122bb9..8c247bcb037e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1449,6 +1449,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -- cgit v1.2.3 From 5679b803e44ed8947e8c2a7f44cdef1d93ea24d5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 May 2020 11:28:25 -0400 Subject: KVM: SVM: keep DR6 synchronized with vcpu->arch.dr6 kvm_x86_ops.set_dr6 is only ever called with vcpu->arch.dr6 as the second argument. Ensure that the VMCB value is synchronized to vcpu->arch.dr6 on #DB (both "normal" and nested) and nested vmentry, so that the current value of DR6 is always available in vcpu->arch.dr6. The get_dr6 callback can just access vcpu->arch.dr6 and becomes redundant. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8c247bcb037e..de0c28814348 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1093,7 +1093,6 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - u64 (*get_dr6)(struct kvm_vcpu *vcpu); void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); @@ -1624,6 +1623,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +void kvm_update_dr6(struct kvm_vcpu *vcpu); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); -- cgit v1.2.3 From d67668e9dd76d98136048935723947156737932b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 May 2020 06:40:04 -0400 Subject: KVM: x86, SVM: isolate vcpu->arch.dr6 from vmcb->save.dr6 There are two issues with KVM_EXIT_DEBUG on AMD, whose root cause is the different handling of DR6 on intercepted #DB exceptions on Intel and AMD. On Intel, #DB exceptions transmit the DR6 value via the exit qualification field of the VMCS, and the exit qualification only contains the description of the precise event that caused a vmexit. On AMD, instead the DR6 field of the VMCB is filled in as if the #DB exception was to be injected into the guest. This has two effects when guest debugging is in use: * the guest DR6 is clobbered * the kvm_run->debug.arch.dr6 field can accumulate more debug events, rather than just the last one that happened (the testcase in the next patch covers this issue). 
This patch fixes both issues by emulating, so to speak, the Intel behavior on AMD processors. The important observation is that (after the previous patches) the VMCB value of DR6 is only ever observable from the guest if KVM_DEBUGREG_WONT_EXIT is set. Therefore we can actually set vmcb->save.dr6 to any value we want as long as KVM_DEBUGREG_WONT_EXIT is clear, which it will be if guest debugging is enabled. Therefore it is possible to enter the guest with an all-zero DR6, reconstruct the #DB payload from the DR6 we get at exit time, and let kvm_deliver_exception_payload move the newly set bits into vcpu->arch.dr6. Some extra bits may be included in the payload if KVM_DEBUGREG_WONT_EXIT is set, but this is harmless. This may not be the most optimized way to deal with this, but it is simple and, being confined within SVM code, it gets rid of the set_dr6 callback and kvm_update_dr6. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index de0c28814348..9e8263b1e6fe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1093,7 +1093,6 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); @@ -1623,7 +1622,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); -void kvm_update_dr6(struct kvm_vcpu *vcpu); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); -- cgit v1.2.3 From 37486135d3a7b03acc7755b63627a130437f066a Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Tue, 12 May 2020 18:59:06 -0500 Subject: KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c Though rdpkru and wrpkru are contingent upon CR4.PKE, the PKRU resource isn't. It can be read with XSAVE and written with XRSTOR. So, if we don't set the guest PKRU value here (kvm_load_guest_xsave_state), the guest can read the host value. In case of kvm_load_host_xsave_state, a guest with CR4.PKE clear could potentially use XRSTOR to change the host PKRU value. While at it, move pkru state save/restore to common code and the host_pkru field to kvm_vcpu_arch. This will let SVM support protection keys. Cc: stable@vger.kernel.org Reported-by: Jim Mattson Signed-off-by: Babu Moger Message-Id: <158932794619.44260.14508381096663848853.stgit@naples-babu.amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9e8263b1e6fe..0a6b35353fc7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -578,6 +578,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; -- cgit v1.2.3
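The header hunk above only adds the host_pkru field; the save/restore logic itself moves into arch/x86/kvm/x86.c. A condensed sketch of that common code follows, with shortened function names (the upstream helpers are kvm_load_guest_xsave_state()/kvm_load_host_xsave_state()); static_cpu_has(), kvm_read_cr4_bits(), XFEATURE_MASK_PKRU, rdpkru() and __write_pkru() are existing x86/KVM primitives.

/* Condensed sketch of the PKRU swap logic moved to common x86.c code. */
static void example_load_guest_pkru(struct kvm_vcpu *vcpu)
{
	/*
	 * Swap in the guest PKRU whenever the guest can observe it:
	 * either CR4.PKE is set, or XCR0 lets XSAVE/XRSTOR touch PKRU.
	 */
	if (static_cpu_has(X86_FEATURE_PKU) &&
	    (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
	     (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
	    vcpu->arch.pkru != vcpu->arch.host_pkru)
		__write_pkru(vcpu->arch.pkru);
}

static void example_load_host_pkru(struct kvm_vcpu *vcpu)
{
	if (static_cpu_has(X86_FEATURE_PKU) &&
	    (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
	     (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
		/* The guest may have changed PKRU via XRSTOR; read it back. */
		vcpu->arch.pkru = rdpkru();
		if (vcpu->arch.pkru != vcpu->arch.host_pkru)
			__write_pkru(vcpu->arch.host_pkru);
	}
}

The CR4.PKE-or-XCR0 condition is the heart of the fix: PKRU must be swapped whenever the guest can observe or modify it through XSAVE/XRSTOR, not only when the rdpkru/wrpkru instructions are enabled.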