From 172b1d6b5a9337eb8c1ec294b80e448e03a9ac17 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 12 Aug 2016 06:24:42 -0400 Subject: crypto: sha256-mb - fix ctx pointer and digest copy 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. 2. fix digest copy Use XMM register to copy another 16 bytes sha256 digest instead of a regular register. Signed-off-by: Xiaodong Liu Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-mb/sha256_mb.c | 4 ++-- arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 89fa85e8b10c..6f97fb33ae21 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index b691da981cd9..a78a0694ddef 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), tmp2_w + vmovd _args_digest(state , idx, 4) , %xmm0 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 - vmovdqu %xmm0, _result_digest(job_rax) - movl tmp2_w, _result_digest+1*16(job_rax) + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + vmovdqu %xmm1, offset(job_rax) pop %rbx -- cgit v1.2.3 From e67479b13ede47cc2f5beb5b51e67fdb30778ee8 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 12 Aug 2016 06:28:31 -0400 Subject: crypto: sha512-mb - fix ctx pointer 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. Signed-off-by: Xiaodong Liu Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index f4cf5b78fd36..d210174a52b0 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -497,10 +497,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } -- cgit v1.2.3 From d048c098218e91ed0e10dfa1f0f80e2567fe4ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 8 Aug 2016 20:16:22 +0200 Subject: KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit msr bitmap can be used to avoid a VM exit (interception) on guest MSR accesses. In some configurations of VMX controls, the guest can even directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED APIC ACCESSES. L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI. To do so, L1 would first trick KVM to disable all possible interceptions by enabling APICv features and then would turn those features off; nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would not intercept previously enabled MSRs even though they were not safe with the new configuration. Correctly re-enabling interceptions is not enough as a second bug would still allow L1+L2 to access host's MSRs: msr bitmap was shared for all VMCSs, so L1 could trigger a race to get the desired combination of msr bitmap and VMX controls. This fix allocates a msr bitmap for every L1 VCPU, allows only safe x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would have to intercept everything anyway. Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap") Reported-by: Jim Mattson Suggested-by: Wincy Van Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 107 ++++++++++++++++++++++------------------------------- 1 file changed, 44 insertions(+), 63 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a45d8580f91e..c66ac2c70d22 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -435,6 +435,8 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; + unsigned long *msr_bitmap; + struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void) if (!vmx_msr_bitmap_longmode_x2apic) goto out4; - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) goto out6; @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void) memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; @@ -6529,9 +6521,6 @@ out8: out7: free_page((unsigned long)vmx_vmread_bitmap); out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void) free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) - return -ENOMEM; + goto out_cached_vmcs12; if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) { - kfree(vmx->nested.cached_vmcs12); - return -ENOMEM; - } + if (!shadow_vmcs) + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + kfree(vmx->nested.cached_vmcs12); + +out_cached_vmcs12: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: + return -ENOMEM; } /* @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); + if (vmx->nested.msr_bitmap) { + free_page((unsigned long)vmx->nested.msr_bitmap); + vmx->nested.msr_bitmap = NULL; + } if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); kfree(vmx->nested.cached_vmcs12); @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); - if (!msr_bitmap) { + msr_bitmap_l1 = (unsigned long *)kmap(page); + if (!msr_bitmap_l1) { nested_release_page_clean(page); WARN_ON(1); return false; } + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } kunmap(page); nested_release_page_clean(page); @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* -- cgit v1.2.3 From dccbfcf52cebb8963246eba5b177b77f26b34da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 8 Aug 2016 20:16:23 +0200 Subject: KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vmcs12 does not intercept APIC_BASE writes, then KVM will handle the write with vmcs02 as the current VMCS. This will incorrectly apply modifications intended for vmcs01 to vmcs02 and L2 can use it to gain access to L0's x2APIC registers by disabling virtualized x2APIC while using msr bitmap that assumes enabled. Postpone execution of vmx_set_virtual_x2apic_mode until vmcs01 is the current VMCS. An alternative solution would temporarily make vmcs01 the current VMCS, but it requires more care. Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support") Reported-by: Jim Mattson Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66ac2c70d22..ae111a07acc4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -422,6 +422,7 @@ struct nested_vmx { struct list_head vmcs02_pool; int vmcs02_num; u64 vmcs01_tsc_offset; + bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; /* @@ -8424,6 +8425,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) { u32 sec_exec_control; + /* Postpone execution until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; + return; + } + /* * There is not point to enable virtualize x2apic without enable * apicv @@ -10749,6 +10756,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { + vmx->nested.change_vmcs01_virtual_x2apic_mode = false; + vmx_set_virtual_x2apic_mode(vcpu, + vcpu->arch.apic_base & X2APIC_ENABLE); + } + /* This is needed for same reason as it was needed in prepare_vmcs02 */ vmx->host_rsp = 0; -- cgit v1.2.3 From c95ba92afb238ac565c68968fc72e38ca8d1b6e8 Mon Sep 17 00:00:00 2001 From: Peter Feiner Date: Wed, 17 Aug 2016 09:36:47 -0700 Subject: kvm: nVMX: fix nested tsc scaling When the host supported TSC scaling, L2 would use a TSC multiplier of 0, which causes a VM entry failure. Now L2's TSC uses the same multiplier as L1. Signed-off-by: Peter Feiner Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae111a07acc4..5cede40e2552 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2200,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) new.control) != old.control); } +static void decache_tsc_multiplier(struct vcpu_vmx *vmx) +{ + vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -2258,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Setup TSC multiplier */ if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { - vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; - vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); - } + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) + decache_tsc_multiplier(vmx); vmx_vcpu_pi_load(vcpu, cpu); vmx->host_pkru = read_pkru(); @@ -9999,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (enable_vpid) { /* @@ -10755,6 +10761,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, else vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { vmx->nested.change_vmcs01_virtual_x2apic_mode = false; -- cgit v1.2.3 From 21c80c9fefc3db10b530a96eb0478c29eb28bf77 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 23 Aug 2016 16:36:42 -0500 Subject: x86/PCI: VMD: Fix infinite loop executing irq's We can't initialize the list head on deletion as this causes the node to point to itself, which causes an infinite loop if vmd_irq() happens to be servicing that node. The list initialization was trying to fix a bug from multiple calls to disable the same IRQ. Fix this instead by having the VMD driver track if the interrupt is enabled. [bhelgaas: changelog, add "Fixes"] Fixes: 97e923063575 ("x86/PCI: VMD: Initialize list item in IRQ disable") Reported-by: Grzegorz Koczot Tested-by: Miroslaw Drost Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by Jon Derrick: --- arch/x86/pci/vmd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index b814ca675131..7948be342ee9 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -41,6 +41,7 @@ static DEFINE_RAW_SPINLOCK(list_lock); * @node: list item for parent traversal. * @rcu: RCU callback item for freeing. * @irq: back pointer to parent. + * @enabled: true if driver enabled IRQ * @virq: the virtual IRQ value provided to the requesting driver. * * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to @@ -50,6 +51,7 @@ struct vmd_irq { struct list_head node; struct rcu_head rcu; struct vmd_irq_list *irq; + bool enabled; unsigned int virq; }; @@ -122,7 +124,9 @@ static void vmd_irq_enable(struct irq_data *data) unsigned long flags; raw_spin_lock_irqsave(&list_lock, flags); + WARN_ON(vmdirq->enabled); list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + vmdirq->enabled = true; raw_spin_unlock_irqrestore(&list_lock, flags); data->chip->irq_unmask(data); @@ -136,8 +140,10 @@ static void vmd_irq_disable(struct irq_data *data) data->chip->irq_mask(data); raw_spin_lock_irqsave(&list_lock, flags); - list_del_rcu(&vmdirq->node); - INIT_LIST_HEAD_RCU(&vmdirq->node); + if (vmdirq->enabled) { + list_del_rcu(&vmdirq->node); + vmdirq->enabled = false; + } raw_spin_unlock_irqrestore(&list_lock, flags); } -- cgit v1.2.3 From 2e63ad4bd5dd583871e6602f9d398b9322d358d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 23 Aug 2016 20:07:19 +0800 Subject: x86/apic: Do not init irq remapping if ioapic is disabled native_smp_prepare_cpus -> default_setup_apic_routing -> enable_IR_x2apic -> irq_remapping_prepare -> intel_prepare_irq_remapping -> intel_setup_irq_remapping So IR table is setup even if "noapic" boot parameter is added. As a result we crash later when the interrupt affinity is set due to a half initialized remapping infrastructure. Prevent remap initialization when IOAPIC is disabled. Signed-off-by: Wanpeng Li Cc: Peter Zijlstra Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1471954039-3942-1-git-send-email-wanpeng.li@hotmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cea4fc19e844..50c95af0f017 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1623,6 +1623,9 @@ void __init enable_IR_x2apic(void) unsigned long flags; int ret, ir_stat; + if (skip_ioapic_setup) + return; + ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) return; -- cgit v1.2.3 From 55467dea2967259f21f4f854fc99d39cc5fea60e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 29 Jul 2016 11:06:48 +0200 Subject: xen: change the type of xen_vcpu_id to uint32_t We pass xen_vcpu_id mapping information to hypercalls which require uint32_t type so it would be cleaner to have it as uint32_t. The initializer to -1 can be dropped as we always do the mapping before using it and we never check the 'not set' value anyway. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8ffb089b19a5..b86ebb1a9a7f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -118,7 +118,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); enum xen_domain_type xen_domain_type = XEN_NATIVE; -- cgit v1.2.3 From a5ff1b34e16c203397542d98c49c5c7783193946 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Aug 2016 15:17:02 -0700 Subject: treewide: replace config_enabled() with IS_ENABLED() (2nd round) Commit 97f2645f358b ("tree-wide: replace config_enabled() with IS_ENABLED()") mostly killed config_enabled(), but some new users have appeared for v4.8-rc1. They are all used for a boolean option, so can be replaced with IS_ENABLED() safely. Link: http://lkml.kernel.org/r/1471970749-24867-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Kees Cook Acked-by: Peter Oberparleiter Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ralf Baechle Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ec8654f117d8..bda8d5eef04d 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -77,7 +77,7 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) */ static inline bool kaslr_memory_enabled(void) { - return kaslr_enabled() && !config_enabled(CONFIG_KASAN); + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } /* Initialize base and padding for each memory region randomized with KASLR */ -- cgit v1.2.3 From 0d025d271e55f3de21f0aaaf54b42d20404d2b23 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Aug 2016 08:04:16 -0500 Subject: mm/usercopy: get rid of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS There are three usercopy warnings which are currently being silenced for gcc 4.6 and newer: 1) "copy_from_user() buffer size is too small" compile warning/error This is a static warning which happens when object size and copy size are both const, and copy size > object size. I didn't see any false positives for this one. So the function warning attribute seems to be working fine here. Note this scenario is always a bug and so I think it should be changed to *always* be an error, regardless of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS. 2) "copy_from_user() buffer size is not provably correct" compile warning This is another static warning which happens when I enable __compiletime_object_size() for new compilers (and CONFIG_DEBUG_STRICT_USER_COPY_CHECKS). It happens when object size is const, but copy size is *not*. In this case there's no way to compare the two at build time, so it gives the warning. (Note the warning is a byproduct of the fact that gcc has no way of knowing whether the overflow function will be called, so the call isn't dead code and the warning attribute is activated.) So this warning seems to only indicate "this is an unusual pattern, maybe you should check it out" rather than "this is a bug". I get 102(!) of these warnings with allyesconfig and the __compiletime_object_size() gcc check removed. I don't know if there are any real bugs hiding in there, but from looking at a small sample, I didn't see any. According to Kees, it does sometimes find real bugs. But the false positive rate seems high. 3) "Buffer overflow detected" runtime warning This is a runtime warning where object size is const, and copy size > object size. All three warnings (both static and runtime) were completely disabled for gcc 4.6 with the following commit: 2fb0815c9ee6 ("gcc4: disable __compiletime_object_size for GCC 4.6+") That commit mistakenly assumed that the false positives were caused by a gcc bug in __compiletime_object_size(). But in fact, __compiletime_object_size() seems to be working fine. The false positives were instead triggered by #2 above. (Though I don't have an explanation for why the warnings supposedly only started showing up in gcc 4.6.) So remove warning #2 to get rid of all the false positives, and re-enable warnings #1 and #3 by reverting the above commit. Furthermore, since #1 is a real bug which is detected at compile time, upgrade it to always be an error. Having done all that, CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is no longer needed. Signed-off-by: Josh Poimboeuf Cc: Kees Cook Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Brian Gerst Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Byungchul Park Cc: Nilay Vaish Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 - arch/x86/include/asm/uaccess.h | 69 ++++++------------------------------------ 2 files changed, 9 insertions(+), 61 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c580d8c33562..2a1f0ce7c59a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,6 @@ config X86 select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a0ae610b9280..c3f291195294 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -697,43 +697,14 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long __must_check _copy_to_user(void __user *to, const void *from, unsigned n); -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -# define copy_user_diag __compiletime_error -#else -# define copy_user_diag __compiletime_warning -#endif - -extern void copy_user_diag("copy_from_user() buffer size is too small") -copy_from_user_overflow(void); -extern void copy_user_diag("copy_to_user() buffer size is too small") -copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); - -#undef copy_user_diag - -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS - -extern void -__compiletime_warning("copy_from_user() buffer size is not provably correct") -__copy_from_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_from_user_overflow(size, count) __copy_from_user_overflow() - -extern void -__compiletime_warning("copy_to_user() buffer size is not provably correct") -__copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_to_user_overflow(size, count) __copy_to_user_overflow() - -#else +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); -static inline void -__copy_from_user_overflow(int size, unsigned long count) +static inline void copy_user_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } -#define __copy_to_user_overflow __copy_from_user_overflow - -#endif - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { @@ -743,31 +714,13 @@ copy_from_user(void *to, const void __user *from, unsigned long n) kasan_check_write(to, n); - /* - * While we would like to have the compiler do the checking for us - * even in the non-constant size case, any false positives there are - * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even - * without - the [hopefully] dangerous looking nature of the warning - * would make people go look at the respecitive call sites over and - * over again just to find that there's no problem). - * - * And there are cases where it's just not realistic for the compiler - * to prove the count to be in range. For example when multiple call - * sites of a helper function - perhaps in different source files - - * all doing proper range checking, yet the helper function not doing - * so again. - * - * Therefore limit the compile time checking to the constant size - * case, and do only runtime checking for non-constant sizes. - */ - if (likely(sz < 0 || sz >= n)) { check_object_size(to, n, false); n = _copy_from_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_from_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_from_user_overflow(sz, n); + __bad_copy_user(); return n; } @@ -781,21 +734,17 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); - /* See the comment in copy_from_user() above. */ if (likely(sz < 0 || sz >= n)) { check_object_size(from, n, true); n = _copy_to_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_to_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_to_user_overflow(sz, n); + __bad_copy_user(); return n; } -#undef __copy_from_user_overflow -#undef __copy_to_user_overflow - /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. -- cgit v1.2.3 From 236dec051078a8691950f56949612b4b74107e48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 1 Sep 2016 16:14:47 -0700 Subject: kconfig: tinyconfig: provide whole choice blocks to avoid warnings Using "make tinyconfig" produces a couple of annoying warnings that show up for build test machines all the time: .config:966:warning: override: NOHIGHMEM changes choice state .config:965:warning: override: SLOB changes choice state .config:963:warning: override: KERNEL_XZ changes choice state .config:962:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:933:warning: override: SLOB changes choice state .config:930:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:870:warning: override: SLOB changes choice state .config:868:warning: override: KERNEL_XZ changes choice state .config:867:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state I've made a previous attempt at fixing them and we discussed a number of alternatives. I tried changing the Makefile to use "merge_config.sh -n $(fragment-list)" but couldn't get that to work properly. This is yet another approach, based on the observation that we do want to see a warning for conflicting 'choice' options, and that we can simply make them non-conflicting by listing all other options as disabled. This is a trivial patch that we can apply independent of plans for other changes. Link: http://lkml.kernel.org/r/20160829214952.1334674-2-arnd@arndb.de Link: https://storage.kernelci.org/mainline/v4.7-rc6/x86-tinyconfig/build.log https://patchwork.kernel.org/patch/9212749/ Signed-off-by: Arnd Bergmann Reviewed-by: Josh Triplett Reviewed-by: Masahiro Yamada Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/configs/tiny.config | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 4e2ecfa23c15..4b429df40d7a 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config @@ -1 +1,3 @@ CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set -- cgit v1.2.3 From 15301a570754c7af60335d094dd2d1808b0641a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2016 13:47:26 -0400 Subject: x86/paravirt: Do not trace _paravirt_ident_*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Łukasz Daniluk reported that on a RHEL kernel that his machine would lock up after enabling function tracer. I asked him to bisect the functions within available_filter_functions, which he did and it came down to three: _paravirt_nop(), _paravirt_ident_32() and _paravirt_ident_64() It was found that this is only an issue when noreplace-paravirt is added to the kernel command line. This means that those functions are most likely called within critical sections of the funtion tracer, and must not be traced. In newer kenels _paravirt_nop() is defined within gcc asm(), and is no longer an issue. But both _paravirt_ident_{32,64}() causes the following splat when they are traced: mm/pgtable-generic.c:33: bad pmd ffff8800d2435150(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff8800d3624190(0000000001d00070) mm/pgtable-generic.c:33: bad pmd ffff8800d36a5110(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff880118eb1450(0000000001d00054) NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [systemd-journal:469] Modules linked in: e1000e CPU: 2 PID: 469 Comm: systemd-journal Not tainted 4.6.0-rc4-test+ #513 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 task: ffff880118f740c0 ti: ffff8800d4aec000 task.ti: ffff8800d4aec000 RIP: 0010:[] [] queued_spin_lock_slowpath+0x118/0x1a0 RSP: 0018:ffff8800d4aefb90 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011eb16d40 RDX: ffffffff82485760 RSI: 000000001f288820 RDI: ffffea0000008030 RBP: ffff8800d4aefb90 R08: 00000000000c0000 R09: 0000000000000000 R10: ffffffff821c8e0e R11: 0000000000000000 R12: ffff880000200fb8 R13: 00007f7a4e3f7000 R14: ffffea000303f600 R15: ffff8800d4b562e0 FS: 00007f7a4e3d7840(0000) GS:ffff88011eb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7a4e3f7000 CR3: 00000000d3e71000 CR4: 00000000001406e0 Call Trace: _raw_spin_lock+0x27/0x30 handle_pte_fault+0x13db/0x16b0 handle_mm_fault+0x312/0x670 __do_page_fault+0x1b1/0x4e0 do_page_fault+0x22/0x30 page_fault+0x28/0x30 __vfs_read+0x28/0xe0 vfs_read+0x86/0x130 SyS_read+0x46/0xa0 entry_SYSCALL_64_fastpath+0x1e/0xa8 Code: 12 48 c1 ea 0c 83 e8 01 83 e2 30 48 98 48 81 c2 40 6d 01 00 48 03 14 c5 80 6a 5d 82 48 89 0a 8b 41 08 85 c0 75 09 f3 90 8b 41 08 <85> c0 74 f7 4c 8b 09 4d 85 c9 74 08 41 0f 18 09 eb 02 f3 90 8b Reported-by: Łukasz Daniluk Signed-off-by: Steven Rostedt Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ad5bc9578a73..1acfd76e3e26 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -56,12 +56,12 @@ asm (".pushsection .entry.text, \"ax\"\n" ".popsection"); /* identity function, which can be inlined */ -u32 _paravirt_ident_32(u32 x) +u32 notrace _paravirt_ident_32(u32 x) { return x; } -u64 _paravirt_ident_64(u64 x) +u64 notrace _paravirt_ident_64(u64 x) { return x; } -- cgit v1.2.3 From d1992996753132e2dafe955cccb2fb0714d3cfc4 Mon Sep 17 00:00:00 2001 From: Emanuel Czirai Date: Fri, 2 Sep 2016 07:35:50 +0200 Subject: x86/AMD: Apply erratum 665 on machines without a BIOS fix AMD F12h machines have an erratum which can cause DIV/IDIV to behave unpredictably. The workaround is to set MSRC001_1029[31] but sometimes there is no BIOS update containing that workaround so let's do it ourselves unconditionally. It is simple enough. [ Borislav: Wrote commit message. ] Signed-off-by: Emanuel Czirai Signed-off-by: Borislav Petkov Cc: Yaowu Xu Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160902053550.18097-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/amd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f5c69d8974e1..b81fe2d63e15 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } +#define MSR_AMD64_DE_CFG 0xC0011029 + +static void init_amd_ln(struct cpuinfo_x86 *c) +{ + /* + * Apply erratum 665 fix unconditionally so machines without a BIOS + * fix work. + */ + msr_set_bit(MSR_AMD64_DE_CFG, 31); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 6: init_amd_k7(c); break; case 0xf: init_amd_k8(c); break; case 0x10: init_amd_gh(c); break; + case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; } -- cgit v1.2.3 From dadb57abc37499f565b23933dbf49b435c3ba8af Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:51 -0600 Subject: efi/libstub: Allocate headspace in efi_get_memory_map() efi_get_memory_map() allocates a buffer to store the memory map that it retrieves. This buffer may need to be reused by the client after ExitBootServices() is called, at which point allocations are not longer permitted. To support this usecase, provide the allocated buffer size back to the client, and allocate some additional headroom to account for any reasonable growth in the map that is likely to happen between the call to efi_get_memory_map() and the client reusing the buffer. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574dad95cc..c5b7c7b4f0d7 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1008,7 +1008,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { struct efi_info *efi = &boot_params->efi_info; - unsigned long map_sz, key, desc_size; + unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; const char *signature; @@ -1019,14 +1019,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, bool called_exit = false; u8 nr_entries; int i; - - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; + struct efi_boot_memmap map; + + nr_desc = 0; + e820ext = NULL; + e820ext_size = 0; + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; get_map: - status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, - &desc_version, &key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) return status; -- cgit v1.2.3 From d64934019f6cc39202e2f78063709f61ca5cb364 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:54 -0600 Subject: x86/efi: Use efi_exit_boot_services() The eboot code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The eboot code attempts allocations after calling ExitBootSerives which is not permitted per the spec. Call the efi_exit_boot_services() helper intead, which handles the allocation scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 136 +++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 69 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c5b7c7b4f0d7..94dd4a31f5b3 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1004,85 +1004,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +struct exit_boot_struct { + struct boot_params *boot_params; + struct efi_info *efi; + struct setup_data *e820ext; + __u32 e820ext_size; + bool is64; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + static bool first = true; + const char *signature; + __u32 nr_desc; + efi_status_t status; + struct exit_boot_struct *p = priv; + + if (first) { + nr_desc = *map->buff_size / *map->desc_size; + if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - + ARRAY_SIZE(p->boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &p->e820ext, + &p->e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + first = false; + } + + signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); + + p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_memdesc_size = *map->desc_size; + p->efi->efi_memdesc_version = *map->desc_ver; + p->efi->efi_memmap = (unsigned long)*map->map; + p->efi->efi_memmap_size = *map->map_size; + +#ifdef CONFIG_X86_64 + p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; +#endif + + return EFI_SUCCESS; +} + static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { - struct efi_info *efi = &boot_params->efi_info; unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; - const char *signature; __u32 e820ext_size; - __u32 nr_desc, prev_nr_desc; efi_status_t status; __u32 desc_version; - bool called_exit = false; - u8 nr_entries; - int i; struct efi_boot_memmap map; + struct exit_boot_struct priv; + + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + priv.boot_params = boot_params; + priv.efi = &boot_params->efi_info; + priv.e820ext = NULL; + priv.e820ext_size = 0; + priv.is64 = is64; - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; - map.map = &mem_map; - map.map_size = &map_sz; - map.desc_size = &desc_size; - map.desc_ver = &desc_version; - map.key_ptr = &key; - map.buff_size = &buff_size; - -get_map: - status = efi_get_memory_map(sys_table, &map); - + /* Might as well exit boot services now */ + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status != EFI_SUCCESS) return status; - prev_nr_desc = nr_desc; - nr_desc = map_sz / desc_size; - if (nr_desc > prev_nr_desc && - nr_desc > ARRAY_SIZE(boot_params->e820_map)) { - u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); - - status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); - if (status != EFI_SUCCESS) - goto free_mem_map; - - efi_call_early(free_pool, mem_map); - goto get_map; /* Allocated memory, get map again */ - } - - signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; - memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); - - efi->efi_systab = (unsigned long)sys_table; - efi->efi_memdesc_size = desc_size; - efi->efi_memdesc_version = desc_version; - efi->efi_memmap = (unsigned long)mem_map; - efi->efi_memmap_size = map_sz; - -#ifdef CONFIG_X86_64 - efi->efi_systab_hi = (unsigned long)sys_table >> 32; - efi->efi_memmap_hi = (unsigned long)mem_map >> 32; -#endif - - /* Might as well exit boot services now */ - status = efi_call_early(exit_boot_services, handle, key); - if (status != EFI_SUCCESS) { - /* - * ExitBootServices() will fail if any of the event - * handlers change the memory map. In which case, we - * must be prepared to retry, but only once so that - * we're guaranteed to exit on repeated failures instead - * of spinning forever. - */ - if (called_exit) - goto free_mem_map; - - called_exit = true; - efi_call_early(free_pool, mem_map); - goto get_map; - } - + e820ext = priv.e820ext; + e820ext_size = priv.e820ext_size; /* Historic? */ boot_params->alt_mem_k = 32 * 1024; @@ -1091,10 +1093,6 @@ get_map: return status; return EFI_SUCCESS; - -free_mem_map: - efi_call_early(free_pool, mem_map); - return status; } /* -- cgit v1.2.3