From a5f1005517534aeb1fac20180badfbf0896c183c Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Fri, 1 Dec 2017 18:47:32 +0100
Subject: s390/pci: handle insufficient resources during dma tlb flush

In a virtualized setup lazy flushing can lead to the hypervisor
running out of resources when lots of guest pages need to be pinned.
In this situation simply trigger a global flush to give the
hypervisor a chance to free some of these resources.

Signed-off-by: Sebastian Ott
Reviewed-by: Gerald Schaefer
Reviewed-by: Pierre Morel
Signed-off-by: Martin Schwidefsky
---
 arch/s390/pci/pci_dma.c  | 21 +++++++++++++++++++--
 arch/s390/pci/pci_insn.c |  3 +++
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f7aa5a77827e..2d15d84c20ed 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -181,6 +181,9 @@ out_unlock:
 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
 			   size_t size, int flags)
 {
+	unsigned long irqflags;
+	int ret;
+
 	/*
 	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
 	 * translations when previously invalid translation-table entries are
@@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
 		return 0;
 	}
 
-	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
-				  PAGE_ALIGN(size));
+	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				 PAGE_ALIGN(size));
+	if (ret == -ENOMEM && !s390_iommu_strict) {
+		/* enable the hypervisor to free some resources */
+		if (zpci_refresh_global(zdev))
+			goto out;
+
+		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+			      zdev->lazy_bitmap, zdev->iommu_pages);
+		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+		ret = 0;
+	}
+out:
+	return ret;
 }
 
 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 19bcb3b45a70..f069929e8211 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	if (cc)
 		zpci_err_insn(cc, status, addr, range);
 
+	if (cc == 1 && (status == 4 || status == 16))
+		return -ENOMEM;
+
 	return (cc) ? -EIO : 0;
 }
--
cgit v1.2.3


From bdcf0a423ea1c40bbb40e7ee483b50fc8aa3d758 Mon Sep 17 00:00:00 2001
From: Thiago Rafael Becker
Date: Thu, 14 Dec 2017 15:33:12 -0800
Subject: kernel: make groups_sort calling a responsibility group_info allocators

In testing, we found that nfsd threads may call set_groups in parallel
for the same entry cached in auth.unix.gid, racing in the call of
groups_sort, corrupting the groups for that entry and leading to
permission denials for the client.

This patch:
 - Make groups_sort globally visible.
 - Move the call to groups_sort to the modifiers of group_info
 - Remove the call to groups_sort from set_groups

Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.becker@gmail.com
Signed-off-by: Thiago Rafael Becker
Reviewed-by: Matthew Wilcox
Reviewed-by: NeilBrown
Acked-by: "J. Bruce Fields"
Cc: Al Viro
Cc: Martin Schwidefsky
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/s390/kernel/compat_linux.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34..59eea9c65d3e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 		return retval;
 	}
 
+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);
--
cgit v1.2.3


From 9f37e797547cca9d14fe1f0f43f5c89b261ff0b0 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Fri, 15 Dec 2017 14:16:04 +0100
Subject: s390: fix preemption race in disable_sacf_uaccess

With CONFIG_PREEMPT=y there is a possible race in disable_sacf_uaccess.
The new set_fs value needs to be stored in the task structure first,
the control register update needs to be second. Otherwise a preemptive
schedule may interrupt the code right after the control register update
has been done and the next time the task is scheduled we get an
incorrect value in the control register due to the old set_fs setting.

Fixes: 0aaba41b58 ("s390: remove all code using the access register mode")
Signed-off-by: Martin Schwidefsky
---
 arch/s390/lib/uaccess.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cae5a1e16cbd..c4f8039a35e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
 
 void disable_sacf_uaccess(mm_segment_t old_fs)
 {
+	current->thread.mm_segment = old_fs;
 	if (old_fs == USER_DS && test_facility(27)) {
 		__ctl_load(S390_lowcore.user_asce, 1, 1);
 		clear_cpu_flag(CIF_ASCE_PRIMARY);
 	}
-	current->thread.mm_segment = old_fs;
 }
 EXPORT_SYMBOL(disable_sacf_uaccess);
--
cgit v1.2.3


From 6d59b7dbf72ed20d0138e2f9b75ca3d4a9d4faca Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Thu, 14 Dec 2017 21:07:23 +0100
Subject: bpf, s390x: do not reload skb pointers in non-skb context

The assumption of unconditionally reloading skb pointers on BPF helper
calls where bpf_helper_changes_pkt_data() holds true is wrong. There
can be different contexts where the BPF helper would enforce a reload
such as in case of XDP. Here, we do have a struct xdp_buff instead of
struct sk_buff as context, thus this will access garbage.

JITs only ever need to deal with cached skb pointer reload when
ld_abs/ind was seen, therefore guard the reload behind SEEN_SKB only.
Tested on s390x.
Fixes: 9db7f2b81880 ("s390/bpf: recache skb->data/hlen for skb_vlan_push/pop")
Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Cc: Michael Holzheu
Signed-off-by: Alexei Starovoitov
---
 arch/s390/net/bpf_jit_comp.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL	8	/* code uses literals */
 #define SEEN_FUNC	16	/* calls C functions */
 #define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_SKB_CHANGE	64	/* code changes skb data */
-#define SEEN_REG_AX	128	/* code uses constant blinding */
+#define SEEN_REG_AX	64	/* code uses constant blinding */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 			      REG_15, 152);
 	}
-	if (jit->seen & SEEN_SKB)
+	if (jit->seen & SEEN_SKB) {
 		emit_load_skb_data_hlen(jit);
-	if (jit->seen & SEEN_SKB_CHANGE)
 		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
 		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
 			      STK_OFF_SKBP);
+	}
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if (bpf_helper_changes_pkt_data((void *)func)) {
-			jit->seen |= SEEN_SKB_CHANGE;
+		if ((jit->seen & SEEN_SKB) &&
+		    bpf_helper_changes_pkt_data((void *)func)) {
 			/* lg %b1,ST_OFF_SKBP(%r15) */
 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
 				      REG_15, STK_OFF_SKBP);
--
cgit v1.2.3


From f6f3732162b5ae3c771b9285a5a32d72b8586920 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Fri, 15 Dec 2017 18:53:22 -0800
Subject: Revert "mm: replace p??_write with pte_access_permitted in fault + gup paths"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commits 5c9d2d5c269c, c7da82b894e9, and e7fe7b5cae90.

We'll probably need to revisit this, but basically we should not
complicate the get_user_pages_fast() case, and checking the actual page
table protection key bits will require more care anyway, since the
protection keys depend on the exact state of the VM in question.

Particularly when doing a "remote" page lookup (ie in somebody else's
VM, not your own), you need to be much more careful than this was.

Dave Hansen says:

 "So, the underlying bug here is that we now do a get_user_pages_remote()
  and then go ahead and do the p*_access_permitted() checks against the
  current PKRU. This was introduced recently with the addition of the
  new p??_access_permitted() calls.

  We have checks in the VMA path for the "remote" gups and we avoid
  consulting PKRU for them. This got missed in the pkeys selftests
  because I did a ptrace read, but not a *write*. I also didn't
  explicitly test it against something where a COW needed to be done"

It's also not entirely clear that it makes sense to check the protection
key bits at this level at all. But one possible eventual solution is to
make the get_user_pages_fast() case just abort if it sees protection key
bits set, which makes us fall back to the regular get_user_pages() case,
which then has a vma and can do the check there if we want to.

We'll see.
Somewhat related to this all: what we _do_ want to do some day is to
check the PAGE_USER bit - it should obviously always be set for user
pages, but it would be a good check to have back.

Because we have no generic way to test for it, we lost it as part of
moving over from the architecture-specific x86 GUP implementation to
the generic one in commit e585513b76f7 ("x86/mm/gup: Switch GUP to the
generic get_user_page_fast() implementation").

Cc: Peter Zijlstra
Cc: Dan Williams
Cc: Dave Hansen
Cc: Kirill A. Shutemov
Cc: "Jérôme Glisse"
Cc: Andrew Morton
Cc: Al Viro
Signed-off-by: Linus Torvalds
---
 arch/s390/include/asm/pgtable.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc92e0b8..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
 	return pud;
 }
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
 	if (pud_large(pud)) {
--
cgit v1.2.3


From 32aa144fc32abfcbf7140f473dfbd94c5b9b4105 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Fri, 15 Dec 2017 13:14:31 +0100
Subject: KVM: s390: fix cmma migration for multiple memory slots

When multiple memory slots are present the cmma migration code does not
allocate enough memory for the bitmap. The memory slots are sorted in
reverse order, so we must use gfn and size of slot[0] instead of the
last one.

Signed-off-by: Christian Borntraeger
Reviewed-by: Claudio Imbrenda
Cc: stable@vger.kernel.org # 4.13+
Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode)
Reviewed-by: Cornelia Huck
---
 arch/s390/kvm/kvm-s390.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index efa439f6ffb3..abcd24fdde3f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 
 	if (kvm->arch.use_cmma) {
 		/*
-		 * Get the last slot. They should be sorted by base_gfn, so the
-		 * last slot is also the one at the end of the address space.
-		 * We have verified above that at least one slot is present.
+		 * Get the first slot. They are reverse sorted by base_gfn, so
+		 * the first slot is also the one at the end of the address
+		 * space. We have verified above that at least one slot is
+		 * present.
 		 */
-		ms = slots->memslots + slots->used_slots - 1;
+		ms = slots->memslots;
 		/* round up so we only use full longs */
 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 		/* allocate enough bytes to store all the bits */
--
cgit v1.2.3


From c2cf265d860882b51a200e4a7553c17827f2b730 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Thu, 21 Dec 2017 09:18:22 +0100
Subject: KVM: s390: prevent buffer overrun on memory hotplug during migration

We must not go beyond the pre-allocated buffer. This can happen when
a new memory slot is added during migration.
Reported-by: David Hildenbrand
Signed-off-by: Christian Borntraeger
Cc: stable@vger.kernel.org # 4.13+
Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode)
Reviewed-by: Cornelia Huck
Reviewed-by: David Hildenbrand
---
 arch/s390/kvm/priv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 572496c688cc..0714bfa56da0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 			cbrlo[entries] = gfn << PAGE_SHIFT;
 	}
 
-	if (orc) {
+	if (orc && gfn < ms->bitmap_size) {
 		/* increment only if we are really flipping the bit to 1 */
 		if (!test_and_set_bit(gfn, ms->pgste_bitmap))
 			atomic64_inc(&ms->dirty_pages);
--
cgit v1.2.3


From 0500871f21b237b2bea2d9db405eadf78e5aab05 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 2 Jan 2018 15:12:01 +0000
Subject: Construct init thread stack in the linker script rather than by union

Construct the init thread stack in the linker script rather than doing
it by means of a union so that ia64's init_task.c can be got rid of.

The following symbols are then made available from INIT_TASK_DATA()
linker script macro:

	init_thread_union
	init_stack

INIT_TASK_DATA() also expands the region to THREAD_SIZE to accommodate
the size of the init stack.

init_thread_union is given its own section so that it can be placed
into the stack space in the right order. I'm assuming that the ia64
ordering is correct and that the task_struct is first and the
thread_info second.

Signed-off-by: David Howells
Tested-by: Tony Luck
Tested-by: Will Deacon (arm64)
Tested-by: Palmer Dabbelt
Acked-by: Thomas Gleixner
---
 arch/s390/include/asm/thread_info.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 0880a37b6d3b..25d6ec3aaddd 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -42,8 +42,6 @@ struct thread_info {
 	.flags		= 0,			\
 }
 
-#define init_stack (init_thread_union.stack)
-
 void arch_release_task_struct(struct task_struct *tsk);
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
--
cgit v1.2.3


From 35b3fde6203b932b2b1a5b53b3d8808abc9c4f60 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Wed, 17 Jan 2018 14:44:34 +0100
Subject: KVM: s390: wire up bpb feature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new firmware interfaces for branch prediction behaviour changes
are transparently available for the guest. Nevertheless, there is new
state attached that should be migrated and properly reset. Provide a
mechanism for handling reset, migration and VSIE.

Signed-off-by: Christian Borntraeger
Reviewed-by: David Hildenbrand
Reviewed-by: Cornelia Huck
[Changed capability number to 152. - Radim]
Signed-off-by: Radim Krčmář
---
 arch/s390/include/asm/kvm_host.h |  3 ++-
 arch/s390/include/uapi/asm/kvm.h |  5 ++++-
 arch/s390/kvm/kvm-s390.c         | 12 ++++++++++++
 arch/s390/kvm/vsie.c             | 10 ++++++++++
 4 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index e14f381757f6..c1b0a9ac1dc8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -207,7 +207,8 @@ struct kvm_s390_sie_block {
 	__u16	ipa;			/* 0x0056 */
 	__u32	ipb;			/* 0x0058 */
 	__u32	scaoh;			/* 0x005c */
-	__u8	reserved60;		/* 0x0060 */
+#define FPF_BPBC 0x20
+	__u8	fpf;			/* 0x0060 */
 #define ECB_GS		0x40
 #define ECB_TE		0x10
 #define ECB_SRSI	0x04
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 38535a57fef8..4cdaa55fabfe 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -224,6 +224,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_RICCB  (1UL << 7)
 #define KVM_SYNC_FPRS   (1UL << 8)
 #define KVM_SYNC_GSCB   (1UL << 9)
+#define KVM_SYNC_BPBC   (1UL << 10)
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
@@ -247,7 +248,9 @@ struct kvm_sync_regs {
 	};
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
-	__u8 padding1[52];	/* riccb needs to be 64byte aligned */
+	__u8 bpbc : 1;		/* bp mode */
+	__u8 reserved2 : 7;
+	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
 	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
 	union {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2c93cbbcd15e..2598cf243b86 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_GS:
 		r = test_facility(133);
 		break;
+	case KVM_CAP_S390_BPB:
+		r = test_facility(82);
+		break;
 	default:
 		r = 0;
 	}
@@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_s390_set_prefix(vcpu, 0);
 	if (test_kvm_facility(vcpu->kvm, 64))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+	if (test_kvm_facility(vcpu->kvm, 82))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
 	if (test_kvm_facility(vcpu->kvm, 133))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	current->thread.fpu.fpc = 0;
 	vcpu->arch.sie_block->gbea = 1;
 	vcpu->arch.sie_block->pp = 0;
+	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
@@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
 		vcpu->arch.gs_enabled = 1;
 	}
+	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+	    test_kvm_facility(vcpu->kvm, 82)) {
+		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+	}
 	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	/* save host (userspace) fprs/vrs */
@@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 	/* Save guest register state */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5d6ae0326d9e..751348348477 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	memcpy(scb_o->gcr, scb_s->gcr, 128);
 	scb_o->pp = scb_s->pp;
 
+	/* branch prediction */
+	if (test_kvm_facility(vcpu->kvm, 82)) {
+		scb_o->fpf &= ~FPF_BPBC;
+		scb_o->fpf |= scb_s->fpf & FPF_BPBC;
+	}
+
 	/* interrupt intercept */
 	switch (scb_s->icptcode) {
 	case ICPT_PROGI:
@@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	scb_s->ecb3 = 0;
 	scb_s->ecd = 0;
 	scb_s->fac = 0;
+	scb_s->fpf = 0;
 
 	rc = prepare_cpuflags(vcpu, vsie_page);
 	if (rc)
@@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		prefix_unmapped(vsie_page);
 		scb_s->ecb |= scb_o->ecb & ECB_TE;
 	}
+	/* branch prediction */
+	if (test_kvm_facility(vcpu->kvm, 82))
+		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
 	/* SIMD */
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		scb_s->eca |= scb_o->eca & ECA_VX;
--
cgit v1.2.3


From 1de1ea7efeb9e8543212210e34518b4049ccd285 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Fri, 22 Dec 2017 10:54:20 +0100
Subject: KVM: s390: add proper locking for CMMA migration bitmap

Some parts of the cmma migration bitmap are already protected with the
kvm->lock (e.g. the migration start). On the other hand the read of the
cmma bits is not protected against a concurrent free, neither is the
emulation of the ESSA instruction. Let's extend the locking to all
related ioctls by using the slots lock for
- kvm_s390_vm_start_migration
- kvm_s390_vm_stop_migration
- kvm_s390_set_cmma_bits
- kvm_s390_get_cmma_bits

In addition to that, we use synchronize_srcu before freeing the
migration structure as all users hold kvm->srcu for read
(e.g. the ESSA handler).

Reported-by: David Hildenbrand
Signed-off-by: Christian Borntraeger
Cc: stable@vger.kernel.org # 4.13+
Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode)
Reviewed-by: Claudio Imbrenda
Reviewed-by: David Hildenbrand
Reviewed-by: Cornelia Huck
---
 arch/s390/kvm/kvm-s390.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index abcd24fdde3f..52880e980a33 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -766,7 +766,7 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 
 /*
  * Must be called with kvm->srcu held to avoid races on memslots, and with
- * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
  */
 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 {
@@ -822,7 +822,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 }
 
 /*
- * Must be called with kvm->lock to avoid races with ourselves and
+ * Must be called with kvm->slots_lock to avoid races with ourselves and
  * kvm_s390_vm_start_migration.
  */
 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
@@ -837,6 +837,8 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 
 	if (kvm->arch.use_cmma) {
 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+		/* We have to wait for the essa emulation to finish */
+		synchronize_srcu(&kvm->srcu);
 		vfree(mgs->pgste_bitmap);
 	}
 	kfree(mgs);
@@ -846,14 +848,12 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 static int kvm_s390_vm_set_migration(struct kvm *kvm,
 				     struct kvm_device_attr *attr)
 {
-	int idx, res = -ENXIO;
+	int res = -ENXIO;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->slots_lock);
 	switch (attr->attr) {
 	case KVM_S390_VM_MIGRATION_START:
-		idx = srcu_read_lock(&kvm->srcu);
 		res = kvm_s390_vm_start_migration(kvm);
-		srcu_read_unlock(&kvm->srcu, idx);
 		break;
 	case KVM_S390_VM_MIGRATION_STOP:
 		res = kvm_s390_vm_stop_migration(kvm);
@@ -861,7 +861,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
 	default:
 		break;
 	}
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return res;
 }
@@ -1751,7 +1751,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&args, argp, sizeof(args)))
 			break;
+		mutex_lock(&kvm->slots_lock);
 		r = kvm_s390_get_cmma_bits(kvm, &args);
+		mutex_unlock(&kvm->slots_lock);
 		if (!r) {
 			r = copy_to_user(argp, &args, sizeof(args));
 			if (r)
@@ -1765,7 +1767,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&args, argp, sizeof(args)))
 			break;
+		mutex_lock(&kvm->slots_lock);
 		r = kvm_s390_set_cmma_bits(kvm, &args);
+		mutex_unlock(&kvm->slots_lock);
 		break;
 	}
 	default:
--
cgit v1.2.3