From 916cda1aa1b412d7cf2991c3af7479544942d121 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 26 Jan 2016 14:10:34 +0100 Subject: s390: add a system call for guarded storage This adds a new system call to enable the use of guarded storage for user space processes. The system call takes two arguments, a command and pointer to a guarded storage control block: s390_guarded_storage(int command, struct gs_cb *gs_cb); The second argument is relevant only for the GS_SET_BC_CB command. The commands in detail: 0 - GS_ENABLE Enable the guarded storage facility for the current task. The initial content of the guarded storage control block will be all zeros. After the enablement the user space code can use load-guarded-storage-controls instruction (LGSC) to load an arbitrary control block. While a task is enabled the kernel will save and restore the current content of the guarded storage registers on context switch. 1 - GS_DISABLE Disables the use of the guarded storage facility for the current task. The kernel will cease to save and restore the content of the guarded storage registers, the task specific content of these registers is lost. 2 - GS_SET_BC_CB Set a broadcast guarded storage control block. This is called per thread and stores a specific guarded storage control block in the task struct of the current task. This control block will be used for the broadcast event GS_BROADCAST. 3 - GS_CLEAR_BC_CB Clears the broadcast guarded storage control block. The guarded- storage control block is removed from the task struct that was established by GS_SET_BC_CB. 4 - GS_BROADCAST Sends a broadcast to all thread siblings of the current task. Every sibling that has established a broadcast guarded storage control block will load this control block and will be enabled for guarded storage. The broadcast guarded storage control block is used up, a second broadcast without a refresh of the stored control block with GS_SET_BC_CB will not have any effect. 
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/early.c | 2 + arch/s390/kernel/entry.S | 26 +++++++- arch/s390/kernel/entry.h | 2 + arch/s390/kernel/guarded_storage.c | 128 +++++++++++++++++++++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 13 +++- arch/s390/kernel/nmi.c | 19 +++++- arch/s390/kernel/process.c | 7 +- arch/s390/kernel/processor.c | 2 +- arch/s390/kernel/ptrace.c | 86 +++++++++++++++++++++---- arch/s390/kernel/setup.c | 18 +++++- arch/s390/kernel/smp.c | 43 +++++++++++-- arch/s390/kernel/syscalls.S | 2 +- 15 files changed, 322 insertions(+), 31 deletions(-) create mode 100644 arch/s390/kernel/guarded_storage.c (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..aa5adbdaf200 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, 
ext_params2); OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..95298a41076f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..fa8b8f28e08b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY 
| \ @@ -332,6 +332,8 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep TSTMSK __TI_flags(%r12),_TIF_SIGPENDING @@ -408,6 +410,14 @@ ENTRY(system_call) jg uprobe_notify_resume #endif +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb + # # _PIF_PER_TRAP is set, call do_per_trap # @@ -663,6 +673,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -696,6 +708,18 @@ ENTRY(io_int_handler) larl %r14,.Lio_return jg load_fpu_regs +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF + j .Lio_return + # # _TIF_NEED_RESCHED is set, call schedule # diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 
2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + 
return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include #include #include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. 
The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current processes uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..c73709869447 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..c933e255b5d5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; - + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. 
*/ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return 
user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 12. 
+ */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..286bcee800f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 
11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & 
MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) -NI_SYSCALL +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) -- cgit v1.2.3 From 157467ba9fb7e379f0540707dd89111de441e45e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 25 Feb 2017 12:16:48 +0100 Subject: s390/cpuinfo: show facilities as reported by stfle Add a new line to /proc/cpuinfo which shows all available facilities as reported by the stfle instruction: > cat /proc/cpuinfo ... facilities : 0 1 2 3 4 6 7 ... ... 
Reviewed-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index c73709869447..778cd6536175 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include @@ -91,6 +92,18 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); +static void show_facilities(struct seq_file *m) +{ + unsigned int bit; + long *facilities; + + facilities = (long *)&S390_lowcore.stfle_fac_list; + seq_puts(m, "facilities :"); + for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT) + seq_printf(m, " %d", bit); + seq_putc(m, '\n'); +} + static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { @@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); + show_facilities(m); show_cacheinfo(m); for_each_online_cpu(cpu) { struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); -- cgit v1.2.3 From 251ea0ca408b827e888d622eac5e89e4b9502ea2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 9 Mar 2017 10:02:28 +0100 Subject: s390/topology: get rid of core mask array Use a single long value instead of a single element array to represent the core mask. The array is a leftover from 32/31 bit code so we were able to use bitops helper functions. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 17660e800e74..0537130fb915 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } +#define TOPOLOGY_CORE_BITS 64 + static void add_cpus_to_mask(struct topology_core *tl_core, struct mask_info *drawer, struct mask_info *book, @@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core, struct cpu_topology_s390 *topo; unsigned int core; - for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { + for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { unsigned int rcore; int lcpu, i; -- cgit v1.2.3 From 4fd4dd8bffb112d1e6549e0ff09e9fa3c8cc2b96 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2017 13:36:10 +0100 Subject: s390/topology: fix typo in early topology code Use MACHINE_FLAG_TOPOLOGY instead of MACHINE_HAS_TOPOLOGY when clearing the bit that indicates if the machine provides topology information (and if it should be used). Currently works anyway. 
Fixes: 68cc795d1933 ("s390/topology: make "topology=off" parameter work") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 95298a41076f..01cd7fe08d18 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -377,7 +377,7 @@ static int __init topology_setup(char *str) rc = kstrtobool(str, &enabled); if (!rc && !enabled) - S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY; return rc; } early_param("topology", topology_setup); -- cgit v1.2.3 From 59808fc81942834964e3b4e65f08e5855a3914a2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2017 15:45:54 +0100 Subject: s390/sysinfo: allow compiler warnings again Allow compiler warnings again for the sysinfo file. Compiler warnings were disabled when the bogomips calculation with math-emu code was introduced ("[S390] Calibrate delay and bogomips."). Since that code is gone, we can enable warnings again. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index aa5adbdaf200..08e7924ea2f9 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -51,8 +51,6 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -CFLAGS_sysinfo.o += -w - obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o -- cgit v1.2.3 From d2f039742537c4aec6488fa4de0c91a641210fd9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Mar 2017 11:58:27 +0100 Subject: s390/dump_stack: remove whitespace from arch description The arch description provided for the "Hardware name:" contains lots of extra whitespace due to the way the SYSIB contents are defined (strings aren't zero terminated). This looks a bit odd and therefore remove the extra whitespace characters. This also gives the opportunity to add more information, if required, without hitting the magic 80 characters per line limit. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 01cd7fe08d18..914f273b1983 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,9 +231,28 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +/* Remove leading, trailing and double whitespace. 
*/ +static inline void strim_all(char *str) +{ + char *s; + + s = strim(str); + if (s != str) + memmove(str, s, strlen(s)); + while (*str) { + if (!isspace(*str++)) + continue; + if (isspace(*str)) { + s = skip_spaces(str); + memmove(str, s, strlen(s) + 1); + } + } +} + static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + char mstr[80]; if (stsi(mach, 1, 1, 1)) return; @@ -241,11 +260,11 @@ static noinline __init void setup_arch_string(void) EBCASC(mach->type, sizeof(mach->type)); EBCASC(mach->model, sizeof(mach->model)); EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); - dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", - mach->manufacturer, - mach->type, - mach->model, - mach->model_capacity, + sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); + strim_all(mstr); + dump_stack_set_arch_desc("%s (%s)", mstr, MACHINE_IS_LPAR ? "LPAR" : MACHINE_IS_VM ? "z/VM" : MACHINE_IS_KVM ? "KVM" : "unknown"); -- cgit v1.2.3 From 2f8876f98447ccbddb5bccc8f74ee20a69e83a3e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Mar 2017 12:22:11 +0100 Subject: s390/dump_stack: use control program identification string If running within a level 3 hypervisor, the hypervisor provides a SYSIB block which contains a control program identifier string. Use this string instead of the simple KVM and z/VM strings only. In case of z/VM this provides additional information: the z/VM version. 
The new string looks similar to this: Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 914f273b1983..251391e3f8bc 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,8 @@ static inline void strim_all(char *str) static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; - char mstr[80]; + struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page; + char mstr[80], hvstr[17]; if (stsi(mach, 1, 1, 1)) return; @@ -264,10 +265,17 @@ static noinline __init void setup_arch_string(void) mach->manufacturer, mach->type, mach->model, mach->model_capacity); strim_all(mstr); - dump_stack_set_arch_desc("%s (%s)", mstr, - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + if (stsi(vm, 3, 2, 2) == 0 && vm->count) { + EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); + sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + strim_all(hvstr); + } else { + sprintf(hvstr, "%s", + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); + } + dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } static __init void setup_topology(void) -- cgit v1.2.3 From 050f99b1b85876a8456b76ebfee7d43746d37f6c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2017 15:58:35 +0100 Subject: s390/debugfs: introduce top-level 's390' directory Introduce a top-level 's390' directory which should be used when adding new s390 specific debug feature files and/or directories. This makes hopefully sure that the contents of the s390 directory will be a bit more structured. 
Right now we have a couple of top-level files where it is not easy to tell which subsystem they belong to. Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/kdebugfs.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/kdebugfs.c (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 08e7924ea2f9..adb3fe2e3d42 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -56,7 +56,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o -obj-y += entry.o reipl.o relocate_kernel.o +obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c new file mode 100644 index 000000000000..ee85e17dd79d --- /dev/null +++ b/arch/s390/kernel/kdebugfs.c @@ -0,0 +1,15 @@ +#include +#include +#include + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("s390", NULL); + if (IS_ERR(arch_debugfs_dir)) + arch_debugfs_dir = NULL; + return 0; +} +postcore_initcall(arch_kdebugfs_init); -- cgit v1.2.3 From ae5ca67acaf070ced4e3ba324160ad6a99637e71 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2017 15:58:59 +0100 Subject: s390/sysinfo,topology: provide raw stsi 15,1,x data via debugfs Provide the raw stsi 15,1,x data contents via debugfs. This makes it much easier to debug unexpected scheduling domains on machines that provide cpu topology information.
Therefore this file adds a new 's390/stsi' debugfs directory with a file for each possible topology nesting level that is allowed by the architecture. The files will be created regardless if the machine supports all, or any, level. If a level is not supported, or no data is available, user space can recognize this with a -EINVAL error code when trying to read such data. In addition a 'topology' symlink is created that points to the file that contains the data that is used to create the scheduling domains. Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sysinfo.c | 80 +++++++++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/topology.c | 2 +- 2 files changed, 81 insertions(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 12b6b138e354..1d4680e38378 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -4,6 +4,7 @@ * Martin Schwidefsky , */ +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -485,3 +487,81 @@ void calibrate_delay(void) "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); } + +#ifdef CONFIG_DEBUG_FS + +#define STSI_FILE(fc, s1, s2) \ +static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\ +{ \ + file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \ + if (!file->private_data) \ + return -ENOMEM; \ + if (stsi(file->private_data, fc, s1, s2)) { \ + free_page((unsigned long)file->private_data); \ + file->private_data = NULL; \ + return -EACCES; \ + } \ + return nonseekable_open(inode, file); \ +} \ + \ +static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ + .open = stsi_open_##fc##_##s1##_##s2, \ + .release = stsi_release, \ + .read = stsi_read, \ + .llseek = no_llseek, \ +}; + +static int stsi_release(struct inode *inode, 
struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + +static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) +{ + return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); +} + +STSI_FILE(15, 1, 2); +STSI_FILE(15, 1, 3); +STSI_FILE(15, 1, 4); +STSI_FILE(15, 1, 5); +STSI_FILE(15, 1, 6); + +struct stsi_file { + const struct file_operations *fops; + char *name; +}; + +static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, + {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"}, + {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, + {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"}, + {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, +}; + +static __init int stsi_init_debugfs(void) +{ + struct dentry *stsi_root; + struct stsi_file *sf; + int i; + + stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); + if (IS_ERR_OR_NULL(stsi_root)) + return 0; + for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { + sf = &stsi_file[i]; + debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); + } + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + char link_to[10]; + + sprintf(link_to, "15_1_%d", topology_mnest_limit()); + debugfs_create_symlink("topology", stsi_root, link_to); + } + return 0; +} +device_initcall(stsi_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0537130fb915..bb47c92476f0 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -246,7 +246,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - stsi(info, 15, 1, min(topology_max_mnest, 4)); + stsi(info, 15, 1, topology_mnest_limit()); } static int __arch_update_cpu_topology(void) -- cgit v1.2.3 From cdd3bd9d618d7a4a3920a193e20ca40736b878e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2017 16:15:06 +0100 Subject: 
s390/sysinfo: provide remaining stsi information via debugfs Provide the remaining stsi information via debugfs files. This also might be useful for debugging purposes. Suggested-by: Christian Borntraeger Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sysinfo.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 1d4680e38378..eefcb54872a5 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -522,6 +522,12 @@ static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_ return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); } +STSI_FILE( 1, 1, 1); +STSI_FILE( 1, 2, 1); +STSI_FILE( 1, 2, 2); +STSI_FILE( 2, 2, 1); +STSI_FILE( 2, 2, 2); +STSI_FILE( 3, 2, 2); STSI_FILE(15, 1, 2); STSI_FILE(15, 1, 3); STSI_FILE(15, 1, 4); @@ -534,6 +540,12 @@ struct stsi_file { }; static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"}, + {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"}, + {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"}, + {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"}, + {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"}, + {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"}, {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"}, {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, @@ -541,15 +553,21 @@ static struct stsi_file stsi_file[] __initdata = { {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, }; +static u8 stsi_0_0_0; + static __init int stsi_init_debugfs(void) { struct dentry *stsi_root; struct stsi_file *sf; - int i; + int lvl, i; stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); if (IS_ERR_OR_NULL(stsi_root)) return 0; + lvl = stsi(NULL, 0, 0, 0); + if (lvl > 0) + stsi_0_0_0 = lvl; + debugfs_create_u8("0_0_0", 0400, 
stsi_root, &stsi_0_0_0); for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); -- cgit v1.2.3 From 0b7bb6af1d734b15dbebec942767708e8ca40ca3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Mar 2017 14:39:28 +0100 Subject: s390/facilities: get rid of __ASSEMBLY__ in facility header file There is no need for the __ASSEMBLY__ ifdefery anymore since the architecture level set code that deals with facility bits was converted to C in the meantime. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 0b5ebf8a3d30..eff5b31671d4 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -25,7 +25,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From dcc00b79fc3d076832f7240de8870f492629b171 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 23 Mar 2017 21:02:54 +0100 Subject: s390/kdump: Add final note Since linux v3.14 with commit 38dfac843cb6d7be1 ("vmcore: prevent PT_NOTE p_memsz overflow during header update") on s390 we get the following message in the kdump kernel: Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x6b6b6b6b, n_descsz=0x6b6b6b6b The reason for this is that we don't create a final zero note in the ELF header which the proc/vmcore code uses to find out the end of the notes section (see also kernel/kexec_core.c:final_note()). It still worked on s390 by chance because we (most of the time?) have the byte pattern 0x6b6b6b6b after the notes section which also makes the notes parsing code stop in update_note_header_size_elf64() because 0x6b6b6b6b is interpreted as note size: if ((real_sz + sz) > max_sz) { pr_warn("Warning: Exceeded p_memsz, dropping P ...); break; } So fix this and add the missing final note to the ELF header.
We don't have to adjust the memory size for ELF header ("alloc_size") because the new ELF note still fits into the 0x1000 base memory. Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/crash_dump.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd1d5c62c374..d628afc26708 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -428,6 +428,20 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +/* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); +} + /* * Initialize ELF header (new kernel) */ @@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) if (sa->prefix != 0) ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type = PT_NOTE; phdr->p_offset = notes_offset; -- cgit v1.2.3 From 20ba46da369e239aa454cc57d0c7e33d51a8de51 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 13 Feb 2017 12:30:09 +0100 Subject: s390/cpum_cf: update counter numbers to ecctr limits Use the highest counter number that can be specified for the ecctr (extract CPU counter) instruction for perf. 
Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1aba10e90906..1c3f93812817 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -370,7 +370,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (ev == -1) return -ENOENT; - if (ev >= PERF_CPUM_CF_MAX_CTR) + if (ev > PERF_CPUM_CF_MAX_CTR) return -EINVAL; /* Use the hardware perf event structure to store the counter number -- cgit v1.2.3 From db17160dce4ffe4d5bd70b58ee94f398ef9cabb1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 13 Feb 2017 12:32:17 +0100 Subject: s390/cpum_cf: cleanup event/counter validation The validate_event() function just checked for reserved counters in particular CPU-MF counter sets. Because the number of counters in counter sets vary among different hardware models, remove the explicit check to tolerate new models. Reserved counters are not accounted and, thus, will return zero. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1c3f93812817..6fe1428e9dfc 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,7 +1,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 
2012, 2017 * Author(s): Hendrik Brueckner * * This program is free software; you can redistribute it and/or modify @@ -102,26 +102,6 @@ static int get_counter_set(u64 event) return set; } -static int validate_event(const struct hw_perf_event *hwc) -{ - switch (hwc->config_base) { - case CPUMF_CTR_SET_BASIC: - case CPUMF_CTR_SET_USER: - case CPUMF_CTR_SET_CRYPTO: - case CPUMF_CTR_SET_EXT: - /* check for reserved counters */ - if ((hwc->config >= 6 && hwc->config <= 31) || - (hwc->config >= 38 && hwc->config <= 63) || - (hwc->config >= 80 && hwc->config <= 127)) - return -EOPNOTSUPP; - break; - default: - return -EINVAL; - } - - return 0; -} - static int validate_ctr_version(const struct hw_perf_event *hwc) { struct cpu_hw_events *cpuhw; @@ -381,15 +361,6 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->config = ev; hwc->config_base = get_counter_set(ev); - /* Validate the counter that is assigned to this event. - * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicitly - * validate event groups (event->group_leader != event). - */ - err = validate_event(hwc); - if (err) - return err; - /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { mutex_lock(&pmc_reserve_mutex); -- cgit v1.2.3 From ee699f329a239bc3cc8a8c336b9615166993bffc Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 3 Jun 2016 16:55:03 +0200 Subject: s390/cpum_cf: add support for the MT-diagnostic counter set (z13) Complete the IBM z13 support and support counters from the MT-diagnostic counter set. Note that this counter set is available only if SMT is enabled. 
Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 89 +++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 25 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 6fe1428e9dfc..52a9ae0272c9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -22,19 +22,12 @@ #include #include -/* CPU-measurement counter facility supports these CPU counter sets: - * For CPU counter sets: - * Basic counter set: 0-31 - * Problem-state counter set: 32-63 - * Crypto-activity counter set: 64-127 - * Extented counter set: 128-159 - */ enum cpumf_ctr_set { - /* CPU counter sets */ - CPUMF_CTR_SET_BASIC = 0, - CPUMF_CTR_SET_USER = 1, - CPUMF_CTR_SET_CRYPTO = 2, - CPUMF_CTR_SET_EXT = 3, + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ /* Maximum number of counter sets */ CPUMF_CTR_SET_MAX, @@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { [CPUMF_CTR_SET_USER] = 0x04, [CPUMF_CTR_SET_CRYPTO] = 0x08, [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, }; static void ctr_set_enable(u64 *state, int ctr_set) @@ -76,19 +70,20 @@ struct cpu_hw_events { }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), }, .state = 0, .flags = 0, .txn_flags = 0, }; -static 
int get_counter_set(u64 event) +static enum cpumf_ctr_set get_counter_set(u64 event) { - int set = -1; + int set = CPUMF_CTR_SET_MAX; if (event < 32) set = CPUMF_CTR_SET_BASIC; @@ -98,6 +93,8 @@ static int get_counter_set(u64 event) set = CPUMF_CTR_SET_CRYPTO; else if (event < 256) set = CPUMF_CTR_SET_EXT; + else if (event >= 448 && event < 496) + set = CPUMF_CTR_SET_MT_DIAG; return set; } @@ -106,6 +103,7 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) { struct cpu_hw_events *cpuhw; int err = 0; + u16 mtdiag_ctl; cpuhw = &get_cpu_var(cpu_hw_events); @@ -125,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) (cpuhw->info.csvn > 2 && hwc->config > 255)) err = -EOPNOTSUPP; break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpuhw->info.csvn <= 3) + err = -EOPNOTSUPP; + /* + * MT-diagnostic counters are read-only. The counter set + * is automatically enabled and activated on all CPUs with + * multithreading (SMT). Deactivation of multithreading + * also disables the counter set. State changes are ignored + * by lcctl(). Because Linux controls SMT enablement through + * a kernel parameter only, the counter set is either disabled + * or enabled and active. + * + * Thus, the counters can only be used if SMT is on and the + * counter set is enabled and active. 
+ */ + mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG]; + if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && + (cpuhw->info.enable_ctl & mtdiag_ctl) && + (cpuhw->info.act_ctl & mtdiag_ctl))) + err = -EOPNOTSUPP; + break; } put_cpu_var(cpu_hw_events); @@ -230,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* loss of counter data alert */ if (alert & CPU_MF_INT_CF_LCDA) pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); } #define PMC_INIT 0 @@ -310,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; + enum cpumf_ctr_set set; int err; u64 ev; @@ -353,13 +378,27 @@ static int __hw_perf_event_init(struct perf_event *event) if (ev > PERF_CPUM_CF_MAX_CTR) return -EINVAL; - /* Use the hardware perf event structure to store the counter number - * in 'config' member and the counter set to which the counter belongs - * in the 'config_base'. The counter set (config_base) is then used - * to enable/disable the counters. - */ - hwc->config = ev; - hwc->config_base = get_counter_set(ev); + /* Obtain the counter set to which the specified counter belongs */ + set = get_counter_set(ev); + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + case CPUMF_CTR_SET_MT_DIAG: + /* + * Use the hardware perf event structure to store the + * counter number in the 'config' member and the counter + * set number in the 'config_base'. The counter set number + * is then later used to enable/disable the counter(s). 
+ */ + hwc->config = ev; + hwc->config_base = set; + break; + case CPUMF_CTR_SET_MAX: + /* The counter could not be associated to a counter set */ + return -EINVAL; + }; /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { -- cgit v1.2.3 From 3fc7acebaecf940697ea9a5a927cf10766d4b00e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 17 Feb 2017 12:16:00 +0100 Subject: s390/cpum_cf: add IBM z13 counter event names Add the event names for the IBM z13/z13s specific CPU-MF counters. Also improve the merging of the generic and model specific events so that their sysfs attribute definitions completely reside in memory. Hence, flagging the generic event attribute definitions as initdata too. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf_events.c | 130 +++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 7 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index c343ac2cf6c5..33d90c29bb3d 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089); 
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8); 
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); -static struct attribute *cpumcf_pmu_event_attr[] = { +static struct attribute *cpumcf_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf, CPU_CYCLES), CPUMF_EVENT_PTR(cf, INSTRUCTIONS), CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), @@ -236,11 +292,70 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL), + CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, 
TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + 
CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumsf_pmu_events_group = { .name = "events", - .attrs = cpumcf_pmu_event_attr, }; PMU_FORMAT_ATTR(event, "config:0-63"); @@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a, __init const struct attribute_group **cpumf_cf_event_group(void) { struct attribute **combined, **model; + struct attribute *none[] = { NULL }; struct cpuid cpu_id; get_cpu_id(&cpu_id); @@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x2828: model = cpumcf_zec12_pmu_event_attr; break; + case 0x2964: + case 0x2965: + model = cpumcf_z13_pmu_event_attr; + break; default: - model = NULL; + model = none; break; } - if (!model) - goto out; - combined = merge_attr(cpumcf_pmu_event_attr, model); if (combined) cpumsf_pmu_events_group.attrs = combined; -out: return cpumsf_pmu_attr_groups; } -- cgit v1.2.3 From 66a49784f909cb3da54b56591979971ebb7e7cac Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 17 Feb 2017 12:53:55 +0100 Subject: s390/cpum_cf: correct variable naming (cleanup) Make clear that the event definitions relate to the counter facility (cf) and not to the sampling facility (sf). 
Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf_events.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 33d90c29bb3d..d3133285b7d1 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -354,25 +354,25 @@ static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ -static struct attribute_group cpumsf_pmu_events_group = { +static struct attribute_group cpumcf_pmu_events_group = { .name = "events", }; PMU_FORMAT_ATTR(event, "config:0-63"); -static struct attribute *cpumsf_pmu_format_attr[] = { +static struct attribute *cpumcf_pmu_format_attr[] = { &format_attr_event.attr, NULL, }; -static struct attribute_group cpumsf_pmu_format_group = { +static struct attribute_group cpumcf_pmu_format_group = { .name = "format", - .attrs = cpumsf_pmu_format_attr, + .attrs = cpumcf_pmu_format_attr, }; -static const struct attribute_group *cpumsf_pmu_attr_groups[] = { - &cpumsf_pmu_events_group, - &cpumsf_pmu_format_group, +static const struct attribute_group *cpumcf_pmu_attr_groups[] = { + &cpumcf_pmu_events_group, + &cpumcf_pmu_format_group, NULL, }; @@ -433,6 +433,6 @@ __init const struct attribute_group **cpumf_cf_event_group(void) combined = merge_attr(cpumcf_pmu_event_attr, model); if (combined) - cpumsf_pmu_events_group.attrs = combined; - return cpumsf_pmu_attr_groups; + cpumcf_pmu_events_group.attrs = combined; + return cpumcf_pmu_attr_groups; } -- cgit v1.2.3 From 485527ba578254bb1171b13c55394257fd63cd59 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 20 Feb 2017 16:10:38 +0100 Subject: s390/cpum_cf: make hw_perf_event_update() a void function The return code of hw_perf_event_update() is not evaluated by its callers. 
Hence, simplify the function by removing the return code. Reported-by: Heiko Carstens Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 52a9ae0272c9..746d03423333 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -462,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event) return err; } -static int hw_perf_event_update(struct perf_event *event) +static void hw_perf_event_update(struct perf_event *event) { u64 prev, new, delta; int err; @@ -471,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event) prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) - goto out; + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1; /* overflow */ local64_add(delta, &event->count); -out: - return err; } static void cpumf_pmu_read(struct perf_event *event) -- cgit v1.2.3 From cab36c262ef9a5ddf3c7ae0f8031b191338b3142 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 3 Apr 2017 13:30:23 +0200 Subject: s390: use 64-bit lctlg to load task pid to cr4 on context switch The 32-bit lctl instruction is quite a bit slower than the 64-bit counterpart lctlg. Use the faster instruction.
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index fa8b8f28e08b..02f11018e2df 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -190,7 +190,9 @@ ENTRY(__switch_to) stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + xc __SF_EMPTY(8,%r15),__SF_EMPTY(%r15) + mvc __SF_EMPTY+4(4,%r15),__TASK_pid(%r3) + lctlg %c4,%c4,__SF_EMPTY(%r15) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP -- cgit v1.2.3 From df26c2e87e6cf3ced1fbd589e40d633a6a7f20cb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 4 Apr 2017 12:55:11 +0200 Subject: s390/cpumf: simplify detection of guest samples There are three different code levels in regard to the identification of guest samples. They differ in the way the LPP instruction is used. 1) Old kernels without the LPP instruction. The guest program parameter is always zero. 2) Newer kernels load the process pid into the program parameter with LPP. The guest program parameter is non-zero if the guest executes in a process != idle. 3) The latest kernels load ((1UL << 31) | pid) with LPP to make the value non-zero even for the idle task. The guest program parameter is non-zero if the guest is running. All kernels load the process pid to CR4 on context switch. The CPU sampling code uses the value in CR4 to decide between guest and host samples in case the guest program parameter is zero. 
The three cases: 1) CR4==pid, gpp==0 2) CR4==pid, gpp==pid 3) CR4==pid, gpp==((1UL << 31) | pid) The load-control instruction to load the pid into CR4 is expensive and the goal is to remove it. To distinguish the host CR4 from the guest pid for the idle process the maximum value 0xffff for the PASN is used. This adds a fourth case for a guest OS with an updated kernel: 4) CR4==0xffff, gpp==((1UL << 31) | pid) The host kernel will have CR4==0xffff and will use (gpp!=0 || CR4!=0xffff) to identify guest samples. This works nicely with all 4 cases, the only possible issue would be a guest with an old kernel (gpp==0) and a process pid of 0xffff. Well, don't do that.. Suggested-by: Christian Borntraeger Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 ---- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/perf_cpum_sf.c | 7 +++---- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02f11018e2df..c6cf338c9327 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -189,10 +189,6 @@ ENTRY(__switch_to) stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ - xc __SF_EMPTY(8,%r15),__SF_EMPTY(%r15) - mvc __SF_EMPTY+4(4,%r15),__TASK_pid(%r3) - lctlg %c4,%c4,__SF_EMPTY(%r15) # load pid to control reg.
4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 482d3526e32b..31c91f24e562 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -52,7 +52,7 @@ ENTRY(startup_continue) .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization - .quad 0 # cr4: instruction authorization + .quad 0xffff # cr4: instruction authorization .quad .Lduct # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c0b58545c04..9a4f279d25ca 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) * sample. Some early samples or samples from guests without * lpp usage would be misaccounted to the host. We use the asn * value as an addon heuristic to detect most of these guest samples. - * If the value differs from the host hpp value, we assume to be a - * KVM guest. + * If the value differs from 0xffff (the host value), we assume to + * be a KVM guest. 
*/ switch (sfr->basic.CL) { case 1: /* logical partition */ @@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) sde_regs->in_guest = 1; break; default: /* old machine, use heuristics */ - if (sfr->basic.gpp || - sfr->basic.prim_asn != (u16)sfr->basic.hpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) sde_regs->in_guest = 1; break; } -- cgit v1.2.3 From b13de4b7adeb7a5e37a5aa78d5a4926c3cd4e131 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Mar 2017 17:00:45 +0100 Subject: s390/spinlock: remove compare and delay instruction The CAD instruction never worked quite as expected for the spinlock code. It has been disabled by default with git commit 61b0b01686d48220, if the "cad" kernel parameter is specified it is enabled for both user space and the spinlock code. Leave the option to enable the instruction for user space but remove it from the spinlock code. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 251391e3f8bc..5d20182ee8ae 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -434,23 +434,16 @@ early_param("noexec", noexec_setup); static int __init cad_setup(char *str) { - int val; - - get_option(&str, &val); - if (val && test_facility(128)) - S390_lowcore.machine_flags |= MACHINE_FLAG_CAD; - return 0; -} -early_param("cad", cad_setup); + bool enabled; + int rc; -static int __init cad_init(void) -{ - if (MACHINE_HAS_CAD) + rc = kstrtobool(str, &enabled); + if (!rc && enabled && test_facility(128)) /* Enable problem state CAD. 
*/ __ctl_set_bit(2, 3); - return 0; + return rc; } -early_initcall(cad_init); +early_param("cad", cad_setup); static __init void memmove_early(void *dst, const void *src, size_t n) { -- cgit v1.2.3 From e525f8a6e696210d15f8b8277d4da12fc4add299 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 20 Apr 2017 13:54:11 +0200 Subject: s390/gs: add regset for the guarded storage broadcast control block The guarded storage interface allows registering a control block for each thread that is activated with the guarded storage broadcast event. To retrieve the complete state of a process from the kernel a register set for the stored broadcast control block is required. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c933e255b5d5..488c5bb8dc77 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1171,10 +1171,48 @@ static int s390_gs_cb_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + if (!MACHINE_HAS_GS) return -ENODEV; if (!data) return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = 
target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; + } return user_regset_copyin(&pos, &count, &kbuf, &ubuf, data, 0, sizeof(struct gs_cb)); } @@ -1244,6 +1282,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, }; static const struct user_regset_view user_s390_view = { -- cgit v1.2.3