diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/vas.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/vas.c | 595 |
1 files changed, 595 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..b5c1cf1bc64d --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#include <asm/vas.h> +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", + hcall, rc, query_type, result); + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + txwin->win_type = cop_feat_caps->win_type; + mutex_lock(&vas_pseries_mutex); + list_add(&txwin->win_list, &caps->list); + mutex_unlock(&vas_pseries_mutex); + + return &txwin->vas_win; + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); +out: + atomic_dec(&cop_feat_caps->used_lpar_creds); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + + list_del(&win->win_list); + atomic_dec(&caps->used_lpar_creds); + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc; + + if (!copypaste_feat) + return -ENOTSUPP; + + rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->target_lpar_creds, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + copypaste_feat = true; + + return 0; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_cop_feat_caps *hv_cop_caps; + struct hv_vas_all_caps *hv_caps; + int rc; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); + if (!hv_cop_caps) { + rc = -ENOMEM; + goto out; + } + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + + if (rc) + goto out_cop; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); + if (rc) + goto out_cop; + } + + pr_info("GZIP feature is available\n"); + +out_cop: + kfree(hv_cop_caps); +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); |