diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 17:57:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 17:57:35 -0700 |
commit | 0d1e8b8d2bcd3150d51754d8d0fdbf44dc88b0d3 (patch) | |
tree | 2794cb2347daa76b00160a6ffb68663f4138dcc7 /drivers | |
parent | 83c4087ce468601501ecde4d0ec5b2abd5f57c31 (diff) | |
parent | 22a7cdcae6a4a3c8974899e62851d270956f58ce (diff) |
Merge tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"ARM:
- Improved guest IPA space support (32 to 52 bits)
- RAS event delivery for 32bit
- PMU fixes
- Guest entry hardening
- Various cleanups
- Port of dirty_log_test selftest
PPC:
- Nested HV KVM support for radix guests on POWER9. The performance
is much better than with PR KVM. Migration and arbitrary level of
nesting is supported.
- Disable nested HV-KVM on early POWER9 chips that need a particular
hardware bug workaround
- One VM per core mode to prevent potential data leaks
- PCI pass-through optimization
- merge ppc-kvm topic branch and kvm-ppc-fixes to get a better base
s390:
- Initial version of AP crypto virtualization via vfio-mdev
- Improvement for vfio-ap
- Set the host program identifier
- Optimize page table locking
x86:
- Enable nested virtualization by default
- Implement Hyper-V IPI hypercalls
- Improve #PF and #DB handling
- Allow guests to use Enlightened VMCS
- Add migration selftests for VMCS and Enlightened VMCS
- Allow coalesced PIO accesses
- Add an option to perform nested VMCS host state consistency check
through hardware
- Automatic tuning of lapic_timer_advance_ns
- Many fixes, minor improvements, and cleanups"
* tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
KVM/nVMX: Do not validate that posted_intr_desc_addr is page aligned
Revert "kvm: x86: optimize dr6 restore"
KVM: PPC: Optimize clearing TCEs for sparse tables
x86/kvm/nVMX: tweak shadow fields
selftests/kvm: add missing executables to .gitignore
KVM: arm64: Safety check PSTATE when entering guest and handle IL
KVM: PPC: Book3S HV: Don't use streamlined entry path on early POWER9 chips
arm/arm64: KVM: Enable 32 bits kvm vcpu events support
arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension()
KVM: arm64: Fix caching of host MDCR_EL2 value
KVM: VMX: enable nested virtualization by default
KVM/x86: Use 32bit xor to clear registers in svm.c
kvm: x86: Introduce KVM_CAP_EXCEPTION_PAYLOAD
kvm: vmx: Defer setting of DR6 until #DB delivery
kvm: x86: Defer setting of CR2 until #PF delivery
kvm: x86: Add payload operands to kvm_multiple_exception
kvm: x86: Add exception payload fields to kvm_vcpu_events
kvm: x86: Add has_payload and payload to kvm_queued_exception
KVM: Documentation: Fix omission in struct kvm_vcpu_events
KVM: selftests: add Enlightened VMCS test
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/iommu/Kconfig | 8 | ||||
-rw-r--r-- | drivers/s390/crypto/Makefile | 4 | ||||
-rw-r--r-- | drivers/s390/crypto/vfio_ap_drv.c | 157 | ||||
-rw-r--r-- | drivers/s390/crypto/vfio_ap_ops.c | 939 | ||||
-rw-r--r-- | drivers/s390/crypto/vfio_ap_private.h | 88 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 23 |
6 files changed, 1217 insertions, 2 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c60395b7470f..83e6d993fca5 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -372,6 +372,14 @@ config S390_CCW_IOMMU Enables bits of IOMMU API required by VFIO. The iommu_ops is not implemented as it is not necessary for VFIO. +config S390_AP_IOMMU + bool "S390 AP IOMMU Support" + depends on S390 && ZCRYPT + select IOMMU_API + help + Enables bits of IOMMU API required by VFIO. The iommu_ops + is not implemented as it is not necessary for VFIO. + config MTK_IOMMU bool "MTK IOMMU Support" depends on ARM || ARM64 diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile index fd5e215c66b7..6ccd93d0b1cb 100644 --- a/drivers/s390/crypto/Makefile +++ b/drivers/s390/crypto/Makefile @@ -15,3 +15,7 @@ obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o # pkey kernel module pkey-objs := pkey_api.o obj-$(CONFIG_PKEY) += pkey.o + +# adjunct processor matrix +vfio_ap-objs := vfio_ap_drv.o vfio_ap_ops.o +obj-$(CONFIG_VFIO_AP) += vfio_ap.o diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c new file mode 100644 index 000000000000..7667b38728f0 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VFIO based AP device driver + * + * Copyright IBM Corp. 2018 + * + * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> + */ + +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/slab.h> +#include <linux/string.h> +#include "vfio_ap_private.h" + +#define VFIO_AP_ROOT_NAME "vfio_ap" +#define VFIO_AP_DEV_TYPE_NAME "ap_matrix" +#define VFIO_AP_DEV_NAME "matrix" + +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018"); +MODULE_LICENSE("GPL v2"); + +static struct ap_driver vfio_ap_drv; + +static struct device_type vfio_ap_dev_type = { + .name = VFIO_AP_DEV_TYPE_NAME, +}; + +struct ap_matrix_dev *matrix_dev; + +/* Only type 10 adapters (CEX4 and later) are supported + * by the AP matrix device driver + */ +static struct ap_device_id ap_queue_ids[] = { + { .dev_type = AP_DEVICE_TYPE_CEX4, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX5, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX6, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { /* end of sibling */ }, +}; + +MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids); + +static int vfio_ap_queue_dev_probe(struct ap_device *apdev) +{ + return 0; +} + +static void vfio_ap_queue_dev_remove(struct ap_device *apdev) +{ + /* Nothing to do yet */ +} + +static void vfio_ap_matrix_dev_release(struct device *dev) +{ + struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev); + + kfree(matrix_dev); +} + +static int vfio_ap_matrix_dev_create(void) +{ + int ret; + struct device *root_device; + + root_device = root_device_register(VFIO_AP_ROOT_NAME); + if (IS_ERR(root_device)) + return PTR_ERR(root_device); + + matrix_dev = kzalloc(sizeof(*matrix_dev), GFP_KERNEL); + if (!matrix_dev) { + ret = -ENOMEM; + goto matrix_alloc_err; + } + + /* Fill in config info via PQAP(QCI), if available */ + if (test_facility(12)) { + ret = ap_qci(&matrix_dev->info); + if (ret) + goto matrix_alloc_err; + } + + mutex_init(&matrix_dev->lock); + INIT_LIST_HEAD(&matrix_dev->mdev_list); + + matrix_dev->device.type = &vfio_ap_dev_type; + dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME); + matrix_dev->device.parent = root_device; + matrix_dev->device.release = vfio_ap_matrix_dev_release; + matrix_dev->device.driver = &vfio_ap_drv.driver; + + ret = device_register(&matrix_dev->device); + if (ret) + goto matrix_reg_err; + + return 0; + +matrix_reg_err: + put_device(&matrix_dev->device); +matrix_alloc_err: + root_device_unregister(root_device); + + return ret; +} + +static void vfio_ap_matrix_dev_destroy(void) +{ + device_unregister(&matrix_dev->device); + root_device_unregister(matrix_dev->device.parent); +} + +static int __init vfio_ap_init(void) +{ + int ret; + + /* If there are no AP instructions, there is nothing to pass through. */ + if (!ap_instructions_available()) + return -ENODEV; + + ret = vfio_ap_matrix_dev_create(); + if (ret) + return ret; + + memset(&vfio_ap_drv, 0, sizeof(vfio_ap_drv)); + vfio_ap_drv.probe = vfio_ap_queue_dev_probe; + vfio_ap_drv.remove = vfio_ap_queue_dev_remove; + vfio_ap_drv.ids = ap_queue_ids; + + ret = ap_driver_register(&vfio_ap_drv, THIS_MODULE, VFIO_AP_DRV_NAME); + if (ret) { + vfio_ap_matrix_dev_destroy(); + return ret; + } + + ret = vfio_ap_mdev_register(); + if (ret) { + ap_driver_unregister(&vfio_ap_drv); + vfio_ap_matrix_dev_destroy(); + + return ret; + } + + return 0; +} + +static void __exit vfio_ap_exit(void) +{ + vfio_ap_mdev_unregister(); + ap_driver_unregister(&vfio_ap_drv); + vfio_ap_matrix_dev_destroy(); +} + +module_init(vfio_ap_init); +module_exit(vfio_ap_exit); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c new file mode 100644 index 000000000000..272ef427dcc0 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -0,0 +1,939 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Adjunct processor matrix VFIO device driver callbacks. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> + * Halil Pasic <pasic@linux.ibm.com> + * Pierre Morel <pmorel@linux.ibm.com> + */ +#include <linux/string.h> +#include <linux/vfio.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/ctype.h> +#include <linux/bitops.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <asm/kvm.h> +#include <asm/zcrypt.h> + +#include "vfio_ap_private.h" + +#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" +#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" + +static void vfio_ap_matrix_init(struct ap_config_info *info, + struct ap_matrix *matrix) +{ + matrix->apm_max = info->apxa ? info->Na : 63; + matrix->aqm_max = info->apxa ? info->Nd : 15; + matrix->adm_max = info->apxa ? info->Nd : 15; +} + +static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev) +{ + struct ap_matrix_mdev *matrix_mdev; + + if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0)) + return -EPERM; + + matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL); + if (!matrix_mdev) { + atomic_inc(&matrix_dev->available_instances); + return -ENOMEM; + } + + vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); + mdev_set_drvdata(mdev, matrix_mdev); + mutex_lock(&matrix_dev->lock); + list_add(&matrix_mdev->node, &matrix_dev->mdev_list); + mutex_unlock(&matrix_dev->lock); + + return 0; +} + +static int vfio_ap_mdev_remove(struct mdev_device *mdev) +{ + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + if (matrix_mdev->kvm) + return -EBUSY; + + mutex_lock(&matrix_dev->lock); + list_del(&matrix_mdev->node); + mutex_unlock(&matrix_dev->lock); + + kfree(matrix_mdev); + mdev_set_drvdata(mdev, NULL); + atomic_inc(&matrix_dev->available_instances); + + return 0; +} + +static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT); +} + +static MDEV_TYPE_ATTR_RO(name); + +static ssize_t available_instances_show(struct kobject *kobj, + struct device *dev, char *buf) +{ + return sprintf(buf, "%d\n", + atomic_read(&matrix_dev->available_instances)); +} + +static MDEV_TYPE_ATTR_RO(available_instances); + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING); +} + +static MDEV_TYPE_ATTR_RO(device_api); + +static struct attribute *vfio_ap_mdev_type_attrs[] = { + &mdev_type_attr_name.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_available_instances.attr, + NULL, +}; + +static struct attribute_group vfio_ap_mdev_hwvirt_type_group = { + .name = VFIO_AP_MDEV_TYPE_HWVIRT, + .attrs = vfio_ap_mdev_type_attrs, +}; + +static struct attribute_group *vfio_ap_mdev_type_groups[] = { + &vfio_ap_mdev_hwvirt_type_group, + NULL, +}; + +struct vfio_ap_queue_reserved { + unsigned long *apid; + unsigned long *apqi; + bool reserved; +}; + +/** + * vfio_ap_has_queue + * + * @dev: an AP queue device + * @data: a struct vfio_ap_queue_reserved reference + * + * Flags whether the AP queue device (@dev) has a queue ID containing the APQN, + * apid or apqi specified in @data: + * + * - If @data contains both an apid and apqi value, then @data will be flagged + * as reserved if the APID and APQI fields for the AP queue device matches + * + * - If @data contains only an apid value, @data will be flagged as + * reserved if the APID field in the AP queue device matches + * + * - If @data contains only an apqi value, @data will be flagged as + * reserved if the APQI field in the AP queue device matches + * + * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if + * @data does not contain either an apid or apqi. + */ +static int vfio_ap_has_queue(struct device *dev, void *data) +{ + struct vfio_ap_queue_reserved *qres = data; + struct ap_queue *ap_queue = to_ap_queue(dev); + ap_qid_t qid; + unsigned long id; + + if (qres->apid && qres->apqi) { + qid = AP_MKQID(*qres->apid, *qres->apqi); + if (qid == ap_queue->qid) + qres->reserved = true; + } else if (qres->apid && !qres->apqi) { + id = AP_QID_CARD(ap_queue->qid); + if (id == *qres->apid) + qres->reserved = true; + } else if (!qres->apid && qres->apqi) { + id = AP_QID_QUEUE(ap_queue->qid); + if (id == *qres->apqi) + qres->reserved = true; + } else { + return -EINVAL; + } + + return 0; +} + +/** + * vfio_ap_verify_queue_reserved + * + * @matrix_dev: a mediated matrix device + * @apid: an AP adapter ID + * @apqi: an AP queue index + * + * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device + * driver according to the following rules: + * + * - If both @apid and @apqi are not NULL, then there must be an AP queue + * device bound to the vfio_ap driver with the APQN identified by @apid and + * @apqi + * + * - If only @apid is not NULL, then there must be an AP queue device bound + * to the vfio_ap driver with an APQN containing @apid + * + * - If only @apqi is not NULL, then there must be an AP queue device bound + * to the vfio_ap driver with an APQN containing @apqi + * + * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL. + */ +static int vfio_ap_verify_queue_reserved(unsigned long *apid, + unsigned long *apqi) +{ + int ret; + struct vfio_ap_queue_reserved qres; + + qres.apid = apid; + qres.apqi = apqi; + qres.reserved = false; + + ret = driver_for_each_device(matrix_dev->device.driver, NULL, &qres, + vfio_ap_has_queue); + if (ret) + return ret; + + if (qres.reserved) + return 0; + + return -EADDRNOTAVAIL; +} + +static int +vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + int ret; + unsigned long apqi; + unsigned long nbits = matrix_mdev->matrix.aqm_max + 1; + + if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits) + return vfio_ap_verify_queue_reserved(&apid, NULL); + + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) { + ret = vfio_ap_verify_queue_reserved(&apid, &apqi); + if (ret) + return ret; + } + + return 0; +} + +/** + * vfio_ap_mdev_verify_no_sharing + * + * Verifies that the APQNs derived from the cross product of the AP adapter IDs + * and AP queue indexes comprising the AP matrix are not configured for another + * mediated device. AP queue sharing is not allowed. + * + * @matrix_mdev: the mediated matrix device + * + * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE. + */ +static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) +{ + struct ap_matrix_mdev *lstdev; + DECLARE_BITMAP(apm, AP_DEVICES); + DECLARE_BITMAP(aqm, AP_DOMAINS); + + list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) { + if (matrix_mdev == lstdev) + continue; + + memset(apm, 0, sizeof(apm)); + memset(aqm, 0, sizeof(aqm)); + + /* + * We work on full longs, as we can only exclude the leftover + * bits in non-inverse order. The leftover is all zeros. + */ + if (!bitmap_and(apm, matrix_mdev->matrix.apm, + lstdev->matrix.apm, AP_DEVICES)) + continue; + + if (!bitmap_and(aqm, matrix_mdev->matrix.aqm, + lstdev->matrix.aqm, AP_DOMAINS)) + continue; + + return -EADDRINUSE; + } + + return 0; +} + +/** + * assign_adapter_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's assign_adapter attribute + * @buf: a buffer containing the AP adapter number (APID) to + * be assigned + * @count: the number of bytes in @buf + * + * Parses the APID from @buf and sets the corresponding bit in the mediated + * matrix device's APM. + * + * Returns the number of bytes processed if the APID is valid; otherwise, + * returns one of the following errors: + * + * 1. -EINVAL + * The APID is not a valid number + * + * 2. -ENODEV + * The APID exceeds the maximum value configured for the system + * + * 3. -EADDRNOTAVAIL + * An APQN derived from the cross product of the APID being assigned + * and the APQIs previously assigned is not bound to the vfio_ap device + * driver; or, if no APQIs have yet been assigned, the APID is not + * contained in an APQN bound to the vfio_ap device driver. + * + * 4. -EADDRINUSE + * An APQN derived from the cross product of the APID being assigned + * and the APQIs previously assigned is being used by another mediated + * matrix device + */ +static ssize_t assign_adapter_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long apid; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + /* If the guest is running, disallow assignment of adapter */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &apid); + if (ret) + return ret; + + if (apid > matrix_mdev->matrix.apm_max) + return -ENODEV; + + /* + * Set the bit in the AP mask (APM) corresponding to the AP adapter + * number (APID). The bits in the mask, from most significant to least + * significant bit, correspond to APIDs 0-255. + */ + mutex_lock(&matrix_dev->lock); + + ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid); + if (ret) + goto done; + + set_bit_inv(apid, matrix_mdev->matrix.apm); + + ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); + if (ret) + goto share_err; + + ret = count; + goto done; + +share_err: + clear_bit_inv(apid, matrix_mdev->matrix.apm); +done: + mutex_unlock(&matrix_dev->lock); + + return ret; +} +static DEVICE_ATTR_WO(assign_adapter); + +/** + * unassign_adapter_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's unassign_adapter attribute + * @buf: a buffer containing the adapter number (APID) to be unassigned + * @count: the number of bytes in @buf + * + * Parses the APID from @buf and clears the corresponding bit in the mediated + * matrix device's APM. + * + * Returns the number of bytes processed if the APID is valid; otherwise, + * returns one of the following errors: + * -EINVAL if the APID is not a number + * -ENODEV if the APID it exceeds the maximum value configured for the + * system + */ +static ssize_t unassign_adapter_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long apid; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + /* If the guest is running, disallow un-assignment of adapter */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &apid); + if (ret) + return ret; + + if (apid > matrix_mdev->matrix.apm_max) + return -ENODEV; + + mutex_lock(&matrix_dev->lock); + clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); + mutex_unlock(&matrix_dev->lock); + + return count; +} +static DEVICE_ATTR_WO(unassign_adapter); + +static int +vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev, + unsigned long apqi) +{ + int ret; + unsigned long apid; + unsigned long nbits = matrix_mdev->matrix.apm_max + 1; + + if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits) + return vfio_ap_verify_queue_reserved(NULL, &apqi); + + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) { + ret = vfio_ap_verify_queue_reserved(&apid, &apqi); + if (ret) + return ret; + } + + return 0; +} + +/** + * assign_domain_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's assign_domain attribute + * @buf: a buffer containing the AP queue index (APQI) of the domain to + * be assigned + * @count: the number of bytes in @buf + * + * Parses the APQI from @buf and sets the corresponding bit in the mediated + * matrix device's AQM. + * + * Returns the number of bytes processed if the APQI is valid; otherwise returns + * one of the following errors: + * + * 1. -EINVAL + * The APQI is not a valid number + * + * 2. -ENODEV + * The APQI exceeds the maximum value configured for the system + * + * 3. -EADDRNOTAVAIL + * An APQN derived from the cross product of the APQI being assigned + * and the APIDs previously assigned is not bound to the vfio_ap device + * driver; or, if no APIDs have yet been assigned, the APQI is not + * contained in an APQN bound to the vfio_ap device driver. + * + * 4. -EADDRINUSE + * An APQN derived from the cross product of the APQI being assigned + * and the APIDs previously assigned is being used by another mediated + * matrix device + */ +static ssize_t assign_domain_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long apqi; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + unsigned long max_apqi = matrix_mdev->matrix.aqm_max; + + /* If the guest is running, disallow assignment of domain */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &apqi); + if (ret) + return ret; + if (apqi > max_apqi) + return -ENODEV; + + mutex_lock(&matrix_dev->lock); + + ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi); + if (ret) + goto done; + + set_bit_inv(apqi, matrix_mdev->matrix.aqm); + + ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); + if (ret) + goto share_err; + + ret = count; + goto done; + +share_err: + clear_bit_inv(apqi, matrix_mdev->matrix.aqm); +done: + mutex_unlock(&matrix_dev->lock); + + return ret; +} +static DEVICE_ATTR_WO(assign_domain); + + +/** + * unassign_domain_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's unassign_domain attribute + * @buf: a buffer containing the AP queue index (APQI) of the domain to + * be unassigned + * @count: the number of bytes in @buf + * + * Parses the APQI from @buf and clears the corresponding bit in the + * mediated matrix device's AQM. + * + * Returns the number of bytes processed if the APQI is valid; otherwise, + * returns one of the following errors: + * -EINVAL if the APQI is not a number + * -ENODEV if the APQI exceeds the maximum value configured for the system + */ +static ssize_t unassign_domain_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long apqi; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + /* If the guest is running, disallow un-assignment of domain */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &apqi); + if (ret) + return ret; + + if (apqi > matrix_mdev->matrix.aqm_max) + return -ENODEV; + + mutex_lock(&matrix_dev->lock); + clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm); + mutex_unlock(&matrix_dev->lock); + + return count; +} +static DEVICE_ATTR_WO(unassign_domain); + +/** + * assign_control_domain_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's assign_control_domain attribute + * @buf: a buffer containing the domain ID to be assigned + * @count: the number of bytes in @buf + * + * Parses the domain ID from @buf and sets the corresponding bit in the mediated + * matrix device's ADM. + * + * Returns the number of bytes processed if the domain ID is valid; otherwise, + * returns one of the following errors: + * -EINVAL if the ID is not a number + * -ENODEV if the ID exceeds the maximum value configured for the system + */ +static ssize_t assign_control_domain_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long id; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + /* If the guest is running, disallow assignment of control domain */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &id); + if (ret) + return ret; + + if (id > matrix_mdev->matrix.adm_max) + return -ENODEV; + + /* Set the bit in the ADM (bitmask) corresponding to the AP control + * domain number (id). The bits in the mask, from most significant to + * least significant, correspond to IDs 0 up to the one less than the + * number of control domains that can be assigned. + */ + mutex_lock(&matrix_dev->lock); + set_bit_inv(id, matrix_mdev->matrix.adm); + mutex_unlock(&matrix_dev->lock); + + return count; +} +static DEVICE_ATTR_WO(assign_control_domain); + +/** + * unassign_control_domain_store + * + * @dev: the matrix device + * @attr: the mediated matrix device's unassign_control_domain attribute + * @buf: a buffer containing the domain ID to be unassigned + * @count: the number of bytes in @buf + * + * Parses the domain ID from @buf and clears the corresponding bit in the + * mediated matrix device's ADM. + * + * Returns the number of bytes processed if the domain ID is valid; otherwise, + * returns one of the following errors: + * -EINVAL if the ID is not a number + * -ENODEV if the ID exceeds the maximum value configured for the system + */ +static ssize_t unassign_control_domain_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long domid; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + unsigned long max_domid = matrix_mdev->matrix.adm_max; + + /* If the guest is running, disallow un-assignment of control domain */ + if (matrix_mdev->kvm) + return -EBUSY; + + ret = kstrtoul(buf, 0, &domid); + if (ret) + return ret; + if (domid > max_domid) + return -ENODEV; + + mutex_lock(&matrix_dev->lock); + clear_bit_inv(domid, matrix_mdev->matrix.adm); + mutex_unlock(&matrix_dev->lock); + + return count; +} +static DEVICE_ATTR_WO(unassign_control_domain); + +static ssize_t control_domains_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + unsigned long id; + int nchars = 0; + int n; + char *bufpos = buf; + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + unsigned long max_domid = matrix_mdev->matrix.adm_max; + + mutex_lock(&matrix_dev->lock); + for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) { + n = sprintf(bufpos, "%04lx\n", id); + bufpos += n; + nchars += n; + } + mutex_unlock(&matrix_dev->lock); + + return nchars; +} +static DEVICE_ATTR_RO(control_domains); + +static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mdev_device *mdev = mdev_from_dev(dev); + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + char *bufpos = buf; + unsigned long apid; + unsigned long apqi; + unsigned long apid1; + unsigned long apqi1; + unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1; + unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1; + int nchars = 0; + int n; + + apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits); + apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits); + + mutex_lock(&matrix_dev->lock); + + if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) { + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, + naqm_bits) { + n = sprintf(bufpos, "%02lx.%04lx\n", apid, + apqi); + bufpos += n; + nchars += n; + } + } + } else if (apid1 < napm_bits) { + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { + n = sprintf(bufpos, "%02lx.\n", apid); + bufpos += n; + nchars += n; + } + } else if (apqi1 < naqm_bits) { + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) { + n = sprintf(bufpos, ".%04lx\n", apqi); + bufpos += n; + nchars += n; + } + } + + mutex_unlock(&matrix_dev->lock); + + return nchars; +} +static DEVICE_ATTR_RO(matrix); + +static struct attribute *vfio_ap_mdev_attrs[] = { + &dev_attr_assign_adapter.attr, + &dev_attr_unassign_adapter.attr, + &dev_attr_assign_domain.attr, + &dev_attr_unassign_domain.attr, + &dev_attr_assign_control_domain.attr, + &dev_attr_unassign_control_domain.attr, + &dev_attr_control_domains.attr, + &dev_attr_matrix.attr, + NULL, +}; + +static struct attribute_group vfio_ap_mdev_attr_group = { + .attrs = vfio_ap_mdev_attrs +}; + +static const struct attribute_group *vfio_ap_mdev_attr_groups[] = { + &vfio_ap_mdev_attr_group, + NULL +}; + +/** + * vfio_ap_mdev_set_kvm + * + * @matrix_mdev: a mediated matrix device + * @kvm: reference to KVM instance + * + * Verifies no other mediated matrix device has @kvm and sets a reference to + * it in @matrix_mdev->kvm. + * + * Return 0 if no other mediated matrix device has a reference to @kvm; + * otherwise, returns an -EPERM. + */ +static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, + struct kvm *kvm) +{ + struct ap_matrix_mdev *m; + + mutex_lock(&matrix_dev->lock); + + list_for_each_entry(m, &matrix_dev->mdev_list, node) { + if ((m != matrix_mdev) && (m->kvm == kvm)) { + mutex_unlock(&matrix_dev->lock); + return -EPERM; + } + } + + matrix_mdev->kvm = kvm; + mutex_unlock(&matrix_dev->lock); + + return 0; +} + +static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret; + struct ap_matrix_mdev *matrix_mdev; + + if (action != VFIO_GROUP_NOTIFY_SET_KVM) + return NOTIFY_OK; + + matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); + + if (!data) { + matrix_mdev->kvm = NULL; + return NOTIFY_OK; + } + + ret = vfio_ap_mdev_set_kvm(matrix_mdev, data); + if (ret) + return NOTIFY_DONE; + + /* If there is no CRYCB pointer, then we can't copy the masks */ + if (!matrix_mdev->kvm->arch.crypto.crycbd) + return NOTIFY_DONE; + + kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm, + matrix_mdev->matrix.aqm, + matrix_mdev->matrix.adm); + + return NOTIFY_OK; +} + +static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, + unsigned int retry) +{ + struct ap_queue_status status; + + do { + status = ap_zapq(AP_MKQID(apid, apqi)); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + msleep(20); + break; + default: + /* things are really broken, give up */ + return -EIO; + } + } while (retry--); + + return -EBUSY; +} + +static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) +{ + int ret; + int rc = 0; + unsigned long apid, apqi; + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, + matrix_mdev->matrix.apm_max + 1) { + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, + matrix_mdev->matrix.aqm_max + 1) { + ret = vfio_ap_mdev_reset_queue(apid, apqi, 1); + /* + * Regardless whether a queue turns out to be busy, or + * is not operational, we need to continue resetting + * the remaining queues. + */ + if (ret) + rc = ret; + } + } + + return rc; +} + +static int vfio_ap_mdev_open(struct mdev_device *mdev) +{ + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + unsigned long events; + int ret; + + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; + events = VFIO_GROUP_NOTIFY_SET_KVM; + + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, + &events, &matrix_mdev->group_notifier); + if (ret) { + module_put(THIS_MODULE); + return ret; + } + + return 0; +} + +static void vfio_ap_mdev_release(struct mdev_device *mdev) +{ + struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + + if (matrix_mdev->kvm) + kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + + vfio_ap_mdev_reset_queues(mdev); + vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, + &matrix_mdev->group_notifier); + matrix_mdev->kvm = NULL; + module_put(THIS_MODULE); +} + +static int vfio_ap_mdev_get_device_info(unsigned long arg) +{ + unsigned long minsz; + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET; + info.num_regions = 0; + info.num_irqs = 0; + + return copy_to_user((void __user *)arg, &info, minsz); +} + +static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, + unsigned int cmd, unsigned long arg) +{ + int ret; + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + ret = vfio_ap_mdev_get_device_info(arg); + break; + case VFIO_DEVICE_RESET: + ret = vfio_ap_mdev_reset_queues(mdev); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +static const struct mdev_parent_ops vfio_ap_matrix_ops = { + .owner = THIS_MODULE, + .supported_type_groups = vfio_ap_mdev_type_groups, + .mdev_attr_groups = vfio_ap_mdev_attr_groups, + .create = vfio_ap_mdev_create, + .remove = vfio_ap_mdev_remove, + .open = vfio_ap_mdev_open, + .release = vfio_ap_mdev_release, + .ioctl = vfio_ap_mdev_ioctl, +}; + +int vfio_ap_mdev_register(void) +{ + atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT); + + return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops); +} + +void vfio_ap_mdev_unregister(void) +{ + mdev_unregister_device(&matrix_dev->device); +} diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h new file mode 100644 index 000000000000..5675492233c7 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Private data and functions for adjunct processor VFIO matrix driver. + * + * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> + * Halil Pasic <pasic@linux.ibm.com> + * + * Copyright IBM Corp. 2018 + */ + +#ifndef _VFIO_AP_PRIVATE_H_ +#define _VFIO_AP_PRIVATE_H_ + +#include <linux/types.h> +#include <linux/device.h> +#include <linux/mdev.h> +#include <linux/delay.h> +#include <linux/mutex.h> + +#include "ap_bus.h" + +#define VFIO_AP_MODULE_NAME "vfio_ap" +#define VFIO_AP_DRV_NAME "vfio_ap" + +/** + * ap_matrix_dev - the AP matrix device structure + * @device: generic device structure associated with the AP matrix device + * @available_instances: number of mediated matrix devices that can be created + * @info: the struct containing the output from the PQAP(QCI) instruction + * mdev_list: the list of mediated matrix devices created + * lock: mutex for locking the AP matrix device. This lock will be + * taken every time we fiddle with state managed by the vfio_ap + * driver, be it using @mdev_list or writing the state of a + * single ap_matrix_mdev device. It's quite coarse but we don't + * expect much contention. + */ +struct ap_matrix_dev { + struct device device; + atomic_t available_instances; + struct ap_config_info info; + struct list_head mdev_list; + struct mutex lock; +}; + +extern struct ap_matrix_dev *matrix_dev; + +/** + * The AP matrix is comprised of three bit masks identifying the adapters, + * queues (domains) and control domains that belong to an AP matrix. The bits i + * each mask, from least significant to most significant bit, correspond to IDs + * 0 to 255. When a bit is set, the corresponding ID belongs to the matrix. + * + * @apm_max: max adapter number in @apm + * @apm identifies the AP adapters in the matrix + * @aqm_max: max domain number in @aqm + * @aqm identifies the AP queues (domains) in the matrix + * @adm_max: max domain number in @adm + * @adm identifies the AP control domains in the matrix + */ +struct ap_matrix { + unsigned long apm_max; + DECLARE_BITMAP(apm, 256); + unsigned long aqm_max; + DECLARE_BITMAP(aqm, 256); + unsigned long adm_max; + DECLARE_BITMAP(adm, 256); +}; + +/** + * struct ap_matrix_mdev - the mediated matrix device structure + * @list: allows the ap_matrix_mdev struct to be added to a list + * @matrix: the adapters, usage domains and control domains assigned to the + * mediated matrix device. + * @group_notifier: notifier block used for specifying callback function for + * handling the VFIO_GROUP_NOTIFY_SET_KVM event + * @kvm: the struct holding guest's state + */ +struct ap_matrix_mdev { + struct list_head node; + struct ap_matrix matrix; + struct notifier_block group_notifier; + struct kvm *kvm; +}; + +extern int vfio_ap_mdev_register(void); +extern void vfio_ap_mdev_unregister(void); + +#endif /* _VFIO_AP_PRIVATE_H_ */ diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 96721b154454..b30926e11d87 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -444,7 +444,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry); if (!pua) return; @@ -467,8 +467,27 @@ static int tce_iommu_clear(struct tce_container *container, unsigned long oldhpa; long ret; enum dma_data_direction direction; + unsigned long lastentry = entry + pages; + + for ( ; entry < lastentry; ++entry) { + if (tbl->it_indirect_levels && tbl->it_userspace) { + /* + * For multilevel tables, we can take a shortcut here + * and skip some TCEs as we know that the userspace + * addresses cache is a mirror of the real TCE table + * and if it is missing some indirect levels, then + * the hardware table does not have them allocated + * either and therefore does not require updating. + */ + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, + entry); + if (!pua) { + /* align to level_size which is power of two */ + entry |= tbl->it_level_size - 1; + continue; + } + } - for ( ; pages; --pages, ++entry) { cond_resched(); direction = DMA_NONE; |