diff options
-rw-r--r-- | MAINTAINERS | 14 | ||||
-rw-r--r-- | arch/s390/include/asm/pci.h | 4 | ||||
-rw-r--r-- | arch/s390/pci/pci_bus.c | 5 | ||||
-rw-r--r-- | arch/s390/pci/pci_clp.c | 2 | ||||
-rw-r--r-- | drivers/pci/iov.c | 1 | ||||
-rw-r--r-- | drivers/vfio/Kconfig | 1 | ||||
-rw-r--r-- | drivers/vfio/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/Kconfig | 9 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/Makefile | 4 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc.c | 683 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 194 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 55 | ||||
-rw-r--r-- | drivers/vfio/pci/Kconfig | 12 | ||||
-rw-r--r-- | drivers/vfio/pci/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 38 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 27 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 12 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_zdev.c | 143 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 9 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 23 | ||||
-rw-r--r-- | include/linux/pci.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 29 | ||||
-rw-r--r-- | include/uapi/linux/vfio_zdev.h | 78 |
24 files changed, 1330 insertions, 20 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8b82a02e609c..f5570ae5f451 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15287,6 +15287,14 @@ F: Documentation/s390/vfio-ccw.rst F: drivers/s390/cio/vfio_ccw* F: include/uapi/linux/vfio_ccw.h +S390 VFIO-PCI DRIVER +M: Matthew Rosato <mjrosato@linux.ibm.com> +L: linux-s390@vger.kernel.org +L: kvm@vger.kernel.org +S: Supported +F: drivers/vfio/pci/vfio_pci_zdev.c +F: include/uapi/linux/vfio_zdev.h + S390 ZCRYPT DRIVER M: Harald Freudenberger <freude@linux.ibm.com> L: linux-s390@vger.kernel.org @@ -18385,6 +18393,12 @@ F: drivers/vfio/ F: include/linux/vfio.h F: include/uapi/linux/vfio.h +VFIO FSL-MC DRIVER +M: Diana Craciun <diana.craciun@oss.nxp.com> +L: kvm@vger.kernel.org +S: Maintained +F: drivers/vfio/fsl-mc/ + VFIO MEDIATED DEVICE DRIVERS M: Kirti Wankhede <kwankhede@nvidia.com> L: kvm@vger.kernel.org diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b5380a251df2..212628932ddc 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -132,7 +132,8 @@ struct zpci_dev { u8 rid_available : 1; u8 has_hp_slot : 1; u8 is_physfn : 1; - u8 reserved : 5; + u8 util_str_avail : 1; + u8 reserved : 4; unsigned int devfn; /* DEVFN part of the RID*/ struct mutex lock; @@ -179,6 +180,7 @@ struct zpci_dev { atomic64_t mapped_pages; atomic64_t unmapped_pages; + u8 version; enum pci_bus_speed max_bus_speed; struct dentry *debugfs_dev; diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 0c0db7c3a404..755b46f4c595 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -135,9 +135,10 @@ void pcibios_bus_add_device(struct pci_dev *pdev) * With pdev->no_vf_scan the common PCI probing code does not * perform PF/VF linking. */ - if (zdev->vfn) + if (zdev->vfn) { zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn); - + pdev->no_command_memory = 1; + } } static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 5a34a1359dc5..153720d21ae7 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -102,6 +102,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->msi_addr = response->msia; zdev->max_msi = response->noi; zdev->fmb_update = response->mui; + zdev->version = response->version; switch (response->version) { case 1: @@ -167,6 +168,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, if (response->util_str_avail) { memcpy(zdev->util_str, response->util_str, sizeof(zdev->util_str)); + zdev->util_str_avail = 1; } zdev->mio_capable = response->mio_addr_avail; for (i = 0; i < PCI_STD_NUM_BARS; i++) { diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index b37e08c4f9d1..4afd4ee4f7f0 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -180,6 +180,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id) virtfn->device = iov->vf_device; virtfn->is_virtfn = 1; virtfn->physfn = pci_dev_get(dev); + virtfn->no_command_memory = 1; if (id == 0) pci_read_vf_config_common(virtfn); diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index fd17db9b432f..5533df91b257 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -47,4 +47,5 @@ menuconfig VFIO_NOIOMMU source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" +source "drivers/vfio/fsl-mc/Kconfig" source "virt/lib/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index de67c4725cce..fee73f3d9480 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ obj-$(CONFIG_VFIO_PLATFORM) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ +obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/ diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig new file mode 100644 index 000000000000..b1a527d6b6f2 --- /dev/null +++ b/drivers/vfio/fsl-mc/Kconfig @@ -0,0 +1,9 @@ +config VFIO_FSL_MC + tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" + depends on VFIO && FSL_MC_BUS && EVENTFD + help + Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc + (Management Complex) devices. This is required to passthrough + fsl-mc bus devices using the VFIO framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/fsl-mc/Makefile b/drivers/vfio/fsl-mc/Makefile new file mode 100644 index 000000000000..cad6dbf0b735 --- /dev/null +++ b/drivers/vfio/fsl-mc/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) + +vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o +obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c new file mode 100644 index 000000000000..0113a980f974 --- /dev/null +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright 2013-2016 Freescale Semiconductor Inc. + * Copyright 2016-2017,2019-2020 NXP + */ + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vfio.h> +#include <linux/fsl/mc.h> +#include <linux/delay.h> +#include <linux/io-64-nonatomic-hi-lo.h> + +#include "vfio_fsl_mc_private.h" + +static struct fsl_mc_driver vfio_fsl_mc_driver; + +static DEFINE_MUTEX(reflck_lock); + +static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck) +{ + kref_get(&reflck->kref); +} + +static void vfio_fsl_mc_reflck_release(struct kref *kref) +{ + struct vfio_fsl_mc_reflck *reflck = container_of(kref, + struct vfio_fsl_mc_reflck, + kref); + + mutex_destroy(&reflck->lock); + kfree(reflck); + mutex_unlock(&reflck_lock); +} + +static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck) +{ + kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock); +} + +static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void) +{ + struct vfio_fsl_mc_reflck *reflck; + + reflck = kzalloc(sizeof(*reflck), GFP_KERNEL); + if (!reflck) + return ERR_PTR(-ENOMEM); + + kref_init(&reflck->kref); + mutex_init(&reflck->lock); + + return reflck; +} + +static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev) +{ + int ret = 0; + + mutex_lock(&reflck_lock); + if (is_fsl_mc_bus_dprc(vdev->mc_dev)) { + vdev->reflck = vfio_fsl_mc_reflck_alloc(); + ret = PTR_ERR_OR_ZERO(vdev->reflck); + } else { + struct device *mc_cont_dev = vdev->mc_dev->dev.parent; + struct vfio_device *device; + struct vfio_fsl_mc_device *cont_vdev; + + device = vfio_device_get_from_dev(mc_cont_dev); + if (!device) { + ret = -ENODEV; + goto unlock; + } + + cont_vdev = vfio_device_data(device); + if (!cont_vdev || !cont_vdev->reflck) { + vfio_device_put(device); + ret = -ENODEV; + goto unlock; + } + vfio_fsl_mc_reflck_get(cont_vdev->reflck); + vdev->reflck = cont_vdev->reflck; + vfio_device_put(device); + } + +unlock: + mutex_unlock(&reflck_lock); + return ret; +} + +static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int count = mc_dev->obj_desc.region_count; + int i; + + vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region), + GFP_KERNEL); + if (!vdev->regions) + return -ENOMEM; + + for (i = 0; i < count; i++) { + struct resource *res = &mc_dev->regions[i]; + int no_mmap = is_fsl_mc_bus_dprc(mc_dev); + + vdev->regions[i].addr = res->start; + vdev->regions[i].size = resource_size(res); + vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS; + /* + * Only regions addressed with PAGE granularity may be + * MMAPed securely. + */ + if (!no_mmap && !(vdev->regions[i].addr & ~PAGE_MASK) && + !(vdev->regions[i].size & ~PAGE_MASK)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_MMAP; + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY)) + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; + } + + return 0; +} + +static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int i; + + for (i = 0; i < mc_dev->obj_desc.region_count; i++) + iounmap(vdev->regions[i].ioaddr); + kfree(vdev->regions); +} + +static int vfio_fsl_mc_open(void *device_data) +{ + struct vfio_fsl_mc_device *vdev = device_data; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&vdev->reflck->lock); + if (!vdev->refcnt) { + ret = vfio_fsl_mc_regions_init(vdev); + if (ret) + goto err_reg_init; + } + vdev->refcnt++; + + mutex_unlock(&vdev->reflck->lock); + + return 0; + +err_reg_init: + mutex_unlock(&vdev->reflck->lock); + module_put(THIS_MODULE); + return ret; +} + +static void vfio_fsl_mc_release(void *device_data) +{ + struct vfio_fsl_mc_device *vdev = device_data; + int ret; + + mutex_lock(&vdev->reflck->lock); + + if (!(--vdev->refcnt)) { + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); + + vfio_fsl_mc_regions_cleanup(vdev); + + /* reset the device before cleaning up the interrupts */ + ret = dprc_reset_container(mc_cont->mc_io, 0, + mc_cont->mc_handle, + mc_cont->obj_desc.id, + DPRC_RESET_OPTION_NON_RECURSIVE); + + if (ret) { + dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n", + ret); + WARN_ON(1); + } + + vfio_fsl_mc_irqs_cleanup(vdev); + + fsl_mc_cleanup_irq_pool(mc_cont); + } + + mutex_unlock(&vdev->reflck->lock); + + module_put(THIS_MODULE); +} + +static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd, + unsigned long arg) +{ + unsigned long minsz; + struct vfio_fsl_mc_device *vdev = device_data; + struct fsl_mc_device *mc_dev = vdev->mc_dev; + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.flags = VFIO_DEVICE_FLAGS_FSL_MC; + + if (is_fsl_mc_bus_dprc(mc_dev)) + info.flags |= VFIO_DEVICE_FLAGS_RESET; + + info.num_regions = mc_dev->obj_desc.region_count; + info.num_irqs = mc_dev->obj_desc.irq_count; + + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + } + case VFIO_DEVICE_GET_REGION_INFO: + { + struct vfio_region_info info; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= mc_dev->obj_desc.region_count) + return -EINVAL; + + /* map offset to the physical address */ + info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index); + info.size = vdev->regions[info.index].size; + info.flags = vdev->regions[info.index].flags; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_GET_IRQ_INFO: + { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= mc_dev->obj_desc.irq_count) + return -EINVAL; + + info.flags = VFIO_IRQ_INFO_EVENTFD; + info.count = 1; + + return copy_to_user((void __user *)arg, &info, minsz); + } + case VFIO_DEVICE_SET_IRQS: + { + struct vfio_irq_set hdr; + u8 *data = NULL; + int ret = 0; + size_t data_size = 0; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + ret = vfio_set_irqs_validate_and_prepare(&hdr, mc_dev->obj_desc.irq_count, + mc_dev->obj_desc.irq_count, &data_size); + if (ret) + return ret; + + if (data_size) { + data = memdup_user((void __user *)(arg + minsz), + data_size); + if (IS_ERR(data)) + return PTR_ERR(data); + } + + mutex_lock(&vdev->igate); + ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags, + hdr.index, hdr.start, + hdr.count, data); + mutex_unlock(&vdev->igate); + kfree(data); + + return ret; + } + case VFIO_DEVICE_RESET: + { + int ret; + struct fsl_mc_device *mc_dev = vdev->mc_dev; + + /* reset is supported only for the DPRC */ + if (!is_fsl_mc_bus_dprc(mc_dev)) + return -ENOTTY; + + ret = dprc_reset_container(mc_dev->mc_io, 0, + mc_dev->mc_handle, + mc_dev->obj_desc.id, + DPRC_RESET_OPTION_NON_RECURSIVE); + return ret; + + } + default: + return -ENOTTY; + } +} + +static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_fsl_mc_device *vdev = device_data; + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct vfio_fsl_mc_region *region; + u64 data[8]; + int i; + + if (index >= mc_dev->obj_desc.region_count) + return -EINVAL; + + region = &vdev->regions[index]; + + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ)) + return -EINVAL; + + if (!region->ioaddr) { + region->ioaddr = ioremap(region->addr, region->size); + if (!region->ioaddr) + return -ENOMEM; + } + + if (count != 64 || off != 0) + return -EINVAL; + + for (i = 7; i >= 0; i--) + data[i] = readq(region->ioaddr + i * sizeof(uint64_t)); + + if (copy_to_user(buf, data, 64)) + return -EFAULT; + + return count; +} + +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000 +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500 + +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data) +{ + int i; + enum mc_cmd_status status; + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000; + + /* Write at command parameter into portal */ + for (i = 7; i >= 1; i--) + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t)); + + /* Write command header in the end */ + writeq(cmd_data[0], ioaddr); + + /* Wait for response before returning to user-space + * This can be optimized in future to even prepare response + * before returning to user-space and avoid read ioctl. + */ + for (;;) { + u64 header; + struct mc_cmd_header *resp_hdr; + + header = cpu_to_le64(readq_relaxed(ioaddr)); + + resp_hdr = (struct mc_cmd_header *)&header; + status = (enum mc_cmd_status)resp_hdr->status; + if (status != MC_CMD_STATUS_READY) + break; + + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS); + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS; + if (timeout_usecs == 0) + return -ETIMEDOUT; + } + + return 0; +} + +static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_fsl_mc_device *vdev = device_data; + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct vfio_fsl_mc_region *region; + u64 data[8]; + int ret; + + if (index >= mc_dev->obj_desc.region_count) + return -EINVAL; + + region = &vdev->regions[index]; + + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE)) + return -EINVAL; + + if (!region->ioaddr) { + region->ioaddr = ioremap(region->addr, region->size); + if (!region->ioaddr) + return -ENOMEM; + } + + if (count != 64 || off != 0) + return -EINVAL; + + if (copy_from_user(&data, buf, 64)) + return -EFAULT; + + ret = vfio_fsl_mc_send_command(region->ioaddr, data); + if (ret) + return ret; + + return count; + +} + +static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region, + struct vm_area_struct *vma) +{ + u64 size = vma->vm_end - vma->vm_start; + u64 pgoff, base; + u8 region_cacheable; + + pgoff = vma->vm_pgoff & + ((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + base = pgoff << PAGE_SHIFT; + + if (region.size < PAGE_SIZE || base + size > region.size) + return -EINVAL; + + region_cacheable = (region.type & FSL_MC_REGION_CACHEABLE) && + (region.type & FSL_MC_REGION_SHAREABLE); + if (!region_cacheable) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff; + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + size, vma->vm_page_prot); +} + +static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma) +{ + struct vfio_fsl_mc_device *vdev = device_data; + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int index; + + index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT); + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + if (vma->vm_start & ~PAGE_MASK) + return -EINVAL; + if (vma->vm_end & ~PAGE_MASK) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (index >= mc_dev->obj_desc.region_count) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) + && (vma->vm_flags & VM_READ)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) + && (vma->vm_flags & VM_WRITE)) + return -EINVAL; + + vma->vm_private_data = mc_dev; + + return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma); +} + +static const struct vfio_device_ops vfio_fsl_mc_ops = { + .name = "vfio-fsl-mc", + .open = vfio_fsl_mc_open, + .release = vfio_fsl_mc_release, + .ioctl = vfio_fsl_mc_ioctl, + .read = vfio_fsl_mc_read, + .write = vfio_fsl_mc_write, + .mmap = vfio_fsl_mc_mmap, +}; + +static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct vfio_fsl_mc_device *vdev = container_of(nb, + struct vfio_fsl_mc_device, nb); + struct device *dev = data; + struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); + struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent); + + if (action == BUS_NOTIFY_ADD_DEVICE && + vdev->mc_dev == mc_cont) { + mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s", + vfio_fsl_mc_ops.name); + if (!mc_dev->driver_override) + dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n", + dev_name(&mc_cont->dev)); + else + dev_info(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s\n", + dev_name(&mc_cont->dev)); + } else if (action == BUS_NOTIFY_BOUND_DRIVER && + vdev->mc_dev == mc_cont) { + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); + + if (mc_drv && mc_drv != &vfio_fsl_mc_driver) + dev_warn(dev, "VFIO_FSL_MC: Object %s bound to driver %s while DPRC bound to vfio-fsl-mc\n", + dev_name(dev), mc_drv->driver.name); + } + + return 0; +} + +static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int ret; + + /* Non-dprc devices share mc_io from parent */ + if (!is_fsl_mc_bus_dprc(mc_dev)) { + struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent); + + mc_dev->mc_io = mc_cont->mc_io; + return 0; + } + + vdev->nb.notifier_call = vfio_fsl_mc_bus_notifier; + ret = bus_register_notifier(&fsl_mc_bus_type, &vdev->nb); + if (ret) + return ret; + + /* open DPRC, allocate a MC portal */ + ret = dprc_setup(mc_dev); + if (ret) { + dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret); + goto out_nc_unreg; + } + + ret = dprc_scan_container(mc_dev, false); + if (ret) { + dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret); + goto out_dprc_cleanup; + } + + return 0; + +out_dprc_cleanup: + dprc_remove_devices(mc_dev, NULL, 0); + dprc_cleanup(mc_dev); +out_nc_unreg: + bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); + vdev->nb.notifier_call = NULL; + + return ret; +} + +static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) +{ + struct iommu_group *group; + struct vfio_fsl_mc_device *vdev; + struct device *dev = &mc_dev->dev; + int ret; + + group = vfio_iommu_group_get(dev); + if (!group) { + dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n"); + return -EINVAL; + } + + vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL); + if (!vdev) { + ret = -ENOMEM; + goto out_group_put; + } + + vdev->mc_dev = mc_dev; + + ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev); + if (ret) { + dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n"); + goto out_group_put; + } + + ret = vfio_fsl_mc_reflck_attach(vdev); + if (ret) + goto out_group_dev; + + ret = vfio_fsl_mc_init_device(vdev); + if (ret) + goto out_reflck; + + mutex_init(&vdev->igate); + + return 0; + +out_reflck: + vfio_fsl_mc_reflck_put(vdev->reflck); +out_group_dev: + vfio_del_group_dev(dev); +out_group_put: + vfio_iommu_group_put(group, dev); + return ret; +} + +static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) +{ + struct vfio_fsl_mc_device *vdev; + struct device *dev = &mc_dev->dev; + + vdev = vfio_del_group_dev(dev); + if (!vdev) + return -EINVAL; + + mutex_destroy(&vdev->igate); + + vfio_fsl_mc_reflck_put(vdev->reflck); + + if (is_fsl_mc_bus_dprc(mc_dev)) { + dprc_remove_devices(mc_dev, NULL, 0); + dprc_cleanup(mc_dev); + } + + if (vdev->nb.notifier_call) + bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); + + vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); + + return 0; +} + +static struct fsl_mc_driver vfio_fsl_mc_driver = { + .probe = vfio_fsl_mc_probe, + .remove = vfio_fsl_mc_remove, + .driver = { + .name = "vfio-fsl-mc", + .owner = THIS_MODULE, + }, +}; + +static int __init vfio_fsl_mc_driver_init(void) +{ + return fsl_mc_driver_register(&vfio_fsl_mc_driver); +} + +static void __exit vfio_fsl_mc_driver_exit(void) +{ + fsl_mc_driver_unregister(&vfio_fsl_mc_driver); +} + +module_init(vfio_fsl_mc_driver_init); +module_exit(vfio_fsl_mc_driver_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver"); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c new file mode 100644 index 000000000000..c80dceb46f79 --- /dev/null +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright 2013-2016 Freescale Semiconductor Inc. + * Copyright 2019 NXP + */ + +#include <linux/vfio.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/eventfd.h> +#include <linux/msi.h> + +#include "linux/fsl/mc.h" +#include "vfio_fsl_mc_private.h" + +int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct vfio_fsl_mc_irq *mc_irq; + int irq_count; + int ret, i; + + /* Device does not support any interrupt */ + if (mc_dev->obj_desc.irq_count == 0) + return 0; + + /* interrupts were already allocated for this device */ + if (vdev->mc_irqs) + return 0; + + irq_count = mc_dev->obj_desc.irq_count; + + mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL); + if (!mc_irq) + return -ENOMEM; + + /* Allocate IRQs */ + ret = fsl_mc_allocate_irqs(mc_dev); + if (ret) { + kfree(mc_irq); + return ret; + } + + for (i = 0; i < irq_count; i++) { + mc_irq[i].count = 1; + mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD; + } + + vdev->mc_irqs = mc_irq; + + return 0; +} + +static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg) +{ + struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg; + + eventfd_signal(mc_irq->trigger, 1); + return IRQ_HANDLED; +} + +static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev, + int index, int fd) +{ + struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index]; + struct eventfd_ctx *trigger; + int hwirq; + int ret; + + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq; + if (irq->trigger) { + free_irq(hwirq, irq); + kfree(irq->name); + eventfd_ctx_put(irq->trigger); + irq->trigger = NULL; + } + + if (fd < 0) /* Disable only */ + return 0; + + irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", + hwirq, dev_name(&vdev->mc_dev->dev)); + if (!irq->name) + return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + kfree(irq->name); + return PTR_ERR(trigger); + } + + irq->trigger = trigger; + + ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0, + irq->name, irq); + if (ret) { + kfree(irq->name); + eventfd_ctx_put(trigger); + irq->trigger = NULL; + return ret; + } + + return 0; +} + +static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, + unsigned int index, unsigned int start, + unsigned int count, u32 flags, + void *data) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int ret, hwirq; + struct vfio_fsl_mc_irq *irq; + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) + return vfio_set_trigger(vdev, index, -1); + + if (start != 0 || count != 1) + return -EINVAL; + + mutex_lock(&vdev->reflck->lock); + ret = fsl_mc_populate_irq_pool(mc_cont, + FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS); + if (ret) + goto unlock; + + ret = vfio_fsl_mc_irqs_allocate(vdev); + if (ret) + goto unlock; + mutex_unlock(&vdev->reflck->lock); + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + s32 fd = *(s32 *)data; + + return vfio_set_trigger(vdev, index, fd); + } + + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq; + + irq = &vdev->mc_irqs[index]; + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + vfio_fsl_mc_irq_handler(hwirq, irq); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + u8 trigger = *(u8 *)data; + + if (trigger) + vfio_fsl_mc_irq_handler(hwirq, irq); + } + + return 0; + +unlock: + mutex_unlock(&vdev->reflck->lock); + return ret; + +} + +int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, + u32 flags, unsigned int index, + unsigned int start, unsigned int count, + void *data) +{ + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER) + return vfio_fsl_mc_set_irq_trigger(vdev, index, start, + count, flags, data); + else + return -EINVAL; +} + +/* Free All IRQs for the given MC object */ +void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int irq_count = mc_dev->obj_desc.irq_count; + int i; + + /* + * Device does not support any interrupt or the interrupts + * were not configured + */ + if (!vdev->mc_irqs) + return; + + for (i = 0; i < irq_count; i++) + vfio_set_trigger(vdev, i, -1); + + fsl_mc_free_irqs(mc_dev); + kfree(vdev->mc_irqs); + vdev->mc_irqs = NULL; +} diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h new file mode 100644 index 000000000000..a97ee691ed47 --- /dev/null +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright 2013-2016 Freescale Semiconductor Inc. + * Copyright 2016,2019-2020 NXP + */ + +#ifndef VFIO_FSL_MC_PRIVATE_H +#define VFIO_FSL_MC_PRIVATE_H + +#define VFIO_FSL_MC_OFFSET_SHIFT 40 +#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1) + +#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT) + +#define VFIO_FSL_MC_INDEX_TO_OFFSET(index) \ + ((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT) + +struct vfio_fsl_mc_irq { + u32 flags; + u32 count; + struct eventfd_ctx *trigger; + char *name; +}; + +struct vfio_fsl_mc_reflck { + struct kref kref; + struct mutex lock; +}; + +struct vfio_fsl_mc_region { + u32 flags; + u32 type; + u64 addr; + resource_size_t size; + void __iomem *ioaddr; +}; + +struct vfio_fsl_mc_device { + struct fsl_mc_device *mc_dev; + struct notifier_block nb; + int refcnt; + struct vfio_fsl_mc_region *regions; + struct vfio_fsl_mc_reflck *reflck; + struct mutex igate; + struct vfio_fsl_mc_irq *mc_irqs; +}; + +extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, + u32 flags, unsigned int index, + unsigned int start, unsigned int count, + void *data); + +void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev); + +#endif /* VFIO_FSL_MC_PRIVATE_H */ diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index ac3c1dd3edef..40a223381ab6 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -45,3 +45,15 @@ config VFIO_PCI_NVLINK2 depends on VFIO_PCI && PPC_POWERNV help VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs + +config VFIO_PCI_ZDEV + bool "VFIO PCI ZPCI device CLP support" + depends on VFIO_PCI && S390 + default y + help + Enabling this option exposes VFIO capabilities containing hardware + configuration for zPCI devices. This enables userspace (e.g. QEMU) + to supply proper configuration values instead of hard-coded defaults + for zPCI devices passed through via VFIO on s390. + + Say Y here. diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index f027f8a0e89c..781e0809d6ee 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -3,5 +3,6 @@ vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o +vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b0f4b92a87ed..fbd2b3404184 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -807,15 +807,25 @@ static long vfio_pci_ioctl(void *device_data, if (cmd == VFIO_DEVICE_GET_INFO) { struct vfio_device_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; minsz = offsetofend(struct vfio_device_info, num_irqs); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + if (copy_from_user(&info, (void __user *)arg, minsz)) return -EFAULT; if (info.argsz < minsz) return -EINVAL; + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; + } + info.flags = VFIO_DEVICE_FLAGS_PCI; if (vdev->reset_works) @@ -824,6 +834,33 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) { + int ret = vfio_pci_info_zdev_add_caps(vdev, &caps); + + if (ret && ret != -ENODEV) { + pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n"); + return ret; + } + } + + if (caps.size) { + info.flags |= VFIO_DEVICE_FLAGS_CAPS; + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; + } + info.cap_offset = sizeof(info); + } + + kfree(caps.buf); + } + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; @@ -1860,7 +1897,6 @@ static const struct vfio_device_ops vfio_pci_ops = { static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev); static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); -static struct pci_driver vfio_pci_driver; static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index d98843feddce..a402adee8a21 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -406,7 +406,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev) * PF SR-IOV capability, there's therefore no need to trigger * faults based on the virtual value. */ - return pdev->is_virtfn || (cmd & PCI_COMMAND_MEMORY); + return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY); } /* @@ -467,6 +467,9 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) __le32 *vbar; u64 mask; + if (!vdev->bardirty) + return; + vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) { @@ -520,8 +523,8 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos, count = vfio_default_config_read(vdev, pos, count, perm, offset, val); - /* Mask in virtual memory enable for SR-IOV devices */ - if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) { + /* Mask in virtual memory enable */ + if (offset == PCI_COMMAND && vdev->pdev->no_command_memory) { u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); u32 tmp_val = le32_to_cpu(*val); @@ -589,9 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, * shows it disabled (phys_mem/io, then the device has * undergone some kind of backdoor reset and needs to be * restored before we allow it to enable the bars. - * SR-IOV devices will trigger this, but we catch them later + * SR-IOV devices will trigger this - for mem enable let's + * catch this now and for io enable it will be caught later */ - if ((new_mem && virt_mem && !phys_mem) || + if ((new_mem && virt_mem && !phys_mem && + !pdev->no_command_memory) || (new_io && virt_io && !phys_io) || vfio_need_bar_restore(vdev)) vfio_bar_restore(vdev); @@ -1734,12 +1739,14 @@ int vfio_config_init(struct vfio_pci_device *vdev) vconfig[PCI_INTERRUPT_PIN]); vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ - + } + if (pdev->no_command_memory) { /* - * VFs do no implement the memory enable bit of the COMMAND - * register therefore we'll not have it set in our initial - * copy of config space after pci_enable_device(). For - * consistency with PFs, set the virtual enable bit here. + * VFs and devices that set pdev->no_command_memory do not + * implement the memory enable bit of the COMMAND register + * therefore we'll not have it set in our initial copy of + * config space after pci_enable_device(). For consistency + * with PFs, set the virtual enable bit here. */ *(__le16 *)&vconfig[PCI_COMMAND] |= cpu_to_le16(PCI_COMMAND_MEMORY); diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 1d9fb2592945..869dce5f134d 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -352,11 +352,13 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, vdev->ctx[vector].producer.token = trigger; vdev->ctx[vector].producer.irq = irq; ret = irq_bypass_register_producer(&vdev->ctx[vector].producer); - if (unlikely(ret)) + if (unlikely(ret)) { dev_info(&pdev->dev, "irq bypass producer (token %p) registration fails: %d\n", vdev->ctx[vector].producer.token, ret); + vdev->ctx[vector].producer.token = NULL; + } vdev->ctx[vector].trigger = trigger; return 0; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 61ca8ab165dc..5c90e560c5c7 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -213,4 +213,16 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) return -ENODEV; } #endif + +#ifdef CONFIG_VFIO_PCI_ZDEV +extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps); +#else +static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + return -ENODEV; +} +#endif + #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c new file mode 100644 index 000000000000..229685634031 --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VFIO ZPCI devices support + * + * Copyright (C) IBM Corp. 2020. All rights reserved. + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * Matthew Rosato <mjrosato@linux.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> +#include <linux/vfio_zdev.h> +#include <asm/pci_clp.h> +#include <asm/pci_io.h> + +#include "vfio_pci_private.h" + +/* + * Add the Base PCI Function information to the device info region. + */ +static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_device_info_cap_zpci_base cap = { + .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE, + .header.version = 1, + .start_dma = zdev->start_dma, + .end_dma = zdev->end_dma, + .pchid = zdev->pchid, + .vfn = zdev->vfn, + .fmb_length = zdev->fmb_length, + .pft = zdev->pft, + .gid = zdev->pfgid + }; + + return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); +} + +/* + * Add the Base PCI Function Group information to the device info region. + */ +static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_device_info_cap_zpci_group cap = { + .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP, + .header.version = 1, + .dasm = zdev->dma_mask, + .msi_addr = zdev->msi_addr, + .flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH, + .mui = zdev->fmb_update, + .noi = zdev->max_msi, + .maxstbl = ZPCI_MAX_WRITE_SIZE, + .version = zdev->version + }; + + return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); +} + +/* + * Add the device utility string to the device info region. + */ +static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_device_info_cap_zpci_util *cap; + int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN; + int ret; + + cap = kmalloc(cap_size, GFP_KERNEL); + + cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL; + cap->header.version = 1; + cap->size = CLP_UTIL_STR_LEN; + memcpy(cap->util_str, zdev->util_str, cap->size); + + ret = vfio_info_add_capability(caps, &cap->header, cap_size); + + kfree(cap); + + return ret; +} + +/* + * Add the function path string to the device info region. + */ +static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_device_info_cap_zpci_pfip *cap; + int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS; + int ret; + + cap = kmalloc(cap_size, GFP_KERNEL); + + cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP; + cap->header.version = 1; + cap->size = CLP_PFIP_NR_SEGMENTS; + memcpy(cap->pfip, zdev->pfip, cap->size); + + ret = vfio_info_add_capability(caps, &cap->header, cap_size); + + kfree(cap); + + return ret; +} + +/* + * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain. + */ +int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct zpci_dev *zdev = to_zpci(vdev->pdev); + int ret; + + if (!zdev) + return -ENODEV; + + ret = zpci_base_cap(zdev, vdev, caps); + if (ret) + return ret; + + ret = zpci_group_cap(zdev, vdev, caps); + if (ret) + return ret; + + if (zdev->util_str_avail) { + ret = zpci_util_cap(zdev, vdev, caps); + if (ret) + return ret; + } + + ret = zpci_pfip_cap(zdev, vdev, caps); + + return ret; +} diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 262ab0efd06c..2151bc7f87ab 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1949,8 +1949,10 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, if (!group) return -ENODEV; - if (group->dev_counter > 1) - return -EINVAL; + if (group->dev_counter > 1) { + ret = -EINVAL; + goto err_pin_pages; + } ret = vfio_group_add_container_user(group); if (ret) @@ -2051,6 +2053,9 @@ int vfio_group_pin_pages(struct vfio_group *group, if (!group || !user_iova_pfn || !phys_pfn || !npage) return -EINVAL; + if (group->dev_counter > 1) + return -EINVAL; + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c255a6683f31..bb2684cc245e 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -693,7 +693,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); if (ret) { - vfio_unpin_page_external(dma, iova, do_accounting); + if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) + vfio_lock_acct(dma, -1, true); goto pin_unwind; } @@ -2609,6 +2610,20 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); } +static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct vfio_iommu_type1_info_dma_avail cap_dma_avail; + + cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL; + cap_dma_avail.header.version = 1; + + cap_dma_avail.avail = iommu->dma_avail; + + return vfio_info_add_capability(caps, &cap_dma_avail.header, + sizeof(cap_dma_avail)); +} + static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, unsigned long arg) { @@ -2642,6 +2657,9 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, ret = vfio_iommu_migration_build_caps(iommu, &caps); if (!ret) + ret = vfio_iommu_dma_avail_build_caps(iommu, &caps); + + if (!ret) ret = vfio_iommu_iova_build_caps(iommu, &caps); mutex_unlock(&iommu->lock); @@ -2933,7 +2951,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, * size */ bitmap_set(dma->bitmap, offset >> pgshift, - *copied >> pgshift); + ((offset + *copied - 1) >> pgshift) - + (offset >> pgshift) + 1); } } else *copied = copy_from_user(data, (void __user *)vaddr, diff --git a/include/linux/pci.h b/include/linux/pci.h index 32cd2b550736..22207a79762c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -446,6 +446,7 @@ struct pci_dev { unsigned int is_probed:1; /* Device probing in progress */ unsigned int link_active_reporting:1;/* Device capable of reporting link active */ unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ + unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 920470502329..2f313a238a8f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -201,8 +201,11 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) @@ -218,6 +221,15 @@ struct vfio_device_info { #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" #define VFIO_DEVICE_API_AP_STRING "vfio-ap" +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) @@ -462,7 +474,7 @@ struct vfio_region_gfx_edid { * 5. Resumed * |--------->| * - * 0. Default state of VFIO device is _RUNNNG when the user application starts. + * 0. Default state of VFIO device is _RUNNING when the user application starts. * 1. During normal shutdown of the user application, the user application may * optionally change the VFIO device state from _RUNNING to _STOP. This * transition is optional. The vendor driver must support this transition but @@ -1039,6 +1051,21 @@ struct vfio_iommu_type1_info_cap_migration { __u64 max_dirty_bitmap_size; /* in bytes */ }; +/* + * The DMA available capability allows to report the current number of + * simultaneously outstanding DMA mappings that are allowed. + * + * The structure below defines version 1 of this capability. + * + * avail: specifies the current number of outstanding DMA mappings allowed. + */ +#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 + +struct vfio_iommu_type1_info_dma_avail { + struct vfio_info_cap_header header; + __u32 avail; +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h new file mode 100644 index 000000000000..b4309397b6b2 --- /dev/null +++ b/include/uapi/linux/vfio_zdev.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO Region definitions for ZPCI devices + * + * Copyright IBM Corp. 2020 + * + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * Matthew Rosato <mjrosato@linux.ibm.com> + */ + +#ifndef _VFIO_ZDEV_H_ +#define _VFIO_ZDEV_H_ + +#include <linux/types.h> +#include <linux/vfio.h> + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information + * + * This capability provides a set of descriptive information about the + * associated PCI function. + */ +struct vfio_device_info_cap_zpci_base { + struct vfio_info_cap_header header; + __u64 start_dma; /* Start of available DMA addresses */ + __u64 end_dma; /* End of available DMA addresses */ + __u16 pchid; /* Physical Channel ID */ + __u16 vfn; /* Virtual function number */ + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information + * + * This capability provides a set of descriptive information about the group of + * PCI functions that the associated device belongs to. + */ +struct vfio_device_info_cap_zpci_group { + struct vfio_info_cap_header header; + __u64 dasm; /* DMA Address space mask */ + __u64 msi_addr; /* MSI address */ + __u64 flags; +#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */ + __u16 mui; /* Measurement Block Update Interval */ + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String + * + * This capability provides the utility string for the associated device, which + * is a device identifier string made up of EBCDID characters. 'size' specifies + * the length of 'util_str'. + */ +struct vfio_device_info_cap_zpci_util { + struct vfio_info_cap_header header; + __u32 size; + __u8 util_str[]; +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path + * + * This capability provides the PCI function path string, which is an identifier + * that describes the internal hardware path of the device. 'size' specifies + * the length of 'pfip'. + */ +struct vfio_device_info_cap_zpci_pfip { + struct vfio_info_cap_header header; + __u32 size; + __u8 pfip[]; +}; + +#endif |