Diffstat (limited to 'drivers/iommu/intel/svm.c')
-rw-r--r--  drivers/iommu/intel/svm.c | 643
1 file changed, 317 insertions, 326 deletions
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5165cea90421..9b0f22bc0514 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -17,19 +17,76 @@
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
+#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
+#include <trace/events/intel_iommu.h>
#include "pasid.h"
+#include "perf.h"
+#include "../iommu-sva-lib.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
#define PRQ_ORDER 0
+static DEFINE_XARRAY_ALLOC(pasid_private_array);
+static int pasid_private_add(ioasid_t pasid, void *priv)
+{
+ return xa_alloc(&pasid_private_array, &pasid, priv,
+ XA_LIMIT(pasid, pasid), GFP_ATOMIC);
+}
+
+static void pasid_private_remove(ioasid_t pasid)
+{
+ xa_erase(&pasid_private_array, pasid);
+}
+
+static void *pasid_private_find(ioasid_t pasid)
+{
+ return xa_load(&pasid_private_array, pasid);
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == sid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->dev == dev) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
+ struct iopf_queue *iopfq;
struct page *pages;
int irq, ret;
@@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
- err:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
- return ret;
+ goto free_prq;
}
iommu->pr_irq = irq;
+ snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
+ "dmar%d-iopfq", iommu->seq_id);
+ iopfq = iopf_queue_alloc(iommu->iopfq_name);
+ if (!iopfq) {
+ pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
+ ret = -ENOMEM;
+ goto free_hwirq;
+ }
+ iommu->iopf_queue = iopfq;
+
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
@@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
- dmar_free_hwirq(irq);
- iommu->pr_irq = 0;
- goto err;
+ goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
@@ -71,6 +133,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
init_completion(&iommu->prq_complete);
return 0;
+
+free_iopfq:
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+free_hwirq:
+ dmar_free_hwirq(irq);
+ iommu->pr_irq = 0;
+free_prq:
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return ret;
}
int intel_svm_finish_prq(struct intel_iommu *iommu)
@@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->pr_irq = 0;
}
+ if (iommu->iopf_queue) {
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+ }
+
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
@@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static DEFINE_MUTEX(pasid_mutex);
-static LIST_HEAD(global_svm_list);
-
-#define for_each_svm_dev(sdev, svm, d) \
- list_for_each_entry((sdev), &(svm)->devs, list) \
- if ((d) != (sdev)->dev) {} else
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm **rsvm,
struct intel_svm_dev **rsdev)
{
- struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm_dev *sdev = NULL;
struct intel_svm *svm;
/* The caller should hold the pasid_mutex lock */
@@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
return -EINVAL;
- svm = ioasid_find(NULL, pasid, NULL);
+ svm = pasid_private_find(pasid);
if (IS_ERR(svm))
return PTR_ERR(svm);
@@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
*/
if (WARN_ON(list_empty(&svm->devs)))
return -EINVAL;
-
- rcu_read_lock();
- list_for_each_entry_rcu(d, &svm->devs, list) {
- if (d->dev == dev) {
- sdev = d;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_dev(svm, dev);
out:
*rsvm = svm;
@@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
svm->gpasid = data->gpasid;
svm->flags |= SVM_FLAG_GUEST_PASID;
}
- ioasid_set_data(data->hpasid, svm);
+ pasid_private_add(data->hpasid, svm);
INIT_LIST_HEAD_RCU(&svm->devs);
mmput(svm->mm);
}
@@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
list_add_rcu(&sdev->list, &svm->devs);
out:
if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
- ioasid_set_data(data->hpasid, NULL);
+ pasid_private_remove(data->hpasid);
kfree(svm);
}
@@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
* the unbind, IOMMU driver will get notified
* and perform cleanup.
*/
- ioasid_set_data(pasid, NULL);
+ pasid_private_remove(pasid);
kfree(svm);
}
}
@@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
mutex_unlock(&mm->context.lock);
}
-/* Caller must hold pasid_mutex, mm reference */
-static int
-intel_svm_bind_mm(struct device *dev, unsigned int flags,
- struct mm_struct *mm, struct intel_svm_dev **sd)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
+ unsigned int flags)
{
- struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
- struct intel_svm *svm = NULL, *t;
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
- unsigned long iflags;
- int pasid_max;
- int ret;
+ ioasid_t max_pasid = dev_is_pci(dev) ?
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
- if (!iommu || dmar_disabled)
- return -EINVAL;
+ return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
+}
- if (!intel_svm_capable(iommu))
- return -ENOTSUPP;
+static void intel_svm_free_pasid(struct mm_struct *mm)
+{
+ iommu_sva_free_pasid(mm);
+}
- if (dev_is_pci(dev)) {
- pasid_max = pci_max_pasids(to_pci_dev(dev));
- if (pasid_max < 0)
- return -EINVAL;
- } else
- pasid_max = 1 << 20;
+static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
+ struct device *dev,
+ struct mm_struct *mm,
+ unsigned int flags)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ unsigned long iflags, sflags;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = 0;
- /* Bind supervisor PASID shuld have mm = NULL */
- if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap) || mm) {
- pr_err("Supervisor PASID with user provided mm.\n");
- return -EINVAL;
- }
- }
+ svm = pasid_private_find(mm->pasid);
+ if (!svm) {
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm)
+ return ERR_PTR(-ENOMEM);
- list_for_each_entry(t, &global_svm_list, list) {
- if (t->mm != mm)
- continue;
+ svm->pasid = mm->pasid;
+ svm->mm = mm;
+ svm->flags = flags;
+ INIT_LIST_HEAD_RCU(&svm->devs);
- svm = t;
- if (svm->pasid >= pasid_max) {
- dev_warn(dev,
- "Limited PASID width. Cannot use existing PASID %d\n",
- svm->pasid);
- ret = -ENOSPC;
- goto out;
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
+ svm->notifier.ops = &intel_mmuops;
+ ret = mmu_notifier_register(&svm->notifier, mm);
+ if (ret) {
+ kfree(svm);
+ return ERR_PTR(ret);
+ }
}
- /* Find the matching device in svm list */
- for_each_svm_dev(sdev, svm, dev) {
- sdev->users++;
- goto success;
+ ret = pasid_private_add(svm->pasid, svm);
+ if (ret) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ kfree(svm);
+ return ERR_PTR(ret);
}
+ }
- break;
+ /* Find the matching device in svm list */
+ sdev = svm_lookup_device_by_dev(svm, dev);
+ if (sdev) {
+ sdev->users++;
+ goto success;
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
- goto out;
+ goto free_svm;
}
+
sdev->dev = dev;
sdev->iommu = iommu;
-
- ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret) {
- kfree(sdev);
- goto out;
- }
-
- info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ sdev->users = 1;
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
@@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
sdev->qdep = 0;
}
- /* Finish the setup now we know we're keeping it */
- sdev->users = 1;
- init_rcu_head(&sdev->rcu);
-
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm) {
- ret = -ENOMEM;
- kfree(sdev);
- goto out;
- }
-
- if (pasid_max > intel_pasid_max_id)
- pasid_max = intel_pasid_max_id;
+ /* Setup the pasid table: */
+ sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
+ PASID_FLAG_SUPERVISOR_MODE : 0;
+ sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
+ FLPT_DEFAULT_DID, sflags);
+ spin_unlock_irqrestore(&iommu->lock, iflags);
- /* Do not use PASID 0, reserved for RID to PASID */
- svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
- if (svm->pasid == INVALID_IOASID) {
- kfree(svm);
- kfree(sdev);
- ret = -ENOSPC;
- goto out;
- }
- svm->notifier.ops = &intel_mmuops;
- svm->mm = mm;
- svm->flags = flags;
- INIT_LIST_HEAD_RCU(&svm->devs);
- INIT_LIST_HEAD(&svm->list);
- ret = -ENOMEM;
- if (mm) {
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
- }
+ if (ret)
+ goto free_sdev;
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- if (mm)
- mmu_notifier_unregister(&svm->notifier, mm);
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
+ /* The newly allocated pasid is loaded to the mm. */
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
+ load_pasid(mm, svm->pasid);
- list_add_tail(&svm->list, &global_svm_list);
- if (mm) {
- /* The newly allocated pasid is loaded to the mm. */
- load_pasid(mm, svm->pasid);
- }
- } else {
- /*
- * Binding a new device with existing PASID, need to setup
- * the PASID entry.
- */
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- kfree(sdev);
- goto out;
- }
- }
list_add_rcu(&sdev->list, &svm->devs);
success:
- sdev->pasid = svm->pasid;
- sdev->sva.dev = dev;
- if (sd)
- *sd = sdev;
- ret = 0;
-out:
- return ret;
+ return &sdev->sva;
+
+free_sdev:
+ kfree(sdev);
+free_svm:
+ if (list_empty(&svm->devs)) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ pasid_private_remove(mm->pasid);
+ kfree(svm);
+ }
+
+ return ERR_PTR(ret);
}
/* Caller must hold pasid_mutex */
@@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
+ struct mm_struct *mm;
int ret = -EINVAL;
iommu = device_to_iommu(dev, NULL, NULL);
@@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
if (ret)
goto out;
+ mm = svm->mm;
if (sdev) {
sdev->users--;
@@ -663,13 +675,13 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
- ioasid_put(svm->pasid);
- if (svm->mm) {
- mmu_notifier_unregister(&svm->notifier, svm->mm);
+ intel_svm_free_pasid(mm);
+ if (svm->notifier.ops) {
+ mmu_notifier_unregister(&svm->notifier, mm);
/* Clear mm's pasid. */
- load_pasid(svm->mm, PASID_DISABLED);
+ load_pasid(mm, PASID_DISABLED);
}
- list_del(&svm->list);
+ pasid_private_remove(svm->pasid);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
@@ -714,22 +726,6 @@ struct page_req_dsc {
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
-static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
-{
- unsigned long requested = 0;
-
- if (req->exe_req)
- requested |= VM_EXEC;
-
- if (req->rd_req)
- requested |= VM_READ;
-
- if (req->wr_req)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@@ -799,6 +795,8 @@ prq_retry:
goto prq_retry;
}
+ iopf_queue_flush_dev(dev);
+
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
@@ -841,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
return prot;
}
-static int
-intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
+ struct page_req_dsc *desc)
{
struct iommu_fault_event event;
@@ -872,159 +870,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
*/
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
- memcpy(event.fault.prm.private_data, desc->priv_data,
- sizeof(desc->priv_data));
+ event.fault.prm.private_data[0] = desc->priv_data[0];
+ event.fault.prm.private_data[1] = desc->priv_data[1];
+ } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
+ /*
+ * If the private data fields are not used by hardware, use it
+ * to monitor the prq handle latency.
+ */
+ event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
}
return iommu_report_device_fault(dev, &event);
}
+static void handle_bad_prq_event(struct intel_iommu *iommu,
+ struct page_req_dsc *req, int result)
+{
+ struct qi_desc desc;
+
+ pr_err("%s: Invalid page request: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must
+ * respond with page group response if private data
+ * is present (PDP) or last page in group (LPIG) bit
+ * is set. This is an additional VT-d feature beyond
+ * PCI ATS spec.
+ */
+ if (!req->lpig && !req->priv_data_present)
+ return;
+
+ desc.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_PDP(req->priv_data_present) |
+ QI_PGRP_RESP_CODE(result) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
+
+ if (req->priv_data_present) {
+ desc.qw2 = req->priv_data[0];
+ desc.qw3 = req->priv_data[1];
+ } else {
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ }
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
- int head, tail, handled = 0;
- unsigned int flags = 0;
+ struct page_req_dsc *req;
+ int head, tail, handled;
+ u64 address;
- /* Clear PPR bit before reading head/tail registers, to
- * ensure that we get a new interrupt if needed. */
+ /*
+ * Clear PPR bit before reading head/tail registers, to ensure that
+ * we get a new interrupt if needed.
+ */
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ handled = (head != tail);
while (head != tail) {
- struct vm_area_struct *vma;
- struct page_req_dsc *req;
- struct qi_desc resp;
- int result;
- vm_fault_t ret;
- u64 address;
-
- handled = 1;
req = &iommu->prq[head / sizeof(*req)];
- result = QI_RESP_INVALID;
address = (u64)req->addr << VTD_PAGE_SHIFT;
- if (!req->pasid_present) {
- pr_err("%s: Page request without PASID: %08llx %08llx\n",
- iommu->name, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
+
+ if (unlikely(!req->pasid_present)) {
+ pr_err("IOMMU: %s: Page request without PASID\n",
+ iommu->name);
+bad_req:
+ svm = NULL;
+ sdev = NULL;
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ goto prq_advance;
}
- /* We shall not receive page request for supervisor SVM */
- if (req->pm_req && (req->rd_req | req->wr_req)) {
- pr_err("Unexpected page request in Privilege Mode");
- /* No need to find the matching sdev as for bad_req */
- goto no_pasid;
+
+ if (unlikely(!is_canonical_address(address))) {
+ pr_err("IOMMU: %s: Address is not canonical\n",
+ iommu->name);
+ goto bad_req;
}
- /* DMA read with exec requeset is not supported. */
- if (req->exe_req && req->rd_req) {
- pr_err("Execution request not supported\n");
- goto no_pasid;
+
+ if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
+ pr_err("IOMMU: %s: Page request in Privilege Mode\n",
+ iommu->name);
+ goto bad_req;
}
+
+ if (unlikely(req->exe_req && req->rd_req)) {
+ pr_err("IOMMU: %s: Execution request not supported\n",
+ iommu->name);
+ goto bad_req;
+ }
+
if (!svm || svm->pasid != req->pasid) {
- rcu_read_lock();
- svm = ioasid_find(NULL, req->pasid, NULL);
- /* It *can't* go away, because the driver is not permitted
+ /*
+ * It can't go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
- * So we only need RCU to protect the internal idr code. */
- rcu_read_unlock();
- if (IS_ERR_OR_NULL(svm)) {
- pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
- iommu->name, req->pasid, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
- }
+ */
+ svm = pasid_private_find(req->pasid);
+ if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
+ goto bad_req;
}
if (!sdev || sdev->sid != req->rid) {
- struct intel_svm_dev *t;
-
- sdev = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(t, &svm->devs, list) {
- if (t->sid == req->rid) {
- sdev = t;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_sid(svm, req->rid);
+ if (!sdev)
+ goto bad_req;
}
- /* Since we're using init_mm.pgd directly, we should never take
- * any faults on kernel addresses. */
- if (!svm->mm)
- goto bad_req;
-
- /* If address is not canonical, return invalid response */
- if (!is_canonical_address(address))
- goto bad_req;
+ sdev->prq_seq_number++;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (svm->flags & SVM_FLAG_GUEST_MODE) {
- if (sdev && !intel_svm_prq_report(sdev->dev, req))
- goto prq_advance;
- else
- goto bad_req;
- }
-
- /* If the mm is already defunct, don't handle faults. */
- if (!mmget_not_zero(svm->mm))
- goto bad_req;
-
- mmap_read_lock(svm->mm);
- vma = find_extend_vma(svm->mm, address);
- if (!vma || address < vma->vm_start)
- goto invalid;
-
- if (access_error(vma, req))
- goto invalid;
+ if (intel_svm_prq_report(iommu, sdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
- flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
- if (req->wr_req)
- flags |= FAULT_FLAG_WRITE;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
- if (ret & VM_FAULT_ERROR)
- goto invalid;
-
- result = QI_RESP_SUCCESS;
-invalid:
- mmap_read_unlock(svm->mm);
- mmput(svm->mm);
-bad_req:
- /* We get here in the error case where the PASID lookup failed,
- and these can be NULL. Do not use them below this point! */
- sdev = NULL;
- svm = NULL;
-no_pasid:
- if (req->lpig || req->priv_data_present) {
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- resp.qw0 = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID(req->rid) |
- QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
- QI_PGRP_RESP_CODE(result) |
- QI_PGRP_RESP_TYPE;
- resp.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
- resp.qw2 = 0;
- resp.qw3 = 0;
-
- if (req->priv_data_present)
- memcpy(&resp.qw2, req->priv_data,
- sizeof(req->priv_data));
- qi_submit_sync(iommu, &resp, 1, 0);
- }
+ trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ sdev->prq_seq_number);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@@ -1041,6 +1016,7 @@ prq_advance:
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
+ iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
@@ -1053,31 +1029,42 @@ prq_advance:
return IRQ_RETVAL(handled);
}
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
-struct iommu_sva *
-intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
- struct iommu_sva *sva = ERR_PTR(-EINVAL);
- struct intel_svm_dev *sdev = NULL;
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
unsigned int flags = 0;
+ struct iommu_sva *sva;
int ret;
- /*
- * TODO: Consolidate with generic iommu-sva bind after it is merged.
- * It will require shared SVM data structures, i.e. combine io_mm
- * and intel_svm etc.
- */
if (drvdata)
flags = *(unsigned int *)drvdata;
+
+ if (flags & SVM_FLAG_SUPERVISOR_MODE) {
+ if (!ecap_srs(iommu->ecap)) {
+ dev_err(dev, "%s: Supervisor PASID not supported\n",
+ iommu->name);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (mm) {
+ dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
+ iommu->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mm = &init_mm;
+ }
+
mutex_lock(&pasid_mutex);
- ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
- if (ret)
- sva = ERR_PTR(ret);
- else if (sdev)
- sva = &sdev->sva;
- else
- WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+ ret = intel_svm_alloc_pasid(dev, mm, flags);
+ if (ret) {
+ mutex_unlock(&pasid_mutex);
+ return ERR_PTR(ret);
+ }
+ sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+ if (IS_ERR_OR_NULL(sva))
+ intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;
@@ -1085,10 +1072,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
void intel_svm_unbind(struct iommu_sva *sva)
{
- struct intel_svm_dev *sdev;
+ struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
mutex_lock(&pasid_mutex);
- sdev = to_intel_svm_dev(sva);
intel_svm_unbind_mm(sdev->dev, sdev->pasid);
mutex_unlock(&pasid_mutex);
}
@@ -1194,9 +1180,14 @@ int intel_svm_page_response(struct device *dev,
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
- if (private_present)
- memcpy(&desc.qw2, prm->private_data,
- sizeof(prm->private_data));
+
+ if (private_present) {
+ desc.qw2 = prm->private_data[0];
+ desc.qw3 = prm->private_data[1];
+ } else if (prm->private_data[0]) {
+ dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
+ ktime_to_ns(ktime_get()) - prm->private_data[0]);
+ }
qi_submit_sync(iommu, &desc, 1, 0);
}
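
Note (not part of the patch): the sketch below illustrates the per-PASID private-data helpers introduced at the top of this diff (pasid_private_add(), pasid_private_find(), pasid_private_remove()), which replace the ioasid_set_data()/ioasid_find() calls with a driver-local xarray. The caller and the "example_data" argument are hypothetical and exist only to show the intended round trip; the real users are intel_svm_bind_gpasid(), intel_svm_bind_mm(), intel_svm_unbind_mm() and the PRQ fault path.

/*
 * Illustration only, not part of the patch: intended round trip through
 * the new helpers. The caller and "example_data" are hypothetical.
 */
static int example_pasid_private_round_trip(ioasid_t pasid, void *example_data)
{
	int ret;

	/*
	 * xa_alloc() with XA_LIMIT(pasid, pasid) permits exactly one index,
	 * so this behaves as "insert at this PASID, fail if already taken"
	 * (-EBUSY) rather than silently overwriting a previous owner.
	 */
	ret = pasid_private_add(pasid, example_data);
	if (ret)
		return ret;

	/* Lock-free lookup, as done from the page request (PRQ) handler. */
	if (pasid_private_find(pasid) != example_data)
		pr_warn("unexpected private data for PASID %u\n", pasid);

	/* Drop the association when the last device unbinds from the PASID. */
	pasid_private_remove(pasid);

	return 0;
}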