diff options
author | Alexey Skidanov <alexey.skidanov@gmail.com> | 2015-04-14 18:05:49 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-05-19 13:02:27 +0300 |
commit | 59d3e8be87a14c6a0d91c683e63d9b31734525ae (patch) | |
tree | 1f23bcca62cdfcec10055b83463d51129ec0d590 /drivers/gpu/drm/amd/amdkfd/kfd_device.c | |
parent | f3a398183f7b9ef78f6b71ee9f7641e046403bcb (diff) |
drm/amdkfd: Add memory exception handling
This patch adds Peripheral Page Request (PPR) failure processing
and reporting.
Bad address or pointer to a system memory block with inappropriate
read/write permission cause such PPR failure during a user queue
processing. PPR request handling is done by IOMMU driver notifying
AMDKFD module on PPR failure.
The process triggering a PPR failure will be notified by
appropriate event or SIGTERM signal will be sent to it.
v3:
- Change all bool fields in struct kfd_memory_exception_failure to
uint32_t
Signed-off-by: Alexey Skidanov <alexey.skidanov@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4c0316957a57..52cab0f53ebc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -182,6 +182,32 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) kfd_unbind_process_from_device(dev, pasid); } +/* + * This function called by IOMMU driver on PPR failure + */ +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + unsigned long address, u16 flags) +{ + struct kfd_dev *dev; + + dev_warn(kfd_device, + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + PCI_BUS_NUM(pdev->devfn), + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pasid, + address, + flags); + + dev = kfd_device_by_pci_dev(pdev); + BUG_ON(dev == NULL); + + kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -251,6 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { @@ -316,6 +343,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) if (kfd->init_complete) { kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); amd_iommu_free_device(kfd->pdev); } } @@ -335,6 +363,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return -ENXIO; amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); kfd->dqm->ops.start(kfd->dqm); } |