author     Philip Yang <Philip.Yang@amd.com>    2020-02-07 17:08:04 -0500
committer  Alex Deucher <alexander.deucher@amd.com>    2021-04-20 21:48:23 -0400
commit     50ea50cf6f6d31d3235ad1853c5dbea766a3ed11 (patch)
tree       d8db08b809619b235b7d7c54079924694fd0d656 /drivers/gpu/drm/amd/amdkfd
parent     b53fa124acdcec6d05bcdb36b55bf0f84471b1b7 (diff)
drm/amdkfd: copy memory through gart table
Use sdma linear copy to migrate data between ram and vram. The sdma linear copy command uses the kernel buffer function queue to access system memory through the gart table. Use reserved gart table window 0 to map system page addresses; vram page addresses are mapped directly.

Use the same kernel buffer function to fill in the gart table mapping, so the mapping update is serialized with the memory copy by sdma job submission. Only the last memory copy sdma fence needs to be waited on for larger buffer migrations.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
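Not part of the patch: a minimal sketch of how a migration path inside kfd_migrate.c could chain the two helpers added below, assuming the dma-mapped system page array sys[] and the vram page offset array vram[] have already been prepared. The wrapper name svm_migrate_ram_to_vram_sketch is hypothetical.

    /* Hypothetical caller (illustration only, not in this patch):
     * copy npages from system memory to vram, then wait for the last
     * sdma copy fence before treating the data copy as complete.
     */
    static int
    svm_migrate_ram_to_vram_sketch(struct amdgpu_device *adev,
                                   dma_addr_t *sys, uint64_t *vram,
                                   uint64_t npages)
    {
        struct dma_fence *mfence = NULL;
        int r;

        /* queues gart window 0 mapping updates and sdma copies,
         * chunked to AMDGPU_GTT_MAX_TRANSFER_SIZE pages, and returns
         * the fence of the last sdma copy in mfence
         */
        r = svm_migrate_copy_memory_gart(adev, sys, vram, npages,
                                         FROM_RAM_TO_VRAM, &mfence);
        if (!r)
            /* only the final sdma fence needs to be waited on */
            r = svm_migrate_copy_done(adev, mfence);

        return r;
    }

Because every chunk's gart update and copy are submitted to the same buffer function ring, they execute in order, so waiting on the final fence is sufficient to know all earlier chunks have completed.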
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  | 172
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h  |   5
2 files changed, 177 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d8cec5ebe1d4..74b38856cce3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -33,6 +33,178 @@
#include "kfd_svm.h"
#include "kfd_migrate.h"
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+ return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+ dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ unsigned int num_dw, num_bytes;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ uint64_t pte_flags;
+ void *cpu_addr;
+ int r;
+
+ /* use gart window 0 */
+ *gart_addr = adev->gmc.gart_start;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = npages * 8;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, false);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+ if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
+ pte_flags |= adev->gart.gart_pte_flags;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+ if (r)
+ goto error_free;
+
+ r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_free;
+
+ dma_fence_put(fence);
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring is running on
+ * @sys: system memory dma address array (source or destination)
+ * @vram: vram page offset array (source or destination)
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence signaled after the last sdma copy is done
+ *
+ * The ram address uses contiguous GART table entries mapped to the ram pages,
+ * the vram address uses direct mapping of vram pages, which must be npages
+ * contiguous pages.
+ * The GART update and the sdma copy share the same buffer function ring; the
+ * copy is split into multiple transfers of at most GTT_MAX_PAGES pages, so all
+ * sdma operations are serialized and waiting for the returned fence of the
+ * last sdma operation is enough to know the whole copy is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+ uint64_t *vram, uint64_t npages,
+ enum MIGRATION_COPY_DIR direction,
+ struct dma_fence **mfence)
+{
+ const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ uint64_t gart_s, gart_d;
+ struct dma_fence *next;
+ uint64_t size;
+ int r;
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+
+ while (npages) {
+ size = min(GTT_MAX_PAGES, npages);
+
+ if (direction == FROM_VRAM_TO_RAM) {
+ gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+ r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+ } else if (direction == FROM_RAM_TO_VRAM) {
+ r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+ KFD_IOCTL_SVM_FLAG_GPU_RO);
+ gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+ }
+ if (r) {
+ pr_debug("failed %d to create gart mapping\n", r);
+ goto out_unlock;
+ }
+
+ r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+ NULL, &next, false, true, false);
+ if (r) {
+ pr_debug("failed %d to copy memory\n", r);
+ goto out_unlock;
+ }
+
+ dma_fence_put(*mfence);
+ *mfence = next;
+ npages -= size;
+ if (npages) {
+ sys += size;
+ vram += size;
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for the memory copy sdma to finish
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for the dma fence to be signaled. If the copy was split into multiple
+ * sdma operations, this is the fence of the last sdma operation.
+ *
+ * Context: called after svm_migrate_copy_memory_gart
+ *
+ * Return:
+ * 0 - success
+ * otherwise - error code from dma fence signal
+ */
+int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+ int r = 0;
+
+ if (mfence) {
+ r = dma_fence_wait(mfence, false);
+ dma_fence_put(mfence);
+ pr_debug("sdma copy memory fence done\n");
+ }
+
+ return r;
+}
+
static void svm_migrate_page_free(struct page *page)
{
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 89392548ec44..df84e4143e25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -33,6 +33,11 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
+enum MIGRATION_COPY_DIR {
+ FROM_RAM_TO_VRAM = 0,
+ FROM_VRAM_TO_RAM
+};
+
#if defined(CONFIG_DEVICE_PRIVATE)
int svm_migrate_init(struct amdgpu_device *adev);
void svm_migrate_fini(struct amdgpu_device *adev);