 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 172
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   5
 2 files changed, 177 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d8cec5ebe1d4..74b38856cce3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -33,6 +33,178 @@
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_job *job;
+	unsigned int num_dw, num_bytes;
+	struct dma_fence *fence;
+	uint64_t src_addr, dst_addr;
+	uint64_t pte_flags;
+	void *cpu_addr;
+	int r;
+
+	/* use gart window 0 */
+	*gart_addr = adev->gmc.gart_start;
+
+	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+	num_bytes = npages * 8;
+
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+				     AMDGPU_IB_POOL_DELAYED, &job);
+	if (r)
+		return r;
+
+	src_addr = num_dw * 4;
+	src_addr += job->ibs[0].gpu_addr;
+
+	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+				dst_addr, num_bytes, false);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+
+	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+		pte_flags |= AMDGPU_PTE_WRITEABLE;
+	pte_flags |= adev->gart.gart_pte_flags;
+
+	cpu_addr = &job->ibs[0].ptr[num_dw];
+
+	r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, &adev->mman.entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	dma_fence_put(fence);
+
+	return r;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - copy data between ram and vram with sdma
+ *
+ * @adev: amdgpu device the sdma ring is running on
+ * @sys: dma address array of the system memory pages
+ * @vram: vram address array (offsets from the start of vram)
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal when the copy is done
+ *
+ * The ram side is accessed through contiguous GART table entries mapped to
+ * the system pages; the vram side uses the direct mapping of vram pages, so
+ * the vram range must be npages of contiguous pages.
+ * The GART update and the sdma copy share the same buffer-copy ring. The copy
+ * is split into transfers of at most GTT_MAX_PAGES pages, all serialized; the
+ * fence of the last transfer is returned to wait for the whole copy to finish.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+			     uint64_t *vram, uint64_t npages,
+			     enum MIGRATION_COPY_DIR direction,
+			     struct dma_fence **mfence)
+{
+	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	uint64_t gart_s, gart_d;
+	struct dma_fence *next;
+	uint64_t size;
+	int r;
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+
+	while (npages) {
+		size = min(GTT_MAX_PAGES, npages);
+
+		if (direction == FROM_VRAM_TO_RAM) {
+			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+		} else if (direction == FROM_RAM_TO_VRAM) {
+			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+						 KFD_IOCTL_SVM_FLAG_GPU_RO);
+			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+		}
+		if (r) {
+			pr_debug("failed %d to create gart mapping\n", r);
+			goto out_unlock;
+		}
+
+		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+				       NULL, &next, false, true, false);
+		if (r) {
+			pr_debug("failed %d to copy memory\n", r);
+			goto out_unlock;
+		}
+
+		dma_fence_put(*mfence);
+		*mfence = next;
+		npages -= size;
+		if (npages) {
+			sys += size;
+			vram += size;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
+	return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for the sdma memory copy to finish
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for the dma fence to be signaled; if the copy was split into multiple
+ * sdma operations, this is the fence of the last one.
+ *
+ * Context: called after svm_migrate_copy_memory_gart
+ *
+ * Return:
+ * 0		- success
+ * otherwise	- error code from dma fence wait
+ */
+int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+	int r = 0;
+
+	if (mfence) {
+		r = dma_fence_wait(mfence, false);
+		dma_fence_put(mfence);
+		pr_debug("sdma copy memory fence done\n");
+	}
+
+	return r;
+}
+
 static void svm_migrate_page_free(struct page *page)
 {
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 89392548ec44..df84e4143e25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -33,6 +33,11 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+enum MIGRATION_COPY_DIR {
+	FROM_RAM_TO_VRAM = 0,
+	FROM_VRAM_TO_RAM
+};
+
 #if defined(CONFIG_DEVICE_PRIVATE)
 int svm_migrate_init(struct amdgpu_device *adev);
 void svm_migrate_fini(struct amdgpu_device *adev);
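For reference, here is a rough sketch of how a caller inside kfd_migrate.c might drive the two helpers for a RAM-to-VRAM migration. The function name and the way the sys/vram arrays are prepared are illustrative only and are not part of this patch; it only assumes what the kernel-doc above states, i.e. sys[] holds dma-mapped system page addresses and vram[] holds page offsets from the start of vram.

/* Illustrative sketch, not part of the patch: queue the sdma copy of
 * npages from system memory to vram, then wait for it to complete.
 */
static int svm_migrate_example_ram_to_vram(struct amdgpu_device *adev,
					   dma_addr_t *sys, uint64_t *vram,
					   uint64_t npages)
{
	struct dma_fence *mfence = NULL;
	int r, r2;

	/* queue the transfers; mfence ends up holding the last sdma fence */
	r = svm_migrate_copy_memory_gart(adev, sys, vram, npages,
					 FROM_RAM_TO_VRAM, &mfence);

	/* wait for and release the last fence even if the copy loop failed
	 * partway, so already-queued transfers are not left dangling;
	 * svm_migrate_copy_done() is a no-op when mfence is NULL
	 */
	r2 = svm_migrate_copy_done(adev, mfence);

	return r ? r : r2;
}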