Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r--  drivers/gpu/drm/nouveau/Kconfig                       7
-rw-r--r--  drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h    23
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c      117
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c       580
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h       112
5 files changed, 836 insertions, 3 deletions
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index c02a13406a81..4b75ad40dd80 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -56,6 +56,13 @@ config NOUVEAU_DEBUG_DEFAULT
help
Selects the default debug level
+config NOUVEAU_DEBUG_MMU
+ bool "Enable additional MMU debugging"
+ depends on DRM_NOUVEAU
+ default n
+ help
+ Say Y here if you want to enable verbose MMU debug output.
+
config DRM_NOUVEAU_BACKLIGHT
bool "Support for backlight control"
depends on DRM_NOUVEAU
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index eb6704a2fc8f..ccd87d508d04 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -1,6 +1,7 @@
#ifndef __NVKM_MMU_H__
#define __NVKM_MMU_H__
#include <core/subdev.h>
+#include <core/memory.h>
#include <core/mm.h>
struct nvkm_gpuobj;
struct nvkm_mem;
@@ -11,6 +12,8 @@ struct nvkm_vm_pgt {
};
struct nvkm_vma {
+ struct nvkm_memory *memory;
+ struct nvkm_tags *tags;
struct nvkm_vm *vm;
struct nvkm_mm_node *node;
union {
@@ -24,6 +27,7 @@ struct nvkm_vm {
const struct nvkm_vmm_func *func;
struct nvkm_mmu *mmu;
const char *name;
+ u32 debug;
struct kref kref;
struct mutex mutex;
@@ -58,6 +62,25 @@ void nvkm_vm_map_at(struct nvkm_vma *, u64 offset, struct nvkm_mem *);
void nvkm_vm_unmap(struct nvkm_vma *);
void nvkm_vm_unmap_at(struct nvkm_vma *, u64 offset, u64 length);
+int nvkm_vmm_boot(struct nvkm_vmm *);
+
+struct nvkm_vmm_map {
+ struct nvkm_memory *memory;
+ u64 offset;
+
+ struct nvkm_mm_node *mem;
+ struct scatterlist *sgl;
+ dma_addr_t *dma;
+ u64 off;
+
+ const struct nvkm_vmm_page *page;
+
+ struct nvkm_tags *tags;
+ u64 next;
+ u64 type;
+ u64 ctag;
+};
+
struct nvkm_mmu {
const struct nvkm_mmu_func *func;
struct nvkm_subdev subdev;
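For orientation only — this is not part of the patch — the new struct nvkm_vmm_map describes one mapping request. A minimal sketch, reusing the local names from the base.c hunks that follow ('node', 'mem'); which fields the per-GPU hooks fill afterwards is an assumption noted in the comments:

/* Sketch only: exactly one backing description is supplied per map request,
 * mirroring the initialisations in the base.c hunks below.
 */
struct nvkm_vmm_map map_vram = { .mem = node->mem };	/* VRAM page list */
struct nvkm_vmm_map map_sgl  = { .sgl = mem->sg->sgl };	/* scatter/gather list */
struct nvkm_vmm_map map_dma  = { .dma = mem->pages };	/* array of DMA addresses */
/* nvkm_vm_map_() then sets .memory and .page; the remaining fields (.type,
 * .ctag, .next, .tags) are assumed to be filled by the per-GPU valid() hook
 * before the PTE writers consume them, with .tags handed back to the VMA.
 */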
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index 22264d3db22f..536187952372 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -213,6 +213,36 @@ nvkm_mmu_ptc_get(struct nvkm_mmu *mmu, u32 size, u32 align, bool zero)
return pt;
}
+static void
+nvkm_vm_map_(const struct nvkm_vmm_page *page, struct nvkm_vma *vma, u64 delta,
+ struct nvkm_mem *mem, nvkm_vmm_pte_func fn,
+ struct nvkm_vmm_map *map)
+{
+ struct nvkm_vmm *vmm = vma->vm;
+ void *argv = NULL;
+ u32 argc = 0;
+ int ret;
+
+ map->memory = mem->memory;
+ map->page = page;
+
+ if (vmm->func->valid) {
+ ret = vmm->func->valid(vmm, argv, argc, map);
+ if (WARN_ON(ret))
+ return;
+ }
+
+ mutex_lock(&vmm->mutex);
+ nvkm_vmm_ptes_map(vmm, page, ((u64)vma->node->offset << 12) + delta,
+ (u64)vma->node->length << 12, map, fn);
+ mutex_unlock(&vmm->mutex);
+
+ nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
+ nvkm_memory_unref(&vma->memory);
+ vma->memory = nvkm_memory_ref(map->memory);
+ vma->tags = map->tags;
+}
+
void
nvkm_mmu_ptc_dump(struct nvkm_mmu *mmu)
{
@@ -251,6 +281,7 @@ nvkm_mmu_ptc_init(struct nvkm_mmu *mmu)
void
nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
{
+ const struct nvkm_vmm_page *page = vma->vm->func->page;
struct nvkm_vm *vm = vma->vm;
struct nvkm_mmu *mmu = vm->mmu;
struct nvkm_mm_node *r = node->mem;
@@ -262,6 +293,14 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
u32 max = 1 << (mmu->func->pgt_bits - bits);
u32 end, len;
+ if (page->desc->func->unmap) {
+ struct nvkm_vmm_map map = { .mem = node->mem };
+ while (page->shift != vma->node->type)
+ page++;
+ nvkm_vm_map_(page, vma, delta, node, page->desc->func->mem, &map);
+ return;
+ }
+
delta = 0;
while (r) {
u64 phys = (u64)r->offset << 12;
@@ -297,6 +336,7 @@ static void
nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length,
struct nvkm_mem *mem)
{
+ const struct nvkm_vmm_page *page = vma->vm->func->page;
struct nvkm_vm *vm = vma->vm;
struct nvkm_mmu *mmu = vm->mmu;
int big = vma->node->type != mmu->func->spg_shift;
@@ -311,6 +351,14 @@ nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length,
int i;
struct scatterlist *sg;
+ if (page->desc->func->unmap) {
+ struct nvkm_vmm_map map = { .sgl = mem->sg->sgl };
+ while (page->shift != vma->node->type)
+ page++;
+ nvkm_vm_map_(page, vma, delta, mem, page->desc->func->sgl, &map);
+ return;
+ }
+
for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
sglen = sg_dma_len(sg) >> PAGE_SHIFT;
@@ -355,6 +403,7 @@ static void
nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length,
struct nvkm_mem *mem)
{
+ const struct nvkm_vmm_page *page = vma->vm->func->page;
struct nvkm_vm *vm = vma->vm;
struct nvkm_mmu *mmu = vm->mmu;
dma_addr_t *list = mem->pages;
@@ -367,6 +416,14 @@ nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length,
u32 max = 1 << (mmu->func->pgt_bits - bits);
u32 end, len;
+ if (page->desc->func->unmap) {
+ struct nvkm_vmm_map map = { .dma = mem->pages };
+ while (page->shift != vma->node->type)
+ page++;
+ nvkm_vm_map_(page, vma, delta, mem, page->desc->func->dma, &map);
+ return;
+ }
+
while (num) {
struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
@@ -415,6 +472,17 @@ nvkm_vm_unmap_at(struct nvkm_vma *vma, u64 delta, u64 length)
u32 max = 1 << (mmu->func->pgt_bits - bits);
u32 end, len;
+ if (vm->func->page->desc->func->unmap) {
+ const struct nvkm_vmm_page *page = vm->func->page;
+ while (page->shift != vma->node->type)
+ page++;
+ mutex_lock(&vm->mutex);
+ nvkm_vmm_ptes_unmap(vm, page, (vma->node->offset << 12) + delta,
+ vma->node->length << 12, false);
+ mutex_unlock(&vm->mutex);
+ return;
+ }
+
while (num) {
struct nvkm_memory *pgt = vm->pgt[pde].mem[big];
@@ -440,6 +508,9 @@ void
nvkm_vm_unmap(struct nvkm_vma *vma)
{
nvkm_vm_unmap_at(vma, 0, (u64)vma->node->length << 12);
+
+ nvkm_memory_tags_put(vma->memory, vma->vm->mmu->subdev.device, &vma->tags);
+ nvkm_memory_unref(&vma->memory);
}
static void
@@ -509,6 +580,22 @@ nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access,
return ret;
}
+ if (vm->func->page->desc->func->unmap) {
+ const struct nvkm_vmm_page *page = vm->func->page;
+ while (page->shift != page_shift)
+ page++;
+
+ ret = nvkm_vmm_ptes_get(vm, page, vma->node->offset << 12,
+ vma->node->length << 12);
+ if (ret) {
+ nvkm_mm_free(&vm->mm, &vma->node);
+ mutex_unlock(&vm->mutex);
+ return ret;
+ }
+
+ goto done;
+ }
+
fpde = (vma->node->offset >> mmu->func->pgt_bits);
lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits;
@@ -530,8 +617,11 @@ nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access,
return ret;
}
}
+done:
mutex_unlock(&vm->mutex);
+ vma->memory = NULL;
+ vma->tags = NULL;
vma->vm = NULL;
nvkm_vm_ref(vm, &vma->vm, NULL);
vma->offset = (u64)vma->node->offset << 12;
@@ -551,11 +641,25 @@ nvkm_vm_put(struct nvkm_vma *vma)
vm = vma->vm;
mmu = vm->mmu;
+ nvkm_memory_tags_put(vma->memory, mmu->subdev.device, &vma->tags);
+ nvkm_memory_unref(&vma->memory);
+
fpde = (vma->node->offset >> mmu->func->pgt_bits);
lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits;
mutex_lock(&vm->mutex);
+ if (vm->func->page->desc->func->unmap) {
+ const struct nvkm_vmm_page *page = vm->func->page;
+ while (page->shift != vma->node->type)
+ page++;
+
+ nvkm_vmm_ptes_put(vm, page, vma->node->offset << 12,
+ vma->node->length << 12);
+ goto done;
+ }
+
nvkm_vm_unmap_pgt(vm, vma->node->type != mmu->func->spg_shift, fpde, lpde);
+done:
nvkm_mm_free(&vm->mm, &vma->node);
mutex_unlock(&vm->mutex);
@@ -569,6 +673,9 @@ nvkm_vm_boot(struct nvkm_vm *vm, u64 size)
struct nvkm_memory *pgt;
int ret;
+ if (vm->func->page->desc->func->unmap)
+ return nvkm_vmm_boot(vm);
+
ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST,
(size >> mmu->func->spg_shift) * 8, 0x1000, true, &pgt);
if (ret == 0) {
@@ -660,7 +767,7 @@ nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_memory *inst)
if (ret)
return ret;
- if (ref->mmu->func->map_pgt) {
+ if (!ref->func->page->desc->func->unmap && ref->mmu->func->map_pgt) {
for (i = ref->fpde; i <= ref->lpde; i++)
ref->mmu->func->map_pgt(ref, i, ref->pgt[i - ref->fpde].mem);
}
@@ -672,8 +779,12 @@ nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_memory *inst)
if (*ptr) {
if ((*ptr)->func->part && inst)
(*ptr)->func->part(*ptr, inst);
- if ((*ptr)->bootstrapped && inst)
- nvkm_memory_unref(&(*ptr)->pgt[0].mem[0]);
+ if ((*ptr)->bootstrapped && inst) {
+ if (!(*ptr)->func->page->desc->func->unmap) {
+ nvkm_memory_unref(&(*ptr)->pgt[0].mem[0]);
+ (*ptr)->bootstrapped = false;
+ }
+ }
kref_put(&(*ptr)->refcount, nvkm_vm_del);
}
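Several of the hunks above repeat two small lookups over vmm->func->page[]. For illustration only — not patch code — a standalone model of both, assuming example shift values:

/* Standalone sketch (not kernel code): vmm->func->page[] is an array of
 * supported page sizes, terminated by an entry with shift == 0.
 */
#include <stdio.h>

struct page_size { unsigned shift; };

int main(void)
{
	const struct page_size page[] = { { 16 }, { 12 }, { 0 } };	/* example values */
	const struct page_size *p;
	unsigned vma_type = 12;		/* stands in for vma->node->type */

	/* Match the VMA's page shift, as in nvkm_vm_map_at()/nvkm_vm_put(). */
	for (p = page; p->shift != vma_type; p++)
		;
	printf("mapping with shift %u\n", p->shift);

	/* Pick the smallest (last) page size, as in nvkm_vmm_boot()/nvkm_vmm_dtor(). */
	for (p = page; p[1].shift; p++)
		;
	printf("bootstrapping with shift %u\n", p->shift);
	return 0;
}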
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 7e00b9adca05..46c7fecf0054 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -67,9 +67,559 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
return pgt;
}
+struct nvkm_vmm_iter {
+ const struct nvkm_vmm_page *page;
+ const struct nvkm_vmm_desc *desc;
+ struct nvkm_vmm *vmm;
+ u64 cnt;
+ u16 max, lvl;
+ u32 pte[NVKM_VMM_LEVELS_MAX];
+ struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
+ int flush;
+};
+
+#ifdef CONFIG_NOUVEAU_DEBUG_MMU
+static const char *
+nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
+{
+ switch (desc->type) {
+ case PGD: return "PGD";
+ case PGT: return "PGT";
+ case SPT: return "SPT";
+ case LPT: return "LPT";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void
+nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
+{
+ int lvl;
+ for (lvl = it->max; lvl >= 0; lvl--) {
+ if (lvl >= it->lvl)
+ buf += sprintf(buf, "%05x:", it->pte[lvl]);
+ else
+ buf += sprintf(buf, "xxxxx:");
+ }
+}
+
+#define TRA(i,f,a...) do { \
+ char _buf[NVKM_VMM_LEVELS_MAX * 7]; \
+ struct nvkm_vmm_iter *_it = (i); \
+ nvkm_vmm_trace(_it, _buf); \
+ VMM_TRACE(_it->vmm, "%s "f, _buf, ##a); \
+} while(0)
+#else
+#define TRA(i,f,a...)
+#endif
+
+static inline void
+nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
+{
+ it->flush = min(it->flush, it->max - it->lvl);
+}
+
+static inline void
+nvkm_vmm_flush(struct nvkm_vmm_iter *it)
+{
+ if (it->flush != NVKM_VMM_LEVELS_MAX) {
+ if (it->vmm->func->flush) {
+ TRA(it, "flush: %d", it->flush);
+ it->vmm->func->flush(it->vmm, it->flush);
+ }
+ it->flush = NVKM_VMM_LEVELS_MAX;
+ }
+}
+
+static void
+nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc[it->lvl].type == SPT;
+ struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
+ struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
+ struct nvkm_mmu_pt *pt = pgt->pt[type];
+ struct nvkm_vmm *vmm = it->vmm;
+ u32 pdei = it->pte[it->lvl + 1];
+
+ /* Recurse up the tree, unreferencing/destroying unneeded PDs. */
+ it->lvl++;
+ if (--pgd->refs[0]) {
+ const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
+ /* PD has other valid PDEs, so we need a proper update. */
+ TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
+ pgt->pt[type] = NULL;
+ if (!pgt->refs[!type]) {
+ /* PDE no longer required. */
+ if (pgd->pt[0]) {
+ if (pgt->sparse) {
+ func->sparse(vmm, pgd->pt[0], pdei, 1);
+ pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
+ } else {
+ func->unmap(vmm, pgd->pt[0], pdei, 1);
+ pgd->pde[pdei] = NULL;
+ }
+ } else {
+ /* Special handling for Tesla-class GPUs,
+ * where there's no central PD, but each
+ * instance has its own embedded PD.
+ */
+ func->pde(vmm, pgd, pdei);
+ pgd->pde[pdei] = NULL;
+ }
+ } else {
+ /* PDE was pointing at dual-PTs and we're removing
+ * one of them, leaving the other in place.
+ */
+ func->pde(vmm, pgd, pdei);
+ }
+
+ /* GPU may have cached the PTs, flush before freeing. */
+ nvkm_vmm_flush_mark(it);
+ nvkm_vmm_flush(it);
+ } else {
+ /* PD has no valid PDEs left, so we can just destroy it. */
+ nvkm_vmm_unref_pdes(it);
+ }
+
+ /* Destroy PD/PT. */
+ TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
+ nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
+ if (!pgt->refs[!type])
+ nvkm_vmm_pt_del(&pgt);
+ it->lvl--;
+}
+
+static void
+nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
+ const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *pair = it->page[-1].desc;
+ const u32 sptb = desc->bits - pair->bits;
+ const u32 sptn = 1 << sptb;
+ struct nvkm_vmm *vmm = it->vmm;
+ u32 spti = ptei & (sptn - 1), lpti, pteb;
+
+ /* Determine how many SPTEs are being touched under each LPTE,
+ * and drop reference counts.
+ */
+ for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
+ const u32 pten = min(sptn - spti, ptes);
+ pgt->pte[lpti] -= pten;
+ ptes -= pten;
+ }
+
+ /* We're done here if there's no corresponding LPT. */
+ if (!pgt->refs[0])
+ return;
+
+ for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
+ /* Skip over any LPTEs that still have valid SPTEs. */
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+ break;
+ }
+ continue;
+ }
+
+ /* As there are no more non-UNMAPPED SPTEs left in the range
+ * covered by a number of LPTEs, the LPTEs once again take
+ * control over their address range.
+ *
+ * Determine how many LPTEs need to transition state.
+ */
+ pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+ break;
+ pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ }
+
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+ TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
+ pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
+ } else
+ if (pair->func->invalid) {
+ /* If the MMU supports it, restore the LPTE to the
+ * INVALID state to tell the MMU there is no point
+ * trying to fetch the corresponding SPTEs.
+ */
+ TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+ pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+ }
+ }
+}
+
+static bool
+nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc->type == SPT;
+ struct nvkm_vmm_pt *pgt = it->pt[0];
+
+ /* Drop PTE references. */
+ pgt->refs[type] -= ptes;
+
+ /* Dual-PTs need special handling, unless PDE becoming invalid. */
+ if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
+ nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
+
+ /* PT no longer needed? Destroy it. */
+ if (!pgt->refs[type]) {
+ it->lvl++;
+ TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
+ it->lvl--;
+ nvkm_vmm_unref_pdes(it);
+ return false; /* PTE writes for unmap() not necessary. */
+ }
+
+ return true;
+}
+
+static void
+nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
+ const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *pair = it->page[-1].desc;
+ const u32 sptb = desc->bits - pair->bits;
+ const u32 sptn = 1 << sptb;
+ struct nvkm_vmm *vmm = it->vmm;
+ u32 spti = ptei & (sptn - 1), lpti, pteb;
+
+ /* Determine how many SPTEs are being touched under each LPTE,
+ * and increase reference counts.
+ */
+ for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
+ const u32 pten = min(sptn - spti, ptes);
+ pgt->pte[lpti] += pten;
+ ptes -= pten;
+ }
+
+ /* We're done here if there's no corresponding LPT. */
+ if (!pgt->refs[0])
+ return;
+
+ for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
+ /* Skip over any LPTEs that already have valid SPTEs. */
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+ break;
+ }
+ continue;
+ }
+
+ /* As there are now non-UNMAPPED SPTEs in the range covered
+ * by a number of LPTEs, we need to transfer control of the
+ * address range to the SPTEs.
+ *
+ * Determine how many LPTEs need to transition state.
+ */
+ pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+ for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
+ if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+ break;
+ pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+ }
+
+ if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+ const u32 spti = pteb * sptn;
+ const u32 sptc = ptes * sptn;
+ /* The entire LPTE is marked as sparse, we need
+ * to make sure that the SPTEs are too.
+ */
+ TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
+ desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
+ /* Sparse LPTEs prevent SPTEs from being accessed. */
+ TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
+ pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
+ } else
+ if (pair->func->invalid) {
+ /* MMU supports blocking SPTEs by marking an LPTE
+ * as INVALID. We need to reverse that here.
+ */
+ TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
+ pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
+ }
+ }
+}
+
+static bool
+nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc->type == SPT;
+ struct nvkm_vmm_pt *pgt = it->pt[0];
+
+ /* Take PTE references. */
+ pgt->refs[type] += ptes;
+
+ /* Dual-PTs need special handling. */
+ if (desc->type == SPT)
+ nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
+
+ return true;
+}
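A condensed, standalone model (illustration only, not kernel code) of the LPTE/SPTE hand-over implemented by nvkm_vmm_ref_sptes()/nvkm_vmm_unref_sptes() above, simplified to one SPTE per LPTE per call; the flag values are placeholders for the real definitions in vmm.h, which sit outside the hunks shown here:

#include <stdint.h>
#include <stdio.h>

#define PTE_SPTES  0x1fu	/* low bits: number of mapped SPTEs under the LPTE */
#define PTE_VALID  0x20u	/* SPTEs currently control this address range */
#define PTE_SPARSE 0x80u	/* LPTE was allocated as sparse */

static void ref_spte(uint8_t *lpte)
{
	if (((*lpte)++ & PTE_SPTES) == 0)
		*lpte |= PTE_VALID;	/* first SPTE: LPTE hands control to the SPT */
}

static void unref_spte(uint8_t *lpte)
{
	if ((--(*lpte) & PTE_SPTES) == 0)
		*lpte &= ~PTE_VALID;	/* last SPTE gone: LPTE takes over again; SPARSE
					 * (if set) decides whether it is re-written as
					 * sparse rather than invalid/unmapped. */
}

int main(void)
{
	uint8_t lpte = PTE_SPARSE;
	ref_spte(&lpte);   printf("after ref:   0x%02x\n", (unsigned)lpte);	/* 0xa1 */
	unref_spte(&lpte); printf("after unref: 0x%02x\n", (unsigned)lpte);	/* 0x80 */
	return 0;
}

The real helpers additionally batch consecutive LPTEs into ranges before calling the per-GPU sparse()/invalid()/unmap() writers.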
+
+static void
+nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
+ struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
+{
+ if (desc->type == PGD) {
+ while (ptes--)
+ pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
+ } else
+ if (desc->type == LPT) {
+ memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+ }
+}
+
+static bool
+nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
+{
+ const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
+ const int type = desc->type == SPT;
+ struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
+ const bool zero = !pgt->sparse && !desc->func->invalid;
+ struct nvkm_vmm *vmm = it->vmm;
+ struct nvkm_mmu *mmu = vmm->mmu;
+ struct nvkm_mmu_pt *pt;
+ u32 pten = 1 << desc->bits;
+ u32 pteb, ptei, ptes;
+ u32 size = desc->size * pten;
+
+ pgd->refs[0]++;
+
+ pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
+ if (!pgt->pt[type]) {
+ it->lvl--;
+ nvkm_vmm_unref_pdes(it);
+ return false;
+ }
+
+ if (zero)
+ goto done;
+
+ pt = pgt->pt[type];
+
+ if (desc->type == LPT && pgt->refs[1]) {
+ /* SPT already exists covering the same range as this LPT,
+ * which means we need to be careful that any LPTEs which
+ * overlap valid SPTEs are unmapped as opposed to invalid
+ * or sparse, which would prevent the MMU from looking at
+ * the SPTEs on some GPUs.
+ */
+ for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
+ bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+ for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
+ bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+ if (spte != next)
+ break;
+ }
+
+ if (!spte) {
+ if (pgt->sparse)
+ desc->func->sparse(vmm, pt, pteb, ptes);
+ else
+ desc->func->invalid(vmm, pt, pteb, ptes);
+ memset(&pgt->pte[pteb], 0x00, ptes);
+ } else {
+ desc->func->unmap(vmm, pt, pteb, ptes);
+ while (ptes--)
+ pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+ }
+ }
+ } else {
+ if (pgt->sparse) {
+ nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
+ desc->func->sparse(vmm, pt, 0, pten);
+ } else {
+ desc->func->invalid(vmm, pt, 0, pten);
+ }
+ }
+
+done:
+ TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
+ it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
+ nvkm_vmm_flush_mark(it);
+ return true;
+}
+
+static bool
+nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
+{
+ const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
+ struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
+
+ pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
+ if (!pgt) {
+ if (!pgd->refs[0])
+ nvkm_vmm_unref_pdes(it);
+ return false;
+ }
+
+ pgd->pde[pdei] = pgt;
+ return true;
+}
+
+static inline u64
+nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, const char *name, bool ref,
+ bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32),
+ nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
+ nvkm_vmm_pxe_func CLR_PTES)
+{
+ const struct nvkm_vmm_desc *desc = page->desc;
+ struct nvkm_vmm_iter it;
+ u64 bits = addr >> page->shift;
+
+ it.page = page;
+ it.desc = desc;
+ it.vmm = vmm;
+ it.cnt = size >> page->shift;
+ it.flush = NVKM_VMM_LEVELS_MAX;
+
+ /* Deconstruct address into PTE indices for each mapping level. */
+ for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
+ it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
+ bits >>= desc[it.lvl].bits;
+ }
+ it.max = --it.lvl;
+ it.pt[it.max] = vmm->pd;
+
+ it.lvl = 0;
+ TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
+ addr, size, page->shift, it.cnt);
+ it.lvl = it.max;
+
+ /* Depth-first traversal of page tables. */
+ while (it.cnt) {
+ struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
+ const int type = desc->type == SPT;
+ const u32 pten = 1 << desc->bits;
+ const u32 ptei = it.pte[0];
+ const u32 ptes = min_t(u64, it.cnt, pten - ptei);
+
+ /* Walk down the tree, finding page tables for each level. */
+ for (; it.lvl; it.lvl--) {
+ const u32 pdei = it.pte[it.lvl];
+ struct nvkm_vmm_pt *pgd = pgt;
+
+ /* Software PT. */
+ if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
+ if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
+ goto fail;
+ }
+ it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];
+
+ /* Hardware PT.
+ *
+ * This is a separate step from above due to GF100 and
+ * newer having dual page tables at some levels, which
+ * are refcounted independently.
+ */
+ if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
+ if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
+ goto fail;
+ }
+ }
+
+ /* Handle PTE updates. */
+ if (!REF_PTES || REF_PTES(&it, ptei, ptes)) {
+ struct nvkm_mmu_pt *pt = pgt->pt[type];
+ if (MAP_PTES || CLR_PTES) {
+ if (MAP_PTES)
+ MAP_PTES(vmm, pt, ptei, ptes, map);
+ else
+ CLR_PTES(vmm, pt, ptei, ptes);
+ nvkm_vmm_flush_mark(&it);
+ }
+ }
+
+ /* Walk back up the tree to the next position. */
+ it.pte[it.lvl] += ptes;
+ it.cnt -= ptes;
+ if (it.cnt) {
+ while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
+ it.pte[it.lvl++] = 0;
+ it.pte[it.lvl]++;
+ }
+ }
+ };
+
+ nvkm_vmm_flush(&it);
+ return ~0ULL;
+
+fail:
+ /* Reconstruct the failure address so the caller is able to
+ * reverse any partially completed operations.
+ */
+ addr = it.pte[it.max--];
+ do {
+ addr = addr << desc[it.max].bits;
+ addr |= it.pte[it.max];
+ } while (it.max--);
+
+ return addr << page->shift;
+}
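The index arithmetic at the top and bottom of nvkm_vmm_iter() round-trips: an address is decomposed into one PTE index per level, and on failure the indices are folded back into an address. A standalone model, for illustration only; the level widths are examples, the real ones come from the per-GPU nvkm_vmm_desc chains:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned bits[] = { 8, 10, 10, 2, 0 };	/* 0 terminates, like desc[].bits */
	const unsigned page_shift = 12;
	uint64_t addr = 0x12345678000ull;
	uint64_t rem = addr >> page_shift;
	uint32_t pte[8];
	int lvl, max;

	for (lvl = 0; bits[lvl]; lvl++) {		/* deconstruct */
		pte[lvl] = rem & ((1u << bits[lvl]) - 1);
		rem >>= bits[lvl];
	}
	max = lvl - 1;

	for (lvl = max; lvl >= 0; lvl--)		/* trace prefix, as in nvkm_vmm_trace() */
		printf("%05x:", (unsigned)pte[lvl]);
	printf("\n");

	uint64_t fail = pte[max];			/* reconstruct, as on the 'fail:' path */
	for (lvl = max - 1; lvl >= 0; lvl--)
		fail = (fail << bits[lvl]) | pte[lvl];
	printf("%#llx\n", (unsigned long long)(fail << page_shift));
	return 0;
}

Run against the example address, the reconstruction returns the original 0x12345678000, which is what lets the nvkm_vmm_ptes_get() error path below release only the range it managed to reference before the failure.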
+
+void
+nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, bool sparse)
+{
+ const struct nvkm_vmm_desc_func *func = page->desc->func;
+ nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
+ sparse ? func->sparse : func->invalid ? func->invalid :
+ func->unmap);
+}
+
+void
+nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size, struct nvkm_vmm_map *map,
+ nvkm_vmm_pte_func func)
+{
+ nvkm_vmm_iter(vmm, page, addr, size, "map", false,
+ NULL, func, map, NULL);
+}
+
+void
+nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size)
+{
+ nvkm_vmm_iter(vmm, page, addr, size, "unref", false,
+ nvkm_vmm_unref_ptes, NULL, NULL, NULL);
+}
+
+int
+nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+ u64 addr, u64 size)
+{
+ u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true,
+ nvkm_vmm_ref_ptes, NULL, NULL, NULL);
+ if (fail != ~0ULL) {
+ if (fail != addr)
+ nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
void
nvkm_vmm_dtor(struct nvkm_vmm *vmm)
{
+ if (vmm->bootstrapped) {
+ const struct nvkm_vmm_page *page = vmm->func->page;
+ const u64 limit = vmm->limit - vmm->start;
+
+ while (page[1].shift)
+ page++;
+
+ nvkm_mmu_ptc_dump(vmm->mmu);
+ nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
+ }
+
if (vmm->nullp) {
dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
vmm->nullp, vmm->null);
@@ -94,6 +644,7 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
vmm->func = func;
vmm->mmu = mmu;
vmm->name = name;
+ vmm->debug = mmu->subdev.debug;
kref_init(&vmm->kref);
__mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);
@@ -150,3 +701,32 @@ nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
return -ENOMEM;
return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
}
+
+static bool
+nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
+{
+ const struct nvkm_vmm_desc *desc = it->desc;
+ const int type = desc->type == SPT;
+ nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
+ return false;
+}
+
+int
+nvkm_vmm_boot(struct nvkm_vmm *vmm)
+{
+ const struct nvkm_vmm_page *page = vmm->func->page;
+ const u64 limit = vmm->limit - vmm->start;
+ int ret;
+
+ while (page[1].shift)
+ page++;
+
+ ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
+ if (ret)
+ return ret;
+
+ nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
+ nvkm_vmm_boot_ptes, NULL, NULL, NULL);
+ vmm->bootstrapped = true;
+ return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 504408d8014b..042d84c5e950 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -2,6 +2,7 @@
#define __NVKM_VMM_H__
#include "priv.h"
#include <core/memory.h>
+enum nvkm_memory_target;
struct nvkm_vmm_pt {
/* Some GPUs have a mapping level with dual page tables to
@@ -49,7 +50,23 @@ struct nvkm_vmm_pt {
u8 pte[];
};
+typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
+ struct nvkm_mmu_pt *, u32 ptei, u32 ptes);
+typedef void (*nvkm_vmm_pde_func)(struct nvkm_vmm *,
+ struct nvkm_vmm_pt *, u32 pdei);
+typedef void (*nvkm_vmm_pte_func)(struct nvkm_vmm *, struct nvkm_mmu_pt *,
+ u32 ptei, u32 ptes, struct nvkm_vmm_map *);
+
struct nvkm_vmm_desc_func {
+ nvkm_vmm_pxe_func invalid;
+ nvkm_vmm_pxe_func unmap;
+ nvkm_vmm_pxe_func sparse;
+
+ nvkm_vmm_pde_func pde;
+
+ nvkm_vmm_pte_func mem;
+ nvkm_vmm_pte_func dma;
+ nvkm_vmm_pte_func sgl;
};
extern const struct nvkm_vmm_desc_func gf100_vmm_pgd;
@@ -106,6 +123,11 @@ struct nvkm_vmm_func {
int (*join)(struct nvkm_vmm *, struct nvkm_memory *inst);
void (*part)(struct nvkm_vmm *, struct nvkm_memory *inst);
+ int (*aper)(enum nvkm_memory_target);
+ int (*valid)(struct nvkm_vmm *, void *argv, u32 argc,
+ struct nvkm_vmm_map *);
+ void (*flush)(struct nvkm_vmm *, int depth);
+
u64 page_block;
const struct nvkm_vmm_page page[];
};
@@ -122,6 +144,15 @@ int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *,
u32 pd_header, u64 addr, u64 size, struct lock_class_key *,
const char *name, struct nvkm_vmm *);
void nvkm_vmm_dtor(struct nvkm_vmm *);
+void nvkm_vmm_ptes_put(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+ u64 addr, u64 size);
+int nvkm_vmm_ptes_get(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+ u64 addr, u64 size);
+void nvkm_vmm_ptes_map(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+ u64 addr, u64 size, struct nvkm_vmm_map *,
+ nvkm_vmm_pte_func);
+void nvkm_vmm_ptes_unmap(struct nvkm_vmm *, const struct nvkm_vmm_page *,
+ u64 addr, u64 size, bool sparse);
int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32,
u64, u64, void *, u32, struct lock_class_key *,
@@ -176,4 +207,85 @@ int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
struct lock_class_key *, const char *,
struct nvkm_vmm **);
+
+#define VMM_PRINT(l,v,p,f,a...) do { \
+ struct nvkm_vmm *_vmm = (v); \
+ if (CONFIG_NOUVEAU_DEBUG >= (l) && _vmm->debug >= (l)) { \
+ nvkm_printk_(&_vmm->mmu->subdev, 0, p, "%s: "f"\n", \
+ _vmm->name, ##a); \
+ } \
+} while(0)
+#define VMM_DEBUG(v,f,a...) VMM_PRINT(NV_DBG_DEBUG, (v), info, f, ##a)
+#define VMM_TRACE(v,f,a...) VMM_PRINT(NV_DBG_TRACE, (v), info, f, ##a)
+#define VMM_SPAM(v,f,a...) VMM_PRINT(NV_DBG_SPAM , (v), dbg, f, ##a)
+
+#define VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL,BASE,SIZE,NEXT) do { \
+ nvkm_kmap((PT)->memory); \
+ while (PTEN) { \
+ u64 _ptes = ((SIZE) - MAP->off) >> MAP->page->shift; \
+ u64 _addr = ((BASE) + MAP->off); \
+ \
+ if (_ptes > PTEN) { \
+ MAP->off += PTEN << MAP->page->shift; \
+ _ptes = PTEN; \
+ } else { \
+ MAP->off = 0; \
+ NEXT; \
+ } \
+ \
+ VMM_SPAM(VMM, "ITER %08x %08x PTE(s)", PTEI, (u32)_ptes); \
+ \
+ FILL(VMM, PT, PTEI, _ptes, MAP, _addr); \
+ PTEI += _ptes; \
+ PTEN -= _ptes; \
+ }; \
+ nvkm_done((PT)->memory); \
+} while(0)
+
+#define VMM_MAP_ITER_MEM(VMM,PT,PTEI,PTEN,MAP,FILL) \
+ VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+ ((u64)MAP->mem->offset << NVKM_RAM_MM_SHIFT), \
+ ((u64)MAP->mem->length << NVKM_RAM_MM_SHIFT), \
+ (MAP->mem = MAP->mem->next))
+#define VMM_MAP_ITER_DMA(VMM,PT,PTEI,PTEN,MAP,FILL) \
+ VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+ *MAP->dma, PAGE_SIZE, MAP->dma++)
+#define VMM_MAP_ITER_SGL(VMM,PT,PTEI,PTEN,MAP,FILL) \
+ VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \
+ sg_dma_address(MAP->sgl), sg_dma_len(MAP->sgl), \
+ (MAP->sgl = sg_next(MAP->sgl)))
+
+#define VMM_FO(m,o,d,c,b) nvkm_fo##b((m)->memory, (o), (d), (c))
+#define VMM_WO(m,o,d,c,b) nvkm_wo##b((m)->memory, (o), (d))
+#define VMM_XO(m,v,o,d,c,b,fn,f,a...) do { \
+ const u32 _pteo = (o); u##b _data = (d); \
+ VMM_SPAM((v), " %010llx "f, (m)->addr + _pteo, _data, ##a); \
+ VMM_##fn((m), (m)->base + _pteo, _data, (c), b); \
+} while(0)
+
+#define VMM_WO032(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 32, WO, "%08x")
+#define VMM_FO032(m,v,o,d,c) \
+ VMM_XO((m),(v),(o),(d),(c), 32, FO, "%08x %08x", (c))
+
+#define VMM_WO064(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 64, WO, "%016llx")
+#define VMM_FO064(m,v,o,d,c) \
+ VMM_XO((m),(v),(o),(d),(c), 64, FO, "%016llx %08x", (c))
+
+#define VMM_XO128(m,v,o,lo,hi,c,f,a...) do { \
+ u32 _pteo = (o), _ptes = (c); \
+ const u64 _addr = (m)->addr + _pteo; \
+ VMM_SPAM((v), " %010llx %016llx%016llx"f, _addr, (hi), (lo), ##a); \
+ while (_ptes--) { \
+ nvkm_wo64((m)->memory, (m)->base + _pteo + 0, (lo)); \
+ nvkm_wo64((m)->memory, (m)->base + _pteo + 8, (hi)); \
+ _pteo += 0x10; \
+ } \
+} while(0)
+
+#define VMM_WO128(m,v,o,lo,hi) VMM_XO128((m),(v),(o),(lo),(hi), 1, "")
+#define VMM_FO128(m,v,o,lo,hi,c) do { \
+ nvkm_kmap((m)->memory); \
+ VMM_XO128((m),(v),(o),(lo),(hi),(c), " %08x", (c)); \
+ nvkm_done((m)->memory); \
+} while(0)
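The VMM_MAP_ITER_* and VMM_WO/FO macros above are meant to be instantiated by per-GPU page-table writers. A hypothetical sketch — the function names and PTE encoding here are invented, and the real writers live in the per-generation vmm code, not in this patch — of how a FILL callback pairs with the DMA iterator:

/* Hypothetical backend, for illustration only. */
static inline void
xxx_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
		u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr)
{
	u64 data = map->type | (addr >> 4);	/* invented PTE layout */

	while (ptes--) {
		VMM_WO064(pt, vmm, ptei++ * 8, data);
		data += map->next;	/* per-PTE address step (assumed to be set
					 * up earlier, e.g. by the valid() hook) */
	}
}

static void
xxx_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
		u32 ptei, u32 ptes, struct nvkm_vmm_map *map)
{
	VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, xxx_vmm_pgt_pte);
}

Such a pair would be wired into nvkm_vmm_desc_func as the .dma writer and reached via nvkm_vmm_ptes_map().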
#endif