diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 199 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.h | 73 |
3 files changed, 219 insertions, 62 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 092f25cfb8d5..69190af87a42 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3510,6 +3510,15 @@ search_free: if (ret) goto err_remove_node; + /* allocate before insert / bind */ + if (vma->vm->allocate_va_range) { + ret = vma->vm->allocate_va_range(vma->vm, + vma->node.start, + vma->node.size); + if (ret) + goto err_remove_node; + } + trace_i915_vma_bind(vma, flags); ret = i915_vma_bind(vma, obj->cache_level, flags & PIN_GLOBAL ? GLOBAL_BIND : 0); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6f5a0cf77fad..7d206b59eb3c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -279,29 +279,88 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev) +#define i915_dma_unmap_single(px, dev) \ + __i915_dma_unmap_single((px)->daddr, dev) + +static inline void __i915_dma_unmap_single(dma_addr_t daddr, + struct drm_device *dev) +{ + struct device *device = &dev->pdev->dev; + + dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); +} + +/** + * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc. + * @px: Page table/dir/etc to get a DMA map for + * @dev: drm device + * + * Page table allocations are unified across all gens. They always require a + * single 4k allocation, as well as a DMA mapping. If we keep the structs + * symmetric here, the simple macro covers us for every page table type. + * + * Return: 0 if success. + */ +#define i915_dma_map_single(px, dev) \ + i915_dma_map_page_single((px)->page, (dev), &(px)->daddr) + +static inline int i915_dma_map_page_single(struct page *page, + struct drm_device *dev, + dma_addr_t *daddr) +{ + struct device *device = &dev->pdev->dev; + + *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); + return dma_mapping_error(device, *daddr); +} + +static void unmap_and_free_pt(struct i915_page_table_entry *pt, + struct drm_device *dev) { if (WARN_ON(!pt->page)) return; + + i915_dma_unmap_single(pt, dev); __free_page(pt->page); + kfree(pt->used_ptes); kfree(pt); } static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev) { struct i915_page_table_entry *pt; + const size_t count = INTEL_INFO(dev)->gen >= 8 ? + GEN8_PTES : GEN6_PTES; + int ret = -ENOMEM; pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return ERR_PTR(-ENOMEM); + pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), + GFP_KERNEL); + + if (!pt->used_ptes) + goto fail_bitmap; + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pt->page) { - kfree(pt); - return ERR_PTR(-ENOMEM); - } + if (!pt->page) + goto fail_page; + + ret = i915_dma_map_single(pt, dev); + if (ret) + goto fail_dma; return pt; + +fail_dma: + __free_page(pt->page); +fail_page: + kfree(pt->used_ptes); +fail_bitmap: + kfree(pt); + + return ERR_PTR(ret); } /** @@ -848,26 +907,36 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } } -static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) +/* Write pde (index) from the page directory @pd to the page table @pt */ +static void gen6_write_pde(struct i915_page_directory_entry *pd, + const int pde, struct i915_page_table_entry *pt) { - struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; - gen6_pte_t __iomem *pd_addr; - uint32_t pd_entry; - int i; + /* Caller needs to make sure the write completes if necessary */ + struct i915_hw_ppgtt *ppgtt = + container_of(pd, struct i915_hw_ppgtt, pd); + u32 pd_entry; - WARN_ON(ppgtt->pd.pd_offset & 0x3f); - pd_addr = (gen6_pte_t __iomem*)dev_priv->gtt.gsm + - ppgtt->pd.pd_offset / sizeof(gen6_pte_t); - for (i = 0; i < ppgtt->num_pd_entries; i++) { - dma_addr_t pt_addr; + pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr); + pd_entry |= GEN6_PDE_VALID; - pt_addr = ppgtt->pd.page_table[i]->daddr; - pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); - pd_entry |= GEN6_PDE_VALID; + writel(pd_entry, ppgtt->pd_addr + pde); +} - writel(pd_entry, pd_addr + i); - } - readl(pd_addr); +/* Write all the page tables found in the ppgtt structure to incrementing page + * directories. */ +static void gen6_write_page_range(struct drm_i915_private *dev_priv, + struct i915_page_directory_entry *pd, + uint32_t start, uint32_t length) +{ + struct i915_page_table_entry *pt; + uint32_t pde, temp; + + gen6_for_each_pde(pt, pd, start, length, temp, pde) + gen6_write_pde(pd, pde, pt); + + /* Make sure write is complete before other code can use this page + * table. Also require for WC mapped PTEs */ + readl(dev_priv->gtt.gsm); } static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) @@ -1093,6 +1162,28 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 4096, PCI_DMA_BIDIRECTIONAL); } +static int gen6_alloc_va_range(struct i915_address_space *vm, + uint64_t start, uint64_t length) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct i915_page_table_entry *pt; + uint32_t pde, temp; + + gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { + DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); + + bitmap_zero(tmp_bitmap, GEN6_PTES); + bitmap_set(tmp_bitmap, gen6_pte_index(start), + gen6_pte_count(start, length)); + + bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap, + GEN6_PTES); + } + + return 0; +} + static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) { int i; @@ -1139,20 +1230,24 @@ alloc: 0, dev_priv->gtt.base.total, 0); if (ret) - return ret; + goto err_out; retried = true; goto alloc; } if (ret) - return ret; + goto err_out; + if (ppgtt->node.start < dev_priv->gtt.mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); ppgtt->num_pd_entries = I915_PDES; return 0; + +err_out: + return ret; } static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) @@ -1174,30 +1269,6 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) return 0; } -static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) -{ - struct drm_device *dev = ppgtt->base.dev; - int i; - - for (i = 0; i < ppgtt->num_pd_entries; i++) { - struct page *page; - dma_addr_t pt_addr; - - page = ppgtt->pd.page_table[i]->page; - pt_addr = pci_map_page(dev->pdev, page, 0, 4096, - PCI_DMA_BIDIRECTIONAL); - - if (pci_dma_mapping_error(dev->pdev, pt_addr)) { - gen6_ppgtt_unmap_pages(ppgtt); - return -EIO; - } - - ppgtt->pd.page_table[i]->daddr = pt_addr; - } - - return 0; -} - static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { struct drm_device *dev = ppgtt->base.dev; @@ -1221,12 +1292,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ret = gen6_ppgtt_setup_page_tables(ppgtt); - if (ret) { - gen6_ppgtt_free(ppgtt); - return ret; - } - + ppgtt->base.allocate_va_range = gen6_alloc_va_range; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; @@ -1237,13 +1303,17 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->pd.pd_offset = ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm + + ppgtt->pd.pd_offset / sizeof(gen6_pte_t); + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); + gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - gen6_write_pdes(ppgtt); DRM_DEBUG("Adding PPGTT at offset %x\n", ppgtt->pd.pd_offset << 10); @@ -1514,15 +1584,20 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) return; } - list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - if (i915_is_ggtt(vm)) { - if (dev_priv->mm.aliasing_ppgtt) - gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); - continue; - } + if (USES_PPGTT(dev)) { + list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + /* TODO: Perhaps it shouldn't be gen6 specific */ + + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, + base); - gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); + if (i915_is_ggtt(vm)) + ppgtt = dev_priv->mm.aliasing_ppgtt; + + gen6_write_page_range(dev_priv, &ppgtt->pd, + 0, ppgtt->base.total); + } } i915_ggtt_flush(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5ca7c5eff88b..7472312deff9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -55,10 +55,12 @@ typedef uint64_t gen8_pde_t; #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) +#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT)) #define GEN6_PTES I915_PTES(sizeof(gen6_pte_t)) #define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE) #define GEN6_PD_ALIGN (PAGE_SIZE * 16) +#define GEN6_PDE_SHIFT 22 #define GEN6_PDE_VALID (1 << 0) #define GEN7_PTE_CACHE_L3_LLC (3 << 1) @@ -194,6 +196,8 @@ struct i915_vma { struct i915_page_table_entry { struct page *page; dma_addr_t daddr; + + unsigned long *used_ptes; }; struct i915_page_directory_entry { @@ -250,6 +254,9 @@ struct i915_address_space { gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, bool valid, u32 flags); /* Create a valid PTE */ + int (*allocate_va_range)(struct i915_address_space *vm, + uint64_t start, + uint64_t length); void (*clear_range)(struct i915_address_space *vm, uint64_t start, uint64_t length, @@ -302,12 +309,78 @@ struct i915_hw_ppgtt { struct drm_i915_file_private *file_priv; + gen6_pte_t __iomem *pd_addr; + int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *ring); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; +/* For each pde iterates over every pde between from start until start + length. + * If start, and start+length are not perfectly divisible, the macro will round + * down, and up as needed. The macro modifies pde, start, and length. Dev is + * only used to differentiate shift values. Temp is temp. On gen6/7, start = 0, + * and length = 2G effectively iterates over every PDE in the system. + * + * XXX: temp is not actually needed, but it saves doing the ALIGN operation. + */ +#define gen6_for_each_pde(pt, pd, start, length, temp, iter) \ + for (iter = gen6_pde_index(start), pt = (pd)->page_table[iter]; \ + length > 0 && iter < I915_PDES; \ + pt = (pd)->page_table[++iter], \ + temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \ + temp = min_t(unsigned, temp, length), \ + start += temp, length -= temp) + +static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +{ + const uint32_t mask = NUM_PTE(pde_shift) - 1; + + return (address >> PAGE_SHIFT) & mask; +} + +/* Helper to counts the number of PTEs within the given length. This count + * does not cross a page table boundary, so the max value would be + * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. +*/ +static inline uint32_t i915_pte_count(uint64_t addr, size_t length, + uint32_t pde_shift) +{ + const uint64_t mask = ~((1 << pde_shift) - 1); + uint64_t end; + + WARN_ON(length == 0); + WARN_ON(offset_in_page(addr|length)); + + end = addr + length; + + if ((addr & mask) != (end & mask)) + return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift); + + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); +} + +static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +{ + return (addr >> shift) & I915_PDE_MASK; +} + +static inline uint32_t gen6_pte_index(uint32_t addr) +{ + return i915_pte_index(addr, GEN6_PDE_SHIFT); +} + +static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +{ + return i915_pte_count(addr, length, GEN6_PDE_SHIFT); +} + +static inline uint32_t gen6_pde_index(uint32_t addr) +{ + return i915_pde_index(addr, GEN6_PDE_SHIFT); +} + int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); void i915_global_gtt_cleanup(struct drm_device *dev); |