From ef9b189352f2eb78f14e52996f4780a523b04a49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:17:33 -0800 Subject: swiotlb: support bouncing of HighMem pages Impact: prepare the swiotlb code for HighMem struct pages This requires us to treat DMA regions in terms of page+offset rather than virtual addressing since a HighMem page may not have a mapping. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 122 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a0b4039e2880..1661af593914 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -38,9 +39,6 @@ #define OFFSET(val,align) ((unsigned long) \ ( (val) & ( (align) - 1))) -#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) -#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)) - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* @@ -91,7 +89,10 @@ static unsigned int io_tlb_index; * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ -static unsigned char **io_tlb_orig_addr; +static struct swiotlb_phys_addr { + struct page *page; + unsigned int offset; +} *io_tlb_orig_addr; /* * Protect the above data structures in the map and unmap calls @@ -150,6 +151,11 @@ int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) return 0; } +static dma_addr_t swiotlb_sg_to_bus(struct scatterlist *sg) +{ + return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset); +} + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -183,7 +189,7 @@ swiotlb_init_with_default_size(size_t default_size) for (i = 0; i < io_tlb_nslabs; i++) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr)); /* * Get the overflow emergency buffer @@ -258,12 +264,12 @@ swiotlb_late_init_with_default_size(size_t default_size) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(char *))); + io_tlb_orig_addr = (struct swiotlb_phys_addr *)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(struct swiotlb_phys_addr))); if (!io_tlb_orig_addr) goto cleanup3; - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *)); + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(struct swiotlb_phys_addr)); /* * Get the overflow emergency buffer @@ -312,20 +318,59 @@ static int is_swiotlb_buffer(char *addr) return addr >= io_tlb_start && addr < io_tlb_end; } -static void -__sync_single(char *buffer, char *dma_addr, size_t size, int dir) +static struct swiotlb_phys_addr swiotlb_bus_to_phys_addr(char *dma_addr) { - if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, buffer, size); - else - memcpy(buffer, dma_addr, size); + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + struct swiotlb_phys_addr buffer = io_tlb_orig_addr[index]; + buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1); + buffer.page += buffer.offset >> PAGE_SHIFT; + buffer.offset &= PAGE_SIZE - 1; + return buffer; +} + +static void +__sync_single(struct swiotlb_phys_addr buffer, char *dma_addr, size_t size, int dir) +{ + if (PageHighMem(buffer.page)) { + size_t len, bytes; + char *dev, *host, *kmp; + + len = size; + while (len != 0) { + unsigned long flags; + + bytes = len; + if ((bytes + buffer.offset) > PAGE_SIZE) + bytes = PAGE_SIZE - buffer.offset; + local_irq_save(flags); /* protects KM_BOUNCE_READ */ + kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); + dev = dma_addr + size - len; + host = kmp + buffer.offset; + if (dir == DMA_FROM_DEVICE) + memcpy(host, dev, bytes); + else + memcpy(dev, host, bytes); + kunmap_atomic(kmp, KM_BOUNCE_READ); + local_irq_restore(flags); + len -= bytes; + buffer.page++; + buffer.offset = 0; + } + } else { + void *v = page_address(buffer.page) + buffer.offset; + + if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, v, size); + else + memcpy(v, dma_addr, size); + } } /* * Allocates bounce buffer and returns its kernel virtual address. */ static void * -map_single(struct device *hwdev, char *buffer, size_t size, int dir) +map_single(struct device *hwdev, struct swiotlb_phys_addr buffer, size_t size, int dir) { unsigned long flags; char *dma_addr; @@ -335,6 +380,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) unsigned long mask; unsigned long offset_slots; unsigned long max_slots; + struct swiotlb_phys_addr slot_buf; mask = dma_get_seg_boundary(hwdev); start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask; @@ -419,8 +465,13 @@ found: * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT); + slot_buf = buffer; + for (i = 0; i < nslots; i++) { + slot_buf.page += slot_buf.offset >> PAGE_SHIFT; + slot_buf.offset &= PAGE_SIZE - 1; + io_tlb_orig_addr[index+i] = slot_buf; + slot_buf.offset += 1 << IO_TLB_SHIFT; + } if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); @@ -436,12 +487,12 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; + struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr); /* * First, sync the memory before unmapping the entry */ - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) /* * bounce... copy the data back into the original buffer * and * delete the bounce buffer. @@ -478,10 +529,7 @@ static void sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir, int target) { - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); + struct swiotlb_phys_addr buffer = swiotlb_bus_to_phys_addr(dma_addr); switch (target) { case SYNC_FOR_CPU: @@ -529,7 +577,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * swiotlb_map_single(), which will grab memory from * the lowest available address range. */ - ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE); + struct swiotlb_phys_addr buffer; + buffer.page = virt_to_page(NULL); + buffer.offset = 0; + ret = map_single(hwdev, buffer, size, DMA_FROM_DEVICE); if (!ret) return NULL; } @@ -597,6 +648,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, { dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr); void *map; + struct swiotlb_phys_addr buffer; BUG_ON(dir == DMA_NONE); /* @@ -611,7 +663,9 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, /* * Oh well, have to allocate and map a bounce buffer. */ - map = map_single(hwdev, ptr, size, dir); + buffer.page = virt_to_page(ptr); + buffer.offset = (unsigned long)ptr & ~PAGE_MASK; + map = map_single(hwdev, buffer, size, dir); if (!map) { swiotlb_full(hwdev, size, dir, 1); map = io_tlb_overflow_buffer; @@ -756,18 +810,20 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir, struct dma_attrs *attrs) { struct scatterlist *sg; - void *addr; + struct swiotlb_phys_addr buffer; dma_addr_t dev_addr; int i; BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = swiotlb_virt_to_bus(addr); + dev_addr = swiotlb_sg_to_bus(sg); if (range_needs_mapping(sg_virt(sg), sg->length) || address_needs_mapping(hwdev, dev_addr, sg->length)) { - void *map = map_single(hwdev, addr, sg->length, dir); + void *map; + buffer.page = sg_page(sg); + buffer.offset = sg->offset; + map = map_single(hwdev, buffer, sg->length, dir); if (!map) { /* Don't panic here, we expect map_sg users to do proper error handling. */ @@ -807,11 +863,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + if (sg->dma_address != swiotlb_sg_to_bus(sg)) unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); } } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); @@ -840,11 +896,11 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) + if (sg->dma_address != swiotlb_sg_to_bus(sg)) sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir, target); else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); } } -- cgit v1.2.3