From 43fb778754ffd1ef8b4f579a5c94f118292acfb5 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 4 Jan 2013 09:24:18 -0500
Subject: drm/radeon: split r6xx and r7xx copy_dma functions

- r6xx actually uses a slightly different packet format, although both
  formats seem to work ok.
- r7xx doesn't have the count multiple of 2 limitation.

Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/radeon/r600.c        | 10 ++---
 drivers/gpu/drm/radeon/radeon_asic.c |  4 +-
 drivers/gpu/drm/radeon/radeon_asic.h |  4 ++
 drivers/gpu/drm/radeon/rv770.c       | 74 ++++++++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm/radeon')

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 923f93647042..537e259b3837 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2646,7 +2646,7 @@ int r600_copy_blit(struct radeon_device *rdev,
  * @num_gpu_pages: number of GPU pages to xfer
  * @fence: radeon fence object
  *
- * Copy GPU paging using the DMA engine (r6xx-r7xx).
+ * Copy GPU paging using the DMA engine (r6xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
@@ -2669,8 +2669,8 @@ int r600_copy_dma(struct radeon_device *rdev,
 	}
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-	num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
@@ -2693,8 +2693,8 @@ int r600_copy_dma(struct radeon_device *rdev,
 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
 		radeon_ring_write(ring, src_offset & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+					 (upper_32_bits(src_offset) & 0xff)));
 		src_offset += cur_size_in_dw * 4;
 		dst_offset += cur_size_in_dw * 4;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 596bcbe80ed0..9056fafb00ea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1140,9 +1140,9 @@ static struct radeon_asic rv770_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = &r600_copy_dma,
+		.dma = &rv770_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
-		.copy = &r600_copy_dma,
+		.copy = &rv770_copy_dma,
 		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5f4882cc2152..15d70e613076 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -403,6 +403,10 @@ u32 rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 void r700_cp_stop(struct radeon_device *rdev);
 void r700_cp_fini(struct radeon_device *rdev);
+int rv770_copy_dma(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_gpu_pages,
+		   struct radeon_fence **fence);
 
 /*
  * evergreen
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 87c979c4f721..1b2444f4d8f4 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -887,6 +887,80 @@ static int rv770_mc_init(struct radeon_device *rdev)
 	return 0;
 }
 
+/**
+ * rv770_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int rv770_copy_dma(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_gpu_pages,
+		   struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFF)
+			cur_size_in_dw = 0xFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
 static int rv770_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring;
--
cgit v1.2.3
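
A note on the two packet layouts (an illustrative sketch, not part of the patch): the r6xx DMA linear copy packet is four dwords, with both upper-address bytes packed into one dword (destination bits 39:32 in bits 23:16, source bits 39:32 in bits 7:0), and its per-packet dword count is smaller, consistent with the DIV_ROUND_UP(..., 0xFFFE) change above; the r7xx packet is five dwords with each upper-address byte in its own dword and a 0xFFFF cap. The standalone C sketch below mirrors the two emit loops; the header dword is only a placeholder for the driver's DMA_PACKET(DMA_PACKET_COPY, 0, 0, ndw), and the sample addresses are made up.

/* Illustrative sketch, not driver code: pack r6xx vs r7xx DMA linear copy
 * packets into a dword array to show where the upper address bits go. */
#include <stdint.h>
#include <stdio.h>

static int pack_copy_r6xx(uint32_t *pkt, uint64_t src, uint64_t dst, uint32_t ndw)
{
	pkt[0] = ndw;  /* placeholder for DMA_PACKET(DMA_PACKET_COPY, 0, 0, ndw) */
	pkt[1] = (uint32_t)dst & 0xfffffffc;  /* dst bits 31:0 */
	pkt[2] = (uint32_t)src & 0xfffffffc;  /* src bits 31:0 */
	pkt[3] = (((uint32_t)(dst >> 32) & 0xff) << 16) |  /* dst bits 39:32 in 23:16 */
		  ((uint32_t)(src >> 32) & 0xff);          /* src bits 39:32 in 7:0 */
	return 4;  /* 4 dwords per packet */
}

static int pack_copy_r7xx(uint32_t *pkt, uint64_t src, uint64_t dst, uint32_t ndw)
{
	pkt[0] = ndw;  /* placeholder header */
	pkt[1] = (uint32_t)dst & 0xfffffffc;
	pkt[2] = (uint32_t)src & 0xfffffffc;
	pkt[3] = (uint32_t)(dst >> 32) & 0xff;  /* dst bits 39:32 in their own dword */
	pkt[4] = (uint32_t)(src >> 32) & 0xff;  /* src bits 39:32 in their own dword */
	return 5;  /* 5 dwords per packet */
}

int main(void)
{
	uint32_t pkt[5];
	uint64_t src = 0xab12345678ULL, dst = 0xcd9abcdef0ULL;
	int i, n;

	n = pack_copy_r6xx(pkt, src, dst, 0x1000);
	for (i = 0; i < n; i++)
		printf("r6xx dw%d: 0x%08x\n", i, pkt[i]);
	n = pack_copy_r7xx(pkt, src, dst, 0x1000);
	for (i = 0; i < n; i++)
		printf("r7xx dw%d: 0x%08x\n", i, pkt[i]);
	return 0;
}
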
From a10fbb4224e53c6b3528d671051be4b98bd8e9b9 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 4 Jan 2013 12:16:05 -0500
Subject: drm/radeon: fix DMA CS parser for r6xx linear copy packet

Was using the r7xx format.

Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/radeon/r600_cs.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu/drm/radeon')

diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 9ea13d07cc55..03191a56eb44 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2677,16 +2677,29 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 			}
 			p->idx += 7;
 		} else {
-			src_offset = ib[idx+2];
-			src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-			dst_offset = ib[idx+1];
-			dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+			if (p->family >= CHIP_RV770) {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
 
-			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-			ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-			ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-			ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-			p->idx += 5;
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+			} else {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16;
+				p->idx += 4;
+			}
 		}
 		if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 			dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
--
cgit v1.2.3
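
For reference, here is a small standalone sketch (not from the patch) of how the two linear copy layouts decode back into 40-bit addresses, mirroring the parser branches above: the r7xx layout keeps the destination high byte in dword 3 and the source high byte in dword 4, while the r6xx layout keeps both in dword 3, which is also why the r6xx branch advances p->idx by 4 instead of 5.

/* Sketch only: recover the 40-bit src/dst GPU addresses from an r7xx or
 * r6xx DMA linear copy packet, given the ib[] dwords starting at idx. */
#include <stdint.h>

void decode_copy_r7xx(const uint32_t *ib, int idx, uint64_t *src, uint64_t *dst)
{
	*dst = ib[idx + 1] | ((uint64_t)(ib[idx + 3] & 0xff) << 32);
	*src = ib[idx + 2] | ((uint64_t)(ib[idx + 4] & 0xff) << 32);
}

void decode_copy_r6xx(const uint32_t *ib, int idx, uint64_t *src, uint64_t *dst)
{
	/* both high bytes share dword 3: dst in bits 23:16, src in bits 7:0 */
	*dst = ib[idx + 1] | ((uint64_t)(ib[idx + 3] & 0xff0000) << 16);
	*src = ib[idx + 2] | ((uint64_t)(ib[idx + 3] & 0xff) << 32);
}
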
From d1f9809ed1315c4cdc5760cf2f59626fd3276952 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst
Date: Mon, 7 Jan 2013 15:18:47 +0100
Subject: drm/radeon: add quirk for d3 delay during switcheroo poweron for apple macbooks

vga-switcheroo with apple-gmux does not switch correctly on my system.
The PCI configuration space is not restored correctly, resulting in
MSI not working after the switch.

The only useful item in dmesg is:

[   33.922807] radeon 0000:01:00.0: Refused to change power state, currently in D3

I did some testing, dumping the difference in ms between the first
successful switch from D3 to D0, and it seems that there is slightly
more than a 20 ms difference when the device is re-enabled through
vga-switcheroo. So bump the re-enable d3 delay to 20 ms to handle this,
which fixes MSI not working on my system after switcheroo-ing.

The default d3_delay value is PCI_PM_D3_WAIT, 10 ms.

Signed-off-by: Maarten Lankhorst
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/radeon/radeon_device.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'drivers/gpu/drm/radeon')

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index cd756262924d..edfc54e41842 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -896,6 +896,25 @@ static void radeon_check_arguments(struct radeon_device *rdev)
 	}
 }
 
+/**
+ * radeon_switcheroo_quirk_long_wakeup - return true if longer d3 delay is
+ * needed for waking up.
+ *
+ * @pdev: pci dev pointer
+ */
+static bool radeon_switcheroo_quirk_long_wakeup(struct pci_dev *pdev)
+{
+
+	/* 6600m in a macbook pro */
+	if (pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE &&
+	    pdev->subsystem_device == 0x00e2) {
+		printk(KERN_INFO "radeon: quirking longer d3 wakeup delay\n");
+		return true;
+	}
+
+	return false;
+}
+
 /**
  * radeon_switcheroo_set_state - set switcheroo state
  *
@@ -910,10 +929,19 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
 	if (state == VGA_SWITCHEROO_ON) {
+		unsigned d3_delay = dev->pdev->d3_delay;
+
 		printk(KERN_INFO "radeon: switched on\n");
 		/* don't suspend or resume card normally */
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+		if (d3_delay < 20 && radeon_switcheroo_quirk_long_wakeup(pdev))
+			dev->pdev->d3_delay = 20;
+
 		radeon_resume_kms(dev);
+
+		dev->pdev->d3_delay = d3_delay;
+
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 		drm_kms_helper_poll_enable(dev);
 	} else {
--
cgit v1.2.3
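
The quirk above amounts to a save/bump/restore of pdev->d3_delay around resume, so the longer delay only applies to the switcheroo power-on path. A minimal sketch of that pattern follows; the struct and helper names are hypothetical stand-ins (the real code operates on struct pci_dev, uses PCI_VENDOR_ID_APPLE and calls radeon_resume_kms()).

#include <stdbool.h>

/* Hypothetical stand-ins for the kernel types/constants used above. */
struct pci_dev_stub {
	unsigned short subsystem_vendor;
	unsigned short subsystem_device;
	unsigned int d3_delay;  /* ms the PCI core waits after a D3->D0 transition */
};

#define VENDOR_ID_APPLE 0x106b  /* PCI_VENDOR_ID_APPLE in the kernel */

bool needs_long_wakeup(const struct pci_dev_stub *pdev)
{
	/* Radeon 6600M in a MacBook Pro, matching the quirk above */
	return pdev->subsystem_vendor == VENDOR_ID_APPLE &&
	       pdev->subsystem_device == 0x00e2;
}

void resume_with_quirk(struct pci_dev_stub *pdev, void (*resume)(void))
{
	unsigned int saved = pdev->d3_delay;

	if (saved < 20 && needs_long_wakeup(pdev))
		pdev->d3_delay = 20;  /* bump from the 10 ms PCI_PM_D3_WAIT default */

	resume();  /* stands in for radeon_resume_kms(dev) */

	pdev->d3_delay = saved;  /* restore so normal PM paths keep the default */
}
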