diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2012-12-18 21:47:44 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2013-06-25 17:50:28 -0400 |
commit | 841cf442fd5326683db87e9e4f8050a47d2446da (patch) | |
tree | a08aa444db45478008720ae97f9bfee30d81a64e | |
parent | bc8273fe97019e0cd1cdc893c6b40c0add7e8de3 (diff) |
drm/radeon: Add CP init for CIK (v7)
Sets up the GFX ring and loads ucode for GFX and Compute.
Todo:
- handle compute queue setup.
v2: add documentation
v3: integrate with latest reset changes
v4: additional init fixes
v5: scratch reg write back no longer supported on CIK
v6: properly set CP_RB0_BASE_HI
v7: rebase
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/radeon/Makefile | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cik.c | 395 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cik_blit_shaders.c | 246 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cik_blit_shaders.h | 32 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cikd.h | 222 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_cs.c | 4 |
6 files changed, 899 insertions, 2 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 88d0601e075b..292fd25bbb38 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ - si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o + si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8eec582867b5..5712526a4468 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -30,6 +30,7 @@ #include "radeon_asic.h" #include "cikd.h" #include "atom.h" +#include "cik_blit_shaders.h" /* GFX */ #define CIK_PFP_UCODE_SIZE 2144 @@ -1491,6 +1492,400 @@ static void cik_gpu_init(struct radeon_device *rdev) udelay(50); } +/* + * CP. + * On CIK, gfx and compute now have independant command processors. + * + * GFX + * Gfx consists of a single ring and can process both gfx jobs and + * compute jobs. The gfx CP consists of three microengines (ME): + * PFP - Pre-Fetch Parser + * ME - Micro Engine + * CE - Constant Engine + * The PFP and ME make up what is considered the Drawing Engine (DE). + * The CE is an asynchronous engine used for updating buffer desciptors + * used by the DE so that they can be loaded into cache in parallel + * while the DE is processing state update packets. + * + * Compute + * The compute CP consists of two microengines (ME): + * MEC1 - Compute MicroEngine 1 + * MEC2 - Compute MicroEngine 2 + * Each MEC supports 4 compute pipes and each pipe supports 8 queues. + * The queues are exposed to userspace and are programmed directly + * by the compute runtime. + */ +/** + * cik_cp_gfx_enable - enable/disable the gfx CP MEs + * + * @rdev: radeon_device pointer + * @enable: enable or disable the MEs + * + * Halts or unhalts the gfx MEs. + */ +static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + WREG32(CP_ME_CNTL, 0); + else { + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + } + udelay(50); +} + +/** + * cik_cp_gfx_load_microcode - load the gfx CP ME ucode + * + * @rdev: radeon_device pointer + * + * Loads the gfx PFP, ME, and CE ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) + return -EINVAL; + + cik_cp_gfx_enable(rdev, false); + + /* PFP */ + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_PFP_UCODE_ADDR, 0); + + /* CE */ + fw_data = (const __be32 *)rdev->ce_fw->data; + WREG32(CP_CE_UCODE_ADDR, 0); + for (i = 0; i < CIK_CE_UCODE_SIZE; i++) + WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_CE_UCODE_ADDR, 0); + + /* ME */ + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < CIK_ME_UCODE_SIZE; i++) + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_ME_RAM_WADDR, 0); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_CE_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; +} + +/** + * cik_cp_gfx_start - start the gfx ring + * + * @rdev: radeon_device pointer + * + * Enables the ring and loads the clear state context and other + * packets required to init the ring. + * Returns 0 for success, error for failure. + */ +static int cik_cp_gfx_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + int r, i; + + /* init the CP */ + WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); + WREG32(CP_ENDIAN_SWAP, 0); + WREG32(CP_DEVICE_ID, 1); + + cik_cp_gfx_enable(rdev, true); + + r = radeon_ring_lock(rdev, ring, cik_default_size + 17); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + + /* init the CE partitions. CE only used for gfx on CIK */ + radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + radeon_ring_write(ring, 0xc000); + radeon_ring_write(ring, 0xc000); + + /* setup clear context state */ + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + radeon_ring_write(ring, 0x80000000); + radeon_ring_write(ring, 0x80000000); + + for (i = 0; i < cik_default_size; i++) + radeon_ring_write(ring, cik_default_state[i]); + + radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + /* set clear context state */ + radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(ring, 0x00000316); + radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ + + radeon_ring_unlock_commit(rdev, ring); + + return 0; +} + +/** + * cik_cp_gfx_fini - stop the gfx ring + * + * @rdev: radeon_device pointer + * + * Stop the gfx ring and tear down the driver ring + * info. + */ +static void cik_cp_gfx_fini(struct radeon_device *rdev) +{ + cik_cp_gfx_enable(rdev, false); + radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); +} + +/** + * cik_cp_gfx_resume - setup the gfx ring buffer registers + * + * @rdev: radeon_device pointer + * + * Program the location and size of the gfx ring buffer + * and test it to make sure it's working. + * Returns 0 for success, error for failure. + */ +static int cik_cp_gfx_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr; + int r; + + WREG32(CP_SEM_WAIT_TIMER, 0x0); + WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); + + /* Set the write pointer delay */ + WREG32(CP_RB_WPTR_DELAY, 0); + + /* set the RB to use vmid 0 */ + WREG32(CP_RB_VMID, 0); + + WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + + /* ring 0 - compute and gfx */ + /* Set ring buffer size */ + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + rb_bufsz = drm_order(ring->ring_size / 8); + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; +#ifdef __BIG_ENDIAN + tmp |= BUF_SWAP_32BIT; +#endif + WREG32(CP_RB0_CNTL, tmp); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); + ring->wptr = 0; + WREG32(CP_RB0_WPTR, ring->wptr); + + /* set the wb address wether it's enabled or not */ + WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); + + /* scratch register shadowing is no longer supported */ + WREG32(SCRATCH_UMSK, 0); + + if (!rdev->wb.enabled) + tmp |= RB_NO_UPDATE; + + mdelay(1); + WREG32(CP_RB0_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32(CP_RB0_BASE, rb_addr); + WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); + + ring->rptr = RREG32(CP_RB0_RPTR); + + /* start the ring */ + cik_cp_gfx_start(rdev); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; + r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + if (r) { + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + return r; + } + return 0; +} + +/** + * cik_cp_compute_enable - enable/disable the compute CP MEs + * + * @rdev: radeon_device pointer + * @enable: enable or disable the MEs + * + * Halts or unhalts the compute MEs. + */ +static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + WREG32(CP_MEC_CNTL, 0); + else + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); + udelay(50); +} + +/** + * cik_cp_compute_load_microcode - load the compute CP ME ucode + * + * @rdev: radeon_device pointer + * + * Loads the compute MEC1&2 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int cik_cp_compute_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->mec_fw) + return -EINVAL; + + cik_cp_compute_enable(rdev, false); + + /* MEC1 */ + fw_data = (const __be32 *)rdev->mec_fw->data; + WREG32(CP_MEC_ME1_UCODE_ADDR, 0); + for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) + WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_MEC_ME1_UCODE_ADDR, 0); + + if (rdev->family == CHIP_KAVERI) { + /* MEC2 */ + fw_data = (const __be32 *)rdev->mec_fw->data; + WREG32(CP_MEC_ME2_UCODE_ADDR, 0); + for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) + WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_MEC_ME2_UCODE_ADDR, 0); + } + + return 0; +} + +/** + * cik_cp_compute_start - start the compute queues + * + * @rdev: radeon_device pointer + * + * Enable the compute queues. + * Returns 0 for success, error for failure. + */ +static int cik_cp_compute_start(struct radeon_device *rdev) +{ + //todo + return 0; +} + +/** + * cik_cp_compute_fini - stop the compute queues + * + * @rdev: radeon_device pointer + * + * Stop the compute queues and tear down the driver queue + * info. + */ +static void cik_cp_compute_fini(struct radeon_device *rdev) +{ + cik_cp_compute_enable(rdev, false); + //todo +} + +/** + * cik_cp_compute_resume - setup the compute queue registers + * + * @rdev: radeon_device pointer + * + * Program the compute queues and test them to make sure they + * are working. + * Returns 0 for success, error for failure. + */ +static int cik_cp_compute_resume(struct radeon_device *rdev) +{ + int r; + + //todo + r = cik_cp_compute_start(rdev); + if (r) + return r; + return 0; +} + +/* XXX temporary wrappers to handle both compute and gfx */ +/* XXX */ +static void cik_cp_enable(struct radeon_device *rdev, bool enable) +{ + cik_cp_gfx_enable(rdev, enable); + cik_cp_compute_enable(rdev, enable); +} + +/* XXX */ +static int cik_cp_load_microcode(struct radeon_device *rdev) +{ + int r; + + r = cik_cp_gfx_load_microcode(rdev); + if (r) + return r; + r = cik_cp_compute_load_microcode(rdev); + if (r) + return r; + + return 0; +} + +/* XXX */ +static void cik_cp_fini(struct radeon_device *rdev) +{ + cik_cp_gfx_fini(rdev); + cik_cp_compute_fini(rdev); +} + +/* XXX */ +static int cik_cp_resume(struct radeon_device *rdev) +{ + int r; + + /* Reset all cp blocks */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + RREG32(GRBM_SOFT_RESET); + + r = cik_cp_load_microcode(rdev); + if (r) + return r; + + r = cik_cp_gfx_resume(rdev); + if (r) + return r; + r = cik_cp_compute_resume(rdev); + if (r) + return r; + + return 0; +} + /** * cik_gpu_is_lockup - check if the 3D engine is locked up * diff --git a/drivers/gpu/drm/radeon/cik_blit_shaders.c b/drivers/gpu/drm/radeon/cik_blit_shaders.c new file mode 100644 index 000000000000..ff1311806e91 --- /dev/null +++ b/drivers/gpu/drm/radeon/cik_blit_shaders.c @@ -0,0 +1,246 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include <linux/types.h> +#include <linux/bug.h> +#include <linux/kernel.h> + +const u32 cik_default_state[] = +{ + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0046900, + 0x00000008, + 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ + 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0046900, + 0x00000100, + 0xffffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + + 0xc0046900, + 0x00000105, + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00c6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; + +const u32 cik_default_size = ARRAY_SIZE(cik_default_state); diff --git a/drivers/gpu/drm/radeon/cik_blit_shaders.h b/drivers/gpu/drm/radeon/cik_blit_shaders.h new file mode 100644 index 000000000000..dfe7314f9ff4 --- /dev/null +++ b/drivers/gpu/drm/radeon/cik_blit_shaders.h @@ -0,0 +1,32 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef CIK_BLIT_SHADERS_H +#define CIK_BLIT_SHADERS_H + +extern const u32 cik_default_state[]; + +extern const u32 cik_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 2300ae0f09da..0d1a29849a7e 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -263,11 +263,18 @@ #define MEC_ME2_HALT (1 << 28) #define MEC_ME1_HALT (1 << 30) +#define CP_MEC_CNTL 0x8234 +#define MEC_ME2_HALT (1 << 28) +#define MEC_ME1_HALT (1 << 30) + #define CP_ME_CNTL 0x86D8 #define CP_CE_HALT (1 << 24) #define CP_PFP_HALT (1 << 26) #define CP_ME_HALT (1 << 28) +#define CP_RB0_RPTR 0x8700 +#define CP_RB_WPTR_DELAY 0x8704 + #define CP_MEQ_THRESHOLDS 0x8764 #define MEQ1_START(x) ((x) << 0) #define MEQ2_START(x) ((x) << 8) @@ -445,12 +452,62 @@ #define TC_CFG_L1_VOLATILE 0xAC88 #define TC_CFG_L2_VOLATILE 0xAC8C +#define CP_RB0_BASE 0xC100 +#define CP_RB0_CNTL 0xC104 +#define RB_BUFSZ(x) ((x) << 0) +#define RB_BLKSZ(x) ((x) << 8) +#define BUF_SWAP_32BIT (2 << 16) +#define RB_NO_UPDATE (1 << 27) +#define RB_RPTR_WR_ENA (1 << 31) + +#define CP_RB0_RPTR_ADDR 0xC10C +#define RB_RPTR_SWAP_32BIT (2 << 0) +#define CP_RB0_RPTR_ADDR_HI 0xC110 +#define CP_RB0_WPTR 0xC114 + +#define CP_DEVICE_ID 0xC12C +#define CP_ENDIAN_SWAP 0xC140 +#define CP_RB_VMID 0xC144 + +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_ME_RAM_DATA 0xC160 + +#define CP_CE_UCODE_ADDR 0xC168 +#define CP_CE_UCODE_DATA 0xC16C +#define CP_MEC_ME1_UCODE_ADDR 0xC170 +#define CP_MEC_ME1_UCODE_DATA 0xC174 +#define CP_MEC_ME2_UCODE_ADDR 0xC178 +#define CP_MEC_ME2_UCODE_DATA 0xC17C + +#define CP_MAX_CONTEXT 0xC2B8 + +#define CP_RB0_BASE_HI 0xC2C4 + #define PA_SC_RASTER_CONFIG 0x28350 # define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_1 1 # define RASTER_CONFIG_RB_MAP_2 2 # define RASTER_CONFIG_RB_MAP_3 3 +#define SCRATCH_REG0 0x30100 +#define SCRATCH_REG1 0x30104 +#define SCRATCH_REG2 0x30108 +#define SCRATCH_REG3 0x3010C +#define SCRATCH_REG4 0x30110 +#define SCRATCH_REG5 0x30114 +#define SCRATCH_REG6 0x30118 +#define SCRATCH_REG7 0x3011C + +#define SCRATCH_UMSK 0x30140 +#define SCRATCH_ADDR 0x30144 + +#define CP_SEM_WAIT_TIMER 0x301BC + +#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x301C8 + #define GRBM_GFX_INDEX 0x30800 #define INSTANCE_INDEX(x) ((x) << 0) #define SH_INDEX(x) ((x) << 8) @@ -482,4 +539,169 @@ #define TCC_DISABLE_MASK 0xFFFF0000 #define TCC_DISABLE_SHIFT 16 +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_CONST 0x33 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_PREAMBLE 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +#define PACKET3_COPY_DW 0x3B +#define PACKET3_WAIT_REG_MEM 0x3C +#define PACKET3_INDIRECT_BUFFER 0x3F +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_SURFACE_SYNC 0x43 +# define PACKET3_DEST_BASE_0_ENA (1 << 0) +# define PACKET3_DEST_BASE_1_ENA (1 << 1) +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) +# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) +# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) +# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) +# define PACKET3_CB5_DEST_BASE_ENA (1 << 11) +# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) +# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) +# define PACKET3_DB_DEST_BASE_ENA (1 << 14) +# define PACKET3_TCL1_VOL_ACTION_ENA (1 << 15) +# define PACKET3_TC_VOL_ACTION_ENA (1 << 16) /* L2 */ +# define PACKET3_TC_WB_ACTION_ENA (1 << 18) /* L2 */ +# define PACKET3_DEST_BASE_2_ENA (1 << 19) +# define PACKET3_DEST_BASE_3_ENA (1 << 21) +# define PACKET3_TCL1_ACTION_ENA (1 << 22) +# define PACKET3_TC_ACTION_ENA (1 << 23) /* L2 */ +# define PACKET3_CB_ACTION_ENA (1 << 25) +# define PACKET3_DB_ACTION_ENA (1 << 26) +# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) +# define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28) +# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - EOP events + * 6 - EOS events + */ +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define EOP_TCL1_VOL_ACTION_EN (1 << 12) +#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */ +#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ +#define EOP_TCL1_ACTION_EN (1 << 16) +#define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + * 2 - Bypass + */ +#define TCL2_VOLATILE (1 << 27) +#define DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value + */ +#define INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define DST_SEL(x) ((x) << 16) + /* 0 - MC + * 1 - TC/L2 + */ +#define PACKET3_EVENT_WRITE_EOS 0x48 +#define PACKET3_RELEASE_MEM 0x49 +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_DMA_DATA 0x50 +#define PACKET3_AQUIRE_MEM 0x58 +#define PACKET3_REWIND 0x59 +#define PACKET3_LOAD_UCONFIG_REG 0x5E +#define PACKET3_LOAD_SH_REG 0x5F +#define PACKET3_LOAD_CONFIG_REG 0x60 +#define PACKET3_LOAD_CONTEXT_REG 0x61 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00008000 +#define PACKET3_SET_CONFIG_REG_END 0x0000b000 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x00028000 +#define PACKET3_SET_CONTEXT_REG_END 0x00029000 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x0000b000 +#define PACKET3_SET_SH_REG_END 0x0000c000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_QUEUE_REG 0x78 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 + + #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 7e265a58141f..cf71734a13d0 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -121,7 +121,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE: - if (p->rdev->family >= CHIP_TAHITI) { + if (p->rdev->family >= CHIP_BONAIRE) + p->ring = RADEON_RING_TYPE_GFX_INDEX; + else if (p->rdev->family >= CHIP_TAHITI) { if (p->priority > 0) p->ring = CAYMAN_RING_TYPE_CP1_INDEX; else |