diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 423 |
1 files changed, 313 insertions, 110 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 881015080d83..e813a3f8ea93 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -850,12 +850,17 @@ gf100_grctx_init_gcc_0[] = { }; const struct gf100_gr_pack -gf100_grctx_pack_gpc[] = { +gf100_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf100_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf100_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf100_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -1025,6 +1030,13 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, } void +gf100_grctx_generate_r419cb8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000); +} + +void gf100_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -1080,89 +1092,38 @@ gf100_grctx_generate_unkn(struct gf100_gr *gr) } void -gf100_grctx_generate_tpcid(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } -} - -void -gf100_grctx_generate_r406028(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tmp[GPC_MAX / 8] = {}, i = 0; - for (i = 0; i < gr->gpc_nr; i++) - tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4); - for (i = 0; i < 4; i++) { - nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]); - nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]); - } -} - -void gf100_grctx_generate_r4060a8(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u8 tpcnr[GPC_MAX], data[TPC_MAX]; - int gpc, tpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - memset(data, 0x1f, sizeof(data)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - data[tpc] = gpc; + const u8 gpcmax = nvkm_rd32(device, 0x022430); + const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax; + int i, j, sm = 0; + u32 data; + + for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) { + for (data = 0, j = 0; j < 4; j++) { + if (sm < gr->sm_nr) + data |= gr->sm[sm++].gpc << (j * 8); + else + data |= 0x1f << (j * 8); + } + nvkm_wr32(device, 0x4060a8 + (i * 4), data); } - - for (i = 0; i < 4; i++) - nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]); } void -gf100_grctx_generate_r418bb8(struct gf100_gr *gr) +gf100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } + int i; - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); - /* and the second... */ + /* Magic. */ shift = 0; ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { @@ -1197,40 +1158,214 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr) } void -gf100_grctx_generate_r406800(struct gf100_gr *gr) +gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u64 tpc_mask = 0, tpc_set = 0; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int i, a, b; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8); - - for (i = 0, gpc = -1, b = -1; i < 32; i++) { - a = (i * (gr->tpc_total - 1)) / 32; - if (a != b) { - b = a; - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - tpc_set |= 1ULL << ((gpc * 8) + tpc); + u32 fbps = nvkm_rd32(device, 0x121c74); + if (fbps == 1) + nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000); +} + +static const u32 +gf100_grctx_alpha_beta_map[17][32] = { + [1] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + [2] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + //XXX: 3 + [4] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + }, + //XXX: 5 + //XXX: 6 + [7] = { + 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, + }, + [8] = { + 1, 1, 1, + 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, + 7, 7, 7, + }, + //XXX: 9 + //XXX: 10 + [11] = { + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, + 6, 6, 6, + 7, 7, 7, 7, + 8, 8, 8, + 9, 9, 9, 9, + 10, 10, + }, + //XXX: 12 + //XXX: 13 + [14] = { + 1, 1, + 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + }, + [15] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, 9, + 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + 14, 14, + }, + [16] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, + 12, 12, + 13, 13, + 14, 14, + 15, 15, + }, +}; + +void +gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + int i, gpc; + + for (i = 0; i < 32; i++) { + u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i]; + u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0; + + if (!atarget) { + nvkm_warn(subdev, "missing alpha/beta mapping table\n"); + atarget = max_t(u32, gr->tpc_total * i / 32, 1); + } + + while (atarget) { + for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) { + if (abits[gpc] < gr->tpc_nr[gpc]) { + abits[gpc]++; + atarget--; + } + } } - nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); - if (gr->gpc_nr > 4) { - nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + u32 bbits = gr->tpc_nr[gpc] - abits[gpc]; + amask |= ((1 << abits[gpc]) - 1) << (gpc * 8); + bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8); } + + nvkm_wr32(device, 0x406800 + (i * 0x20), amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask); } } void +gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); +} + +void +gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +void +gf100_grctx_generate_floorsweep(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *func = gr->func->grctx; + int gpc, sm, i, j; + u32 data; + + for (sm = 0; sm < gr->sm_nr; sm++) { + func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm); + if (func->tpc_nr) + func->tpc_nr(gr, gr->sm[sm].gpc); + } + + for (gpc = 0, i = 0; i < 4; i++) { + for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++) + data |= gr->tpc_nr[gpc] << (j * 4); + nvkm_wr32(device, 0x406028 + (i * 4), data); + nvkm_wr32(device, 0x405870 + (i * 4), data); + } + + if (func->r4060a8) + func->r4060a8(gr); + + func->rop_mapping(gr); + + if (func->alpha_beta_tables) + func->alpha_beta_tables(gr); + if (func->max_ways_evict) + func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); + if (func->r406500) + func->r406500(gr); + if (func->gpc_tpc_nr) + func->gpc_tpc_nr(gr); + if (func->r419f78) + func->r419f78(gr); + if (func->tpc_mask) + func->tpc_mask(gr); + if (func->smid_config) + func->smid_config(gr); +} + +void gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -1239,29 +1374,63 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mc_unk260(device, 0); - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); + if (!gr->fuc_sw_ctx) { + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc_0); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->gpc_1); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); + } else { + gf100_gr_mmio(gr, gr->fuc_sw_ctx); + } + + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->bundle(info); grctx->pagepool(info); + grctx->bundle(info); grctx->attrib(info); + if (grctx->patch_ltc) + grctx->patch_ltc(info); grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gf100_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); + + gf100_gr_wait_idle(gr); + + if (grctx->r400088) grctx->r400088(gr, false); + if (gr->fuc_bundle) + gf100_gr_icmd(gr, gr->fuc_bundle); + else + gf100_gr_icmd(gr, grctx->icmd); + if (grctx->sw_veid_bundle_init) + gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + if (grctx->r400088) grctx->r400088(gr, true); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); + + if (gr->fuc_method) + gf100_gr_mthd(gr, gr->fuc_method); + else + gf100_gr_mthd(gr, grctx->mthd); nvkm_mc_unk260(device, 1); + + if (grctx->r419cb8) + grctx->r419cb8(gr); + if (grctx->r418800) + grctx->r418800(gr); + if (grctx->r419eb0) + grctx->r419eb0(gr); + if (grctx->r419e00) + grctx->r419e00(gr); + if (grctx->r418e94) + grctx->r418e94(gr); + if (grctx->r419a3c) + grctx->r419a3c(gr); + if (grctx->r408840) + grctx->r408840(gr); } #define CB_RESERVED 0x80000 @@ -1280,6 +1449,32 @@ gf100_grctx_generate(struct gf100_gr *gr) int ret, i; u64 addr; + /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */ + nvkm_wr32(device, 0x404170, 0x00000012); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); + + if (grctx->unkn88c) + grctx->unkn88c(gr, true); + + /* Reset FECS. */ + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); + + if (grctx->unkn88c) + grctx->unkn88c(gr, false); + + /* NV_PGRAPH_FE_PWR_MODE_AUTO. */ + nvkm_wr32(device, 0x404170, 0x00000010); + + /* Init SCC RAM. */ + nvkm_wr32(device, 0x40802c, 0x00000001); + /* Allocate memory to for a "channel", which we'll use to generate * the default context values. */ @@ -1392,7 +1587,8 @@ gf100_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -1404,4 +1600,11 @@ gf100_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; |