diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-06 08:16:33 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-06 08:16:33 -0700 |
commit | 135c5504a600ff9b06e321694fbcac78a9530cd4 (patch) | |
tree | 8d22ed739b0e85954010a964a9aeadf3c692c977 /drivers/gpu/drm/vc4 | |
parent | af6c5d5e01ad9f2c9ca38cccaae6b5d67ddd241f (diff) | |
parent | 568cf2e6aa0c762f14d2d0d481a006f93c63ab7a (diff) |
Merge tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"This starts to support NVIDIA volta hardware with nouveau, and adds
amdgpu support for the GPU in the Kabylake-G (the intel + radeon
single package chip), along with some initial Intel icelake enabling.
Summary:
New Drivers:
- v3d - driver for broadcom V3D V3.x+ hardware
- xen-front - XEN PV display frontend
core:
- handle zpos normalization in the core
- stop looking at legacy pointers in atomic paths
- improved scheduler documentation
- improved aspect ratio validation
- aspect ratio support for 64:27 and 256:135
- drop unused control node code.
i915:
- Icelake (ICL) enabling
- GuC/HuC refactoring
- PSR/PSR2 enabling and fixes
- DPLL management refactoring
- DP MST fixes
- NV12 enabling
- HDCP improvements
- GEM/Execlist/reset improvements
- GVT improvements
- stolen memory first 4k fix
amdgpu:
- Vega 20 support
- VEGAM support (Kabylake-G)
- preOS scanout buffer reservation
- power management gfxoff support for raven
- SR-IOV fixes
- Vega10 power profiles and clock voltage control
- scatter/gather display support on CZ/ST
amdkfd:
- GFX9 dGPU support
- userptr memory mapping
nouveau:
- major refactoring for Volta GV100 support
tda998x:
- HDMI i2c CEC support
etnaviv:
- removed unused logging code
- license text cleanups
- MMU handling improvements
- timeout fence fix for 50 days uptime
tegra:
- IOMMU support in gr2d/gr3d drivers
- zpos support
vc4:
- syncobj support
- CTM, plane alpha and async cursor support
analogix_dp:
- HPD and aux chan fixes
sun4i:
- MIPI DSI support
tilcdc:
- clock divider fixes for OMAP-l138 LCDK board
rcar-du:
- R8A77965 support
- dma-buf fences fixes
- hardware indexed crtc/du group handling
- generic zplane property support
atmel-hclcdc:
- generic zplane property support
mediatek:
- use generic video mode function
exynos:
- S5PV210 FIMD variant support
- IPP v2 framework
- more HW overlays support"
* tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm: (1286 commits)
drm/amdgpu: fix 32-bit build warning
drm/exynos: fimc: signedness bug in fimc_setup_clocks()
drm/exynos: scaler: fix static checker warning
drm/amdgpu: Use dev_info() to report amdkfd is not supported for this ASIC
drm/amd/display: Remove use of division operator for long longs
drm/amdgpu: Update GFX info structure to match what vega20 used
drm/amdgpu/pp: remove duplicate assignment
drm/sched: add rcu_barrier after entity fini
drm/amdgpu: move VM BOs on LRU again
drm/amdgpu: consistenly use VM moved flag
drm/amdgpu: kmap PDs/PTs in amdgpu_vm_update_directories
drm/amdgpu: further optimize amdgpu_vm_handle_moved
drm/amdgpu: cleanup amdgpu_vm_validate_pt_bos v2
drm/amdgpu: rework VM state machine lock handling v2
drm/amdgpu: Add runtime VCN PG support
drm/amdgpu: Enable VCN static PG by default on RV
drm/amdgpu: Add VCN static PG support on RV
drm/amdgpu: Enable VCN CG by default on RV
drm/amdgpu: Add static CG control for VCN on RV
drm/exynos: Fix default value for zpos plane property
...
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_crtc.c | 75 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_dsi.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 57 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hvs.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_kms.c | 224 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 152 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_regs.h | 97 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 3 |
10 files changed, 530 insertions, 134 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index c61dff594195..c8650bbcbcb3 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -42,51 +42,18 @@ #include "vc4_drv.h" #include "vc4_regs.h" -struct vc4_crtc { - struct drm_crtc base; - const struct vc4_crtc_data *data; - void __iomem *regs; - - /* Timestamp at start of vblank irq - unaffected by lock delays. */ - ktime_t t_vblank; - - /* Which HVS channel we're using for our CRTC. */ - int channel; - - u8 lut_r[256]; - u8 lut_g[256]; - u8 lut_b[256]; - /* Size in pixels of the COB memory allocated to this CRTC. */ - u32 cob_size; - - struct drm_pending_vblank_event *event; -}; - struct vc4_crtc_state { struct drm_crtc_state base; /* Dlist area for this CRTC configuration. */ struct drm_mm_node mm; }; -static inline struct vc4_crtc * -to_vc4_crtc(struct drm_crtc *crtc) -{ - return (struct vc4_crtc *)crtc; -} - static inline struct vc4_crtc_state * to_vc4_crtc_state(struct drm_crtc_state *crtc_state) { return (struct vc4_crtc_state *)crtc_state; } -struct vc4_crtc_data { - /* Which channel of the HVS this pixelvalve sources from. */ - int hvs_channel; - - enum vc4_encoder_type encoder_types[4]; -}; - #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset)) #define CRTC_READ(offset) readl(vc4_crtc->regs + (offset)) @@ -298,23 +265,21 @@ vc4_crtc_lut_load(struct drm_crtc *crtc) HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]); } -static int -vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - uint32_t size, - struct drm_modeset_acquire_ctx *ctx) +static void +vc4_crtc_update_gamma_lut(struct drm_crtc *crtc) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct drm_color_lut *lut = crtc->state->gamma_lut->data; + u32 length = drm_color_lut_size(crtc->state->gamma_lut); u32 i; - for (i = 0; i < size; i++) { - vc4_crtc->lut_r[i] = r[i] >> 8; - vc4_crtc->lut_g[i] = g[i] >> 8; - vc4_crtc->lut_b[i] = b[i] >> 8; + for (i = 0; i < length; i++) { + vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8); + vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8); + vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8); } vc4_crtc_lut_load(crtc); - - return 0; } static u32 vc4_get_fifo_full_level(u32 format) @@ -699,6 +664,22 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, if (crtc->state->active && old_state->active) vc4_crtc_update_dlist(crtc); + if (crtc->state->color_mgmt_changed) { + u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)); + + if (crtc->state->gamma_lut) { + vc4_crtc_update_gamma_lut(crtc); + dispbkgndx |= SCALER_DISPBKGND_GAMMA; + } else { + /* Unsetting DISPBKGND_GAMMA skips the gamma lut step + * in hardware, which is the same as a linear lut that + * DRM expects us to use in absence of a user lut. + */ + dispbkgndx &= ~SCALER_DISPBKGND_GAMMA; + } + HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), dispbkgndx); + } + if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); vc4_hvs_dump_state(dev); @@ -953,7 +934,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, - .gamma_set = vc4_crtc_gamma_set, + .gamma_set = drm_atomic_helper_legacy_gamma_set, .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, }; @@ -1079,6 +1060,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) primary_plane->crtc = crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size); + + /* We support CTM, but only for one CRTC at a time. It's therefore + * implemented as private driver state in vc4_kms, not here. + */ + drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size); /* Set up some arbitrary number of planes. We're not limited * by a set number of physical registers, just the space in diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 7c95ed5c5cac..466d0a27b415 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -176,7 +176,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_RENDER | - DRIVER_PRIME), + DRIVER_PRIME | + DRIVER_SYNCOBJ), .lastclose = drm_fb_helper_lastclose, .open = vc4_open, .postclose = vc4_close, @@ -319,8 +320,8 @@ dev_unref: static void vc4_drm_unbind(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct drm_device *drm = platform_get_drvdata(pdev); + struct drm_device *drm = dev_get_drvdata(dev); + struct vc4_dev *vc4 = to_vc4_dev(drm); drm_dev_unregister(drm); @@ -328,6 +329,8 @@ static void vc4_drm_unbind(struct device *dev) drm_mode_config_cleanup(drm); + drm_atomic_private_obj_fini(&vc4->ctm_manager); + drm_dev_unref(drm); } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 1b4cd1fabf56..554a4e810d5b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -10,6 +10,8 @@ #include <drm/drmP.h> #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_atomic.h> +#include <drm/drm_syncobj.h> #include "uapi/drm/vc4_drm.h" @@ -193,6 +195,9 @@ struct vc4_dev { } hangcheck; struct semaphore async_modeset; + + struct drm_modeset_lock ctm_state_lock; + struct drm_private_obj ctm_manager; }; static inline struct vc4_dev * @@ -392,6 +397,39 @@ to_vc4_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_encoder, base); } +struct vc4_crtc_data { + /* Which channel of the HVS this pixelvalve sources from. */ + int hvs_channel; + + enum vc4_encoder_type encoder_types[4]; +}; + +struct vc4_crtc { + struct drm_crtc base; + const struct vc4_crtc_data *data; + void __iomem *regs; + + /* Timestamp at start of vblank irq - unaffected by lock delays. */ + ktime_t t_vblank; + + /* Which HVS channel we're using for our CRTC. */ + int channel; + + u8 lut_r[256]; + u8 lut_g[256]; + u8 lut_b[256]; + /* Size in pixels of the COB memory allocated to this CRTC. */ + u32 cob_size; + + struct drm_pending_vblank_event *event; +}; + +static inline struct vc4_crtc * +to_vc4_crtc(struct drm_crtc *crtc) +{ + return (struct vc4_crtc *)crtc; +} + #define V3D_READ(offset) readl(vc4->v3d->regs + offset) #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) #define HVS_READ(offset) readl(vc4->hvs->regs + offset) diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 94085f8bcd68..8aa897835118 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -753,6 +753,11 @@ static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps) (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) | (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0)); int ret; + bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) & + DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS)); + + if (ulps == ulps_currently_enabled) + return; DSI_PORT_WRITE(STAT, stat_ulps); DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0daf8ef..7910b9acedd6 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -655,7 +656,8 @@ retry: */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -678,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1113,8 +1118,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1126,7 +1133,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); + DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); return -EINVAL; } @@ -1164,6 +1171,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. + */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) @@ -1181,12 +1211,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 2b62fc5b8d85..5d8c749c9749 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -58,6 +58,10 @@ static const struct { HVS_REG(SCALER_DISPSTAT2), HVS_REG(SCALER_DISPBASE2), HVS_REG(SCALER_DISPALPHA2), + HVS_REG(SCALER_OLEDOFFS), + HVS_REG(SCALER_OLEDCOEF0), + HVS_REG(SCALER_OLEDCOEF1), + HVS_REG(SCALER_OLEDCOEF2), }; void vc4_hvs_dump_state(struct drm_device *dev) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ba60153dddb5..8a411e5f8776 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -23,6 +23,117 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include "vc4_drv.h" +#include "vc4_regs.h" + +struct vc4_ctm_state { + struct drm_private_state base; + struct drm_color_ctm *ctm; + int fifo; +}; + +static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv) +{ + return container_of(priv, struct vc4_ctm_state, base); +} + +static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state, + struct drm_private_obj *manager) +{ + struct drm_device *dev = state->dev; + struct vc4_dev *vc4 = dev->dev_private; + struct drm_private_state *priv_state; + int ret; + + ret = drm_modeset_lock(&vc4->ctm_state_lock, state->acquire_ctx); + if (ret) + return ERR_PTR(ret); + + priv_state = drm_atomic_get_private_obj_state(state, manager); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); + + return to_vc4_ctm_state(priv_state); +} + +static struct drm_private_state * +vc4_ctm_duplicate_state(struct drm_private_obj *obj) +{ + struct vc4_ctm_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + return &state->base; +} + +static void vc4_ctm_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(state); + + kfree(ctm_state); +} + +static const struct drm_private_state_funcs vc4_ctm_state_funcs = { + .atomic_duplicate_state = vc4_ctm_duplicate_state, + .atomic_destroy_state = vc4_ctm_destroy_state, +}; + +/* Converts a DRM S31.32 value to the HW S0.9 format. */ +static u16 vc4_ctm_s31_32_to_s0_9(u64 in) +{ + u16 r; + + /* Sign bit. */ + r = in & BIT_ULL(63) ? BIT(9) : 0; + + if ((in & GENMASK_ULL(62, 32)) > 0) { + /* We have zero integer bits so we can only saturate here. */ + r |= GENMASK(8, 0); + } else { + /* Otherwise take the 9 most important fractional bits. */ + r |= (in >> 23) & GENMASK(8, 0); + } + + return r; +} + +static void +vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state); + struct drm_color_ctm *ctm = ctm_state->ctm; + + if (ctm_state->fifo) { + HVS_WRITE(SCALER_OLEDCOEF2, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]), + SCALER_OLEDCOEF2_R_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[3]), + SCALER_OLEDCOEF2_R_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[6]), + SCALER_OLEDCOEF2_R_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF1, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[1]), + SCALER_OLEDCOEF1_G_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[4]), + SCALER_OLEDCOEF1_G_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[7]), + SCALER_OLEDCOEF1_G_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF0, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[2]), + SCALER_OLEDCOEF0_B_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[5]), + SCALER_OLEDCOEF0_B_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[8]), + SCALER_OLEDCOEF0_B_TO_B)); + } + + HVS_WRITE(SCALER_OLEDOFFS, + VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO)); +} static void vc4_atomic_complete_commit(struct drm_atomic_state *state) @@ -36,6 +147,8 @@ vc4_atomic_complete_commit(struct drm_atomic_state *state) drm_atomic_helper_commit_modeset_disables(dev, state); + vc4_ctm_commit(vc4, state); + drm_atomic_helper_commit_planes(dev, state, 0); drm_atomic_helper_commit_modeset_enables(dev, state); @@ -90,6 +203,26 @@ static int vc4_atomic_commit(struct drm_device *dev, struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + if (state->async_update) { + ret = down_interruptible(&vc4->async_modeset); + if (ret) + return ret; + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) { + up(&vc4->async_modeset); + return ret; + } + + drm_atomic_helper_async_commit(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + up(&vc4->async_modeset); + + return 0; + } + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -187,9 +320,89 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, return drm_gem_fb_create(dev, file_priv, mode_cmd); } +/* Our CTM has some peculiar limitations: we can only enable it for one CRTC + * at a time and the HW only supports S0.9 scalars. To account for the latter, + * we don't allow userland to set a CTM that we have no hope of approximating. + */ +static int +vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state = NULL; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_color_ctm *ctm; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + /* CTM is being disabled. */ + if (!new_crtc_state->ctm && old_crtc_state->ctm) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + ctm_state->fifo = 0; + } + } + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (new_crtc_state->ctm == old_crtc_state->ctm) + continue; + + if (!ctm_state) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + } + + /* CTM is being enabled or the matrix changed. */ + if (new_crtc_state->ctm) { + /* fifo is 1-based since 0 disables CTM. */ + int fifo = to_vc4_crtc(crtc)->channel + 1; + + /* Check userland isn't trying to turn on CTM for more + * than one CRTC at a time. + */ + if (ctm_state->fifo && ctm_state->fifo != fifo) { + DRM_DEBUG_DRIVER("Too many CTM configured\n"); + return -EINVAL; + } + + /* Check we can approximate the specified CTM. + * We disallow scalars |c| > 1.0 since the HW has + * no integer bits. + */ + ctm = new_crtc_state->ctm->data; + for (i = 0; i < ARRAY_SIZE(ctm->matrix); i++) { + u64 val = ctm->matrix[i]; + + val &= ~BIT_ULL(63); + if (val > BIT_ULL(32)) + return -EINVAL; + } + + ctm_state->fifo = fifo; + ctm_state->ctm = ctm; + } + } + + return 0; +} + +static int +vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + int ret; + + ret = vc4_ctm_atomic_check(dev, state); + if (ret < 0) + return ret; + + return drm_atomic_helper_check(dev, state); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = vc4_atomic_check, .atomic_commit = vc4_atomic_commit, .fb_create = vc4_fb_create, }; @@ -197,6 +410,7 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state; int ret; sema_init(&vc4->async_modeset, 1); @@ -217,6 +431,14 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.async_page_flip = true; dev->mode_config.allow_fb_modifiers = true; + drm_modeset_lock_init(&vc4->ctm_state_lock); + + ctm_state = kzalloc(sizeof(*ctm_state), GFP_KERNEL); + if (!ctm_state) + return -ENOMEM; + drm_atomic_private_obj_init(&vc4->ctm_manager, &ctm_state->base, + &vc4_ctm_state_funcs); + drm_mode_config_reset(dev); if (dev->mode_config.num_connector) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 13dcaad06798..71d44c357d35 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -201,6 +201,7 @@ static void vc4_plane_reset(struct drm_plane *plane) return; plane->state = &vc4_state->base; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; vc4_state->base.plane = plane; } @@ -467,6 +468,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); + bool mix_plane_alpha; bool covers_screen; u32 scl0, scl1, pitch0; u32 lbm_size, tiling; @@ -552,7 +554,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | + VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); @@ -565,6 +567,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS1_SCL_HEIGHT)); } + /* Don't waste cycles mixing with plane alpha if the set alpha + * is opaque or there is no per-pixel alpha information. + * In any case we use the alpha property value as the fixed alpha. + */ + mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && + fb->format->has_alpha; + /* Position Word 2: Source Image Size, Alpha */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, @@ -572,6 +581,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | + (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) | VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); @@ -653,10 +663,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->crtc_w == state->crtc->mode.hdisplay && vc4_state->crtc_h == state->crtc->mode.vdisplay; /* Background fill might be necessary when the plane has per-pixel - * alpha content and blends from the background or does not cover - * the entire screen. + * alpha content or a non-opaque plane alpha and could blend from the + * background or does not cover the entire screen. */ - vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen; + vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || + state->alpha != DRM_BLEND_ALPHA_OPAQUE; return 0; } @@ -741,6 +752,57 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) vc4_state->dlist[vc4_state->ptr0_offset] = addr; } +static void vc4_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + + if (plane->state->fb != state->fb) { + vc4_plane_async_set_fb(plane, state->fb); + drm_atomic_set_fb_for_plane(plane->state, state->fb); + } + + /* Set the cursor's position on the screen. This is the + * expected change from the drm_mode_cursor_universal() + * helper. + */ + plane->state->crtc_x = state->crtc_x; + plane->state->crtc_y = state->crtc_y; + + /* Allow changing the start position within the cursor BO, if + * that matters. + */ + plane->state->src_x = state->src_x; + plane->state->src_y = state->src_y; + + /* Update the display list based on the new crtc_x/y. */ + vc4_plane_atomic_check(plane, plane->state); + + /* Note that we can't just call vc4_plane_write_dlist() + * because that would smash the context data that the HVS is + * currently using. + */ + writel(vc4_state->dlist[vc4_state->pos0_offset], + &vc4_state->hw_dlist[vc4_state->pos0_offset]); + writel(vc4_state->dlist[vc4_state->pos2_offset], + &vc4_state->hw_dlist[vc4_state->pos2_offset]); + writel(vc4_state->dlist[vc4_state->ptr0_offset], + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); +} + +static int vc4_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + /* No configuring new scaling in the fast path. */ + if (plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h) + return -EINVAL; + + return 0; +} + static int vc4_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { @@ -780,6 +842,8 @@ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, .cleanup_fb = vc4_cleanup_fb, + .atomic_async_check = vc4_plane_atomic_async_check, + .atomic_async_update = vc4_plane_atomic_async_update, }; static void vc4_plane_destroy(struct drm_plane *plane) @@ -788,82 +852,6 @@ static void vc4_plane_destroy(struct drm_plane *plane) drm_plane_cleanup(plane); } -/* Implements immediate (non-vblank-synced) updates of the cursor - * position, or falls back to the atomic helper otherwise. - */ -static int -vc4_update_plane(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state; - struct vc4_plane_state *vc4_state; - - if (plane != crtc->cursor) - goto out; - - plane_state = plane->state; - vc4_state = to_vc4_plane_state(plane_state); - - if (!plane_state) - goto out; - - /* No configuring new scaling in the fast path. */ - if (crtc_w != plane_state->crtc_w || - crtc_h != plane_state->crtc_h || - src_w != plane_state->src_w || - src_h != plane_state->src_h) { - goto out; - } - - if (fb != plane_state->fb) { - drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_plane_async_set_fb(plane, fb); - } - - /* Set the cursor's position on the screen. This is the - * expected change from the drm_mode_cursor_universal() - * helper. - */ - plane_state->crtc_x = crtc_x; - plane_state->crtc_y = crtc_y; - - /* Allow changing the start position within the cursor BO, if - * that matters. - */ - plane_state->src_x = src_x; - plane_state->src_y = src_y; - - /* Update the display list based on the new crtc_x/y. */ - vc4_plane_atomic_check(plane, plane_state); - - /* Note that we can't just call vc4_plane_write_dlist() - * because that would smash the context data that the HVS is - * currently using. - */ - writel(vc4_state->dlist[vc4_state->pos0_offset], - &vc4_state->hw_dlist[vc4_state->pos0_offset]); - writel(vc4_state->dlist[vc4_state->pos2_offset], - &vc4_state->hw_dlist[vc4_state->pos2_offset]); - writel(vc4_state->dlist[vc4_state->ptr0_offset], - &vc4_state->hw_dlist[vc4_state->ptr0_offset]); - - return 0; - -out: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, - crtc_w, crtc_h, - src_x, src_y, - src_w, src_h, - ctx); -} - static bool vc4_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -891,7 +879,7 @@ static bool vc4_format_mod_supported(struct drm_plane *plane, } static const struct drm_plane_funcs vc4_plane_funcs = { - .update_plane = vc4_update_plane, + .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = vc4_plane_destroy, .set_property = NULL, @@ -939,5 +927,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, drm_plane_helper_add(plane, &vc4_plane_helper_funcs); + drm_plane_create_alpha_property(plane); + return plane; } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index a141496104a6..d1fb6fec46eb 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -330,6 +330,21 @@ #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) +/* Generates a single frame when VSTART is seen and stops at the last + * pixel read from the FIFO. + */ +# define SCALER_DISPCTRLX_ONESHOT BIT(29) +/* Processes a single context in the dlist and then task switch, + * instead of an entire line. + */ +# define SCALER_DISPCTRLX_ONECTX BIT(28) +/* Set to have DISPSLAVE return 2 16bpp pixels and no status data. */ +# define SCALER_DISPCTRLX_FIFO32 BIT(27) +/* Turns on output to the DISPSLAVE register instead of the normal + * FIFO. + */ +# define SCALER_DISPCTRLX_FIFOREG BIT(26) + # define SCALER_DISPCTRLX_WIDTH_MASK VC4_MASK(23, 12) # define SCALER_DISPCTRLX_WIDTH_SHIFT 12 # define SCALER_DISPCTRLX_HEIGHT_MASK VC4_MASK(11, 0) @@ -402,6 +417,68 @@ */ # define SCALER_GAMADDR_SRAMENB BIT(30) +#define SCALER_OLEDOFFS 0x00000080 +/* Clamps R to [16,235] and G/B to [16,240]. */ +# define SCALER_OLEDOFFS_YUVCLAMP BIT(31) + +/* Chooses which display FIFO the matrix applies to. */ +# define SCALER_OLEDOFFS_DISPFIFO_MASK VC4_MASK(25, 24) +# define SCALER_OLEDOFFS_DISPFIFO_SHIFT 24 +# define SCALER_OLEDOFFS_DISPFIFO_DISABLED 0 +# define SCALER_OLEDOFFS_DISPFIFO_0 1 +# define SCALER_OLEDOFFS_DISPFIFO_1 2 +# define SCALER_OLEDOFFS_DISPFIFO_2 3 + +/* Offsets are 8-bit 2s-complement. */ +# define SCALER_OLEDOFFS_RED_MASK VC4_MASK(23, 16) +# define SCALER_OLEDOFFS_RED_SHIFT 16 +# define SCALER_OLEDOFFS_GREEN_MASK VC4_MASK(15, 8) +# define SCALER_OLEDOFFS_GREEN_SHIFT 8 +# define SCALER_OLEDOFFS_BLUE_MASK VC4_MASK(7, 0) +# define SCALER_OLEDOFFS_BLUE_SHIFT 0 + +/* The coefficients are S0.9 fractions. */ +#define SCALER_OLEDCOEF0 0x00000084 +# define SCALER_OLEDCOEF0_B_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF0_B_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF0_B_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF0_B_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF0_B_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF0_B_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF1 0x00000088 +# define SCALER_OLEDCOEF1_G_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF1_G_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF1_G_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF1_G_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF1_G_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF1_G_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF2 0x0000008c +# define SCALER_OLEDCOEF2_R_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF2_R_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF2_R_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF2_R_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF2_R_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF2_R_TO_B_SHIFT 0 + +/* Slave addresses for DMAing from HVS composition output to other + * devices. The top bits are valid only in !FIFO32 mode. + */ +#define SCALER_DISPSLAVE0 0x000000c0 +#define SCALER_DISPSLAVE1 0x000000c9 +#define SCALER_DISPSLAVE2 0x000000d0 +# define SCALER_DISPSLAVE_ISSUE_VSTART BIT(31) +# define SCALER_DISPSLAVE_ISSUE_HSTART BIT(30) +/* Set when the current line has been read and an HSTART is required. */ +# define SCALER_DISPSLAVE_EOL BIT(26) +/* Set when the display FIFO is empty. */ +# define SCALER_DISPSLAVE_EMPTY BIT(25) +/* Set when there is RGB data ready to read. */ +# define SCALER_DISPSLAVE_VALID BIT(24) +# define SCALER_DISPSLAVE_RGB_MASK VC4_MASK(23, 0) +# define SCALER_DISPSLAVE_RGB_SHIFT 0 + #define SCALER_GAMDATA 0x000000e0 #define SCALER_DLIST_START 0x00002000 #define SCALER_DLIST_SIZE 0x00004000 @@ -767,6 +844,10 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, + HVS_PIXEL_FORMAT_H264 = 12, + HVS_PIXEL_FORMAT_PALETTE = 13, + HVS_PIXEL_FORMAT_YUV444_RGB = 14, + HVS_PIXEL_FORMAT_AYUV444_RGB = 15, }; /* Note: the LSB is the rightmost character shown. Only valid for @@ -800,12 +881,27 @@ enum hvs_pixel_format { #define SCALER_CTL0_TILING_128B 2 #define SCALER_CTL0_TILING_256B_OR_T 3 +#define SCALER_CTL0_ALPHA_MASK BIT(19) #define SCALER_CTL0_HFLIP BIT(16) #define SCALER_CTL0_VFLIP BIT(15) +#define SCALER_CTL0_KEY_MODE_MASK VC4_MASK(18, 17) +#define SCALER_CTL0_KEY_MODE_SHIFT 17 +#define SCALER_CTL0_KEY_DISABLED 0 +#define SCALER_CTL0_KEY_LUMA_OR_COMMON_RGB 1 +#define SCALER_CTL0_KEY_MATCH 2 /* turn transparent */ +#define SCALER_CTL0_KEY_REPLACE 3 /* replace with value from key mask word 2 */ + #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) #define SCALER_CTL0_ORDER_SHIFT 13 +#define SCALER_CTL0_RGBA_EXPAND_MASK VC4_MASK(12, 11) +#define SCALER_CTL0_RGBA_EXPAND_SHIFT 11 +#define SCALER_CTL0_RGBA_EXPAND_ZERO 0 +#define SCALER_CTL0_RGBA_EXPAND_LSB 1 +#define SCALER_CTL0_RGBA_EXPAND_MSB 2 +#define SCALER_CTL0_RGBA_EXPAND_ROUND 3 + #define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) #define SCALER_CTL0_SCL1_SHIFT 8 @@ -849,6 +945,7 @@ enum hvs_pixel_format { #define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2 #define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3 #define SCALER_POS2_ALPHA_PREMULT BIT(29) +#define SCALER_POS2_ALPHA_MIX BIT(28) #define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16) #define SCALER_POS2_HEIGHT_SHIFT 16 diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index bfc2fa73d2ae..e47e29426078 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -218,8 +218,7 @@ try_again: * overall CMA pool before they make scenes complicated enough to run * out of bin space. */ -int -vc4_allocate_bin_bo(struct drm_device *drm) +static int vc4_allocate_bin_bo(struct drm_device *drm) { struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = vc4->v3d; |