diff options
author | Dave Airlie <airlied@redhat.com> | 2018-01-12 11:45:18 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-01-12 11:45:18 +1000 |
commit | 323b20c4af6e315e665b0ea85035fea7d738e0cc (patch) | |
tree | c5c59de32f20a0161e4c7d7e0269352b841eb4a2 | |
parent | fb8baefc7b2d7b93ad96abacbe63fa99e3d213d6 (diff) | |
parent | f91c14ab448af4d9d57350301dd9d6b6a7b6128a (diff) |
Merge tag 'drm-msm-next-2018-01-10' of git://people.freedesktop.org/~robclark/linux into drm-next
Updates for 4.16.. fairly small this time around, main thing is
devfreq support for the gpu.
* tag 'drm-msm-next-2018-01-10' of git://people.freedesktop.org/~robclark/linux:
drm/msm: Add devfreq support for the GPU
drm/msm/adreno: a5xx: Explicitly program the CP0 performance counter
drm/msm/adreno: Read the speed bins for a5xx targets
drm/msm/adreno: Move clock parsing to adreno_gpu_init()
drm/msm/adreno: Cleanup chipid parsing
drm/msm/gpu: Remove unused bus scaling code
drm/msm/adreno: Remove a useless call to dev_pm_opp_get_freq()
drm/msm/adreno: Call dev_pm_opp_put()
drm/msm: Fix NULL deref in adreno_load_gpu
drm/msm: gpu: Only sync fences on rings that exist
drm/msm: fix leak in failed get_pages
drm/msm: avoid false-positive -Wmaybe-uninitialized warning
drm/msm/mdp4: Deduplicate bus_find_device() by name matching
drm/msm: add missing MODULE_FIRMWARE declarations
drm/msm: update adreno firmware path in MODULE_FIRMWARE
drm/msm: free kstrdup'd cmdline
drm/msm: fix msm_rd_dump_submit prototype
drm/msm: fix spelling mistake: "ringubffer" -> "ringbuffer"
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_power.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_device.c | 140 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 85 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 112 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.h | 14 |
11 files changed, 264 insertions, 170 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index a1f4eeeb73e2..7e09d44e4a15 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -17,6 +17,8 @@ #include <linux/dma-mapping.h> #include <linux/of_address.h> #include <linux/soc/qcom/mdt_loader.h> +#include <linux/pm_opp.h> +#include <linux/nvmem-consumer.h> #include "msm_gem.h" #include "msm_mmu.h" #include "a5xx_gpu.h" @@ -595,6 +597,12 @@ static int a5xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters */ gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01); + /* Select CP0 to always count cycles */ + gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); + + /* Select RBBM0 to countable 6 to get the busy status for devfreq */ + gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); + /* Increase VFD cache access so LRZ and other data gets evicted less */ gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); @@ -1165,6 +1173,14 @@ static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) return a5xx_gpu->cur_ring; } +static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value) +{ + *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, + REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); + + return 0; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -1180,10 +1196,30 @@ static const struct adreno_gpu_funcs funcs = { #ifdef CONFIG_DEBUG_FS .show = a5xx_show, #endif + .gpu_busy = a5xx_gpu_busy, }, .get_timestamp = a5xx_get_timestamp, }; +static void check_speed_bin(struct device *dev) +{ + struct nvmem_cell *cell; + u32 bin, val; + + cell = nvmem_cell_get(dev, "speed_bin"); + + /* If a nvmem cell isn't defined, nothing to do */ + if (IS_ERR(cell)) + return; + + bin = *((u32 *) nvmem_cell_read(cell, NULL)); + nvmem_cell_put(cell); + + val = (1 << bin); + + dev_pm_opp_set_supported_hw(dev, &val, 1); +} + struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; @@ -1210,6 +1246,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; + check_speed_bin(&pdev->dev); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index e5700bbf09dd..4e4d965fd9ab 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -103,10 +103,16 @@ static inline uint32_t _get_mvolts(struct msm_gpu *gpu, uint32_t freq) struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; struct dev_pm_opp *opp; + u32 ret = 0; opp = dev_pm_opp_find_freq_exact(&pdev->dev, freq, true); - return (!IS_ERR(opp)) ? dev_pm_opp_get_voltage(opp) / 1000 : 0; + if (!IS_ERR(opp)) { + ret = dev_pm_opp_get_voltage(opp) / 1000; + dev_pm_opp_put(opp); + } + + return ret; } /* Setup thermal limit management */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 05022ea2a007..62bdb7316da1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -17,7 +17,6 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/pm_opp.h> #include "adreno_gpu.h" #define ANY_ID 0xff @@ -90,14 +89,19 @@ static const struct adreno_info gpulist[] = { }, }; -MODULE_FIRMWARE("a300_pm4.fw"); -MODULE_FIRMWARE("a300_pfp.fw"); -MODULE_FIRMWARE("a330_pm4.fw"); -MODULE_FIRMWARE("a330_pfp.fw"); -MODULE_FIRMWARE("a420_pm4.fw"); -MODULE_FIRMWARE("a420_pfp.fw"); -MODULE_FIRMWARE("a530_fm4.fw"); -MODULE_FIRMWARE("a530_pfp.fw"); +MODULE_FIRMWARE("qcom/a300_pm4.fw"); +MODULE_FIRMWARE("qcom/a300_pfp.fw"); +MODULE_FIRMWARE("qcom/a330_pm4.fw"); +MODULE_FIRMWARE("qcom/a330_pfp.fw"); +MODULE_FIRMWARE("qcom/a420_pm4.fw"); +MODULE_FIRMWARE("qcom/a420_pfp.fw"); +MODULE_FIRMWARE("qcom/a530_pm4.fw"); +MODULE_FIRMWARE("qcom/a530_pfp.fw"); +MODULE_FIRMWARE("qcom/a530v3_gpmu.fw2"); +MODULE_FIRMWARE("qcom/a530_zap.mdt"); +MODULE_FIRMWARE("qcom/a530_zap.b00"); +MODULE_FIRMWARE("qcom/a530_zap.b01"); +MODULE_FIRMWARE("qcom/a530_zap.b02"); static inline bool _rev_match(uint8_t entry, uint8_t id) { @@ -125,11 +129,14 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; - struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev); + struct msm_gpu *gpu = NULL; int ret; + if (pdev) + gpu = platform_get_drvdata(pdev); + if (!gpu) { - dev_err(dev->dev, "no adreno device\n"); + dev_err_once(dev->dev, "no GPU device was found\n"); return NULL; } @@ -153,101 +160,45 @@ static void set_gpu_pdev(struct drm_device *dev, priv->gpu_pdev = pdev; } -static int find_chipid(struct device *dev, u32 *chipid) +static int find_chipid(struct device *dev, struct adreno_rev *rev) { struct device_node *node = dev->of_node; const char *compat; int ret; + u32 chipid; /* first search the compat strings for qcom,adreno-XYZ.W: */ ret = of_property_read_string_index(node, "compatible", 0, &compat); if (ret == 0) { - unsigned rev, patch; + unsigned int r, patch; - if (sscanf(compat, "qcom,adreno-%u.%u", &rev, &patch) == 2) { - *chipid = 0; - *chipid |= (rev / 100) << 24; /* core */ - rev %= 100; - *chipid |= (rev / 10) << 16; /* major */ - rev %= 10; - *chipid |= rev << 8; /* minor */ - *chipid |= patch; + if (sscanf(compat, "qcom,adreno-%u.%u", &r, &patch) == 2) { + rev->core = r / 100; + r %= 100; + rev->major = r / 10; + r %= 10; + rev->minor = r; + rev->patchid = patch; return 0; } } /* and if that fails, fall back to legacy "qcom,chipid" property: */ - ret = of_property_read_u32(node, "qcom,chipid", chipid); - if (ret) + ret = of_property_read_u32(node, "qcom,chipid", &chipid); + if (ret) { + dev_err(dev, "could not parse qcom,chipid: %d\n", ret); return ret; - - dev_warn(dev, "Using legacy qcom,chipid binding!\n"); - dev_warn(dev, "Use compatible qcom,adreno-%u%u%u.%u instead.\n", - (*chipid >> 24) & 0xff, (*chipid >> 16) & 0xff, - (*chipid >> 8) & 0xff, *chipid & 0xff); - - return 0; -} - -/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */ -static int adreno_get_legacy_pwrlevels(struct device *dev) -{ - struct device_node *child, *node; - int ret; - - node = of_find_compatible_node(dev->of_node, NULL, - "qcom,gpu-pwrlevels"); - if (!node) { - dev_err(dev, "Could not find the GPU powerlevels\n"); - return -ENXIO; } - for_each_child_of_node(node, child) { - unsigned int val; - - ret = of_property_read_u32(child, "qcom,gpu-freq", &val); - if (ret) - continue; + rev->core = (chipid >> 24) & 0xff; + rev->major = (chipid >> 16) & 0xff; + rev->minor = (chipid >> 8) & 0xff; + rev->patchid = (chipid & 0xff); - /* - * Skip the intentionally bogus clock value found at the bottom - * of most legacy frequency tables - */ - if (val != 27000000) - dev_pm_opp_add(dev, val, 0); - } - - return 0; -} - -static int adreno_get_pwrlevels(struct device *dev, - struct adreno_platform_config *config) -{ - unsigned long freq = ULONG_MAX; - struct dev_pm_opp *opp; - int ret; - - /* You down with OPP? */ - if (!of_find_property(dev->of_node, "operating-points-v2", NULL)) - ret = adreno_get_legacy_pwrlevels(dev); - else - ret = dev_pm_opp_of_add_table(dev); - - if (ret) - return ret; - - /* Find the fastest defined rate */ - opp = dev_pm_opp_find_freq_floor(dev, &freq); - if (!IS_ERR(opp)) - config->fast_rate = dev_pm_opp_get_freq(opp); - - if (!config->fast_rate) { - DRM_DEV_INFO(dev, - "Could not find clock rate. Using default\n"); - /* Pick a suitably safe clock speed for any target */ - config->fast_rate = 200000000; - } + dev_warn(dev, "Using legacy qcom,chipid binding!\n"); + dev_warn(dev, "Use compatible qcom,adreno-%u%u%u.%u instead.\n", + rev->core, rev->major, rev->minor, rev->patchid); return 0; } @@ -258,22 +209,9 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) const struct adreno_info *info; struct drm_device *drm = dev_get_drvdata(master); struct msm_gpu *gpu; - u32 val; int ret; - ret = find_chipid(dev, &val); - if (ret) { - dev_err(dev, "could not find chipid: %d\n", ret); - return ret; - } - - config.rev = ADRENO_REV((val >> 24) & 0xff, - (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff); - - /* find clock rates: */ - config.fast_rate = 0; - - ret = adreno_get_pwrlevels(dev, &config); + ret = find_chipid(dev, &config.rev); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index e2ffecce59a3..de63ff26a062 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -17,11 +17,11 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/pm_opp.h> #include "adreno_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" - int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -461,10 +461,80 @@ void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { if (spin_until(ring_freewords(ring) >= ndwords)) DRM_DEV_ERROR(ring->gpu->dev->dev, - "timeout waiting for space in ringubffer %d\n", + "timeout waiting for space in ringbuffer %d\n", ring->id); } +/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */ +static int adreno_get_legacy_pwrlevels(struct device *dev) +{ + struct device_node *child, *node; + int ret; + + node = of_find_compatible_node(dev->of_node, NULL, + "qcom,gpu-pwrlevels"); + if (!node) { + dev_err(dev, "Could not find the GPU powerlevels\n"); + return -ENXIO; + } + + for_each_child_of_node(node, child) { + unsigned int val; + + ret = of_property_read_u32(child, "qcom,gpu-freq", &val); + if (ret) + continue; + + /* + * Skip the intentionally bogus clock value found at the bottom + * of most legacy frequency tables + */ + if (val != 27000000) + dev_pm_opp_add(dev, val, 0); + } + + return 0; +} + +static int adreno_get_pwrlevels(struct device *dev, + struct msm_gpu *gpu) +{ + unsigned long freq = ULONG_MAX; + struct dev_pm_opp *opp; + int ret; + + gpu->fast_rate = 0; + + /* You down with OPP? */ + if (!of_find_property(dev->of_node, "operating-points-v2", NULL)) + ret = adreno_get_legacy_pwrlevels(dev); + else { + ret = dev_pm_opp_of_add_table(dev); + if (ret) + dev_err(dev, "Unable to set the OPP table\n"); + } + + if (!ret) { + /* Find the fastest defined rate */ + opp = dev_pm_opp_find_freq_floor(dev, &freq); + if (!IS_ERR(opp)) { + gpu->fast_rate = freq; + dev_pm_opp_put(opp); + } + } + + if (!gpu->fast_rate) { + dev_warn(dev, + "Could not find a clock rate. Using a reasonable default\n"); + /* Pick a suitably safe clock speed for any target */ + gpu->fast_rate = 200000000; + } + + DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate); + + return 0; +} + int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs, int nr_rings) @@ -479,15 +549,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu->revn = adreno_gpu->info->revn; adreno_gpu->rev = config->rev; - gpu->fast_rate = config->fast_rate; - gpu->bus_freq = config->bus_freq; -#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING - gpu->bus_scale_table = config->bus_scale_table; -#endif - - DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u", - gpu->fast_rate, gpu->bus_freq); - adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; adreno_gpu_config.irqname = "kgsl_3d0_irq"; @@ -496,6 +557,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.nr_rings = nr_rings; + adreno_get_pwrlevels(&pdev->dev, gpu); + pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_enable(&pdev->dev); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 28e3de6e5f94..8d3d0a924908 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -129,10 +129,6 @@ struct adreno_gpu { /* platform config data (ie. from DT, or pdata) */ struct adreno_platform_config { struct adreno_rev rev; - uint32_t fast_rate, bus_freq; -#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING - struct msm_bus_scale_pdata *bus_scale_table; -#endif }; #define ADRENO_IDLE_TIMEOUT msecs_to_jiffies(1000) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index 940de51ac5cd..a1b3e31e959e 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -234,10 +234,6 @@ static inline struct clk *mpd4_lvds_pll_init(struct drm_device *dev) #endif #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING -static inline int match_dev_name(struct device *dev, void *data) -{ - return !strcmp(dev_name(dev), data); -} /* bus scaling data is associated with extra pointless platform devices, * "dtv", etc.. this is a bit of a hack, but we need a way for encoders * to find their pdata to make the bus-scaling stuff work. @@ -245,8 +241,7 @@ static inline int match_dev_name(struct device *dev, void *data) static inline void *mdp4_find_pdata(const char *devname) { struct device *dev; - dev = bus_find_device(&platform_bus_type, NULL, - (void *)devname, match_dev_name); + dev = bus_find_device_by_name(&platform_bus_type, NULL, devname); return dev ? dev->platform_data : NULL; } #endif diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index ee41423baeb7..29678876fc09 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -966,8 +966,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, uint32_t src_x, src_y; uint32_t src_w, src_h; uint32_t src_img_w, src_img_h; - uint32_t src_x_r; - int crtc_x_r; int ret; nplanes = fb->format->num_planes; @@ -1012,9 +1010,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, crtc_w /= 2; src_w /= 2; src_img_w /= 2; - - crtc_x_r = crtc_x + crtc_w; - src_x_r = src_x + src_w; } ret = calc_scalex_steps(plane, pix_format, src_w, crtc_w, step.x); @@ -1054,9 +1049,9 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, if (right_hwpipe) mdp5_hwpipe_mode_set(mdp5_kms, right_hwpipe, fb, &step, &pe, config, hdecm, vdecm, hflip, vflip, - crtc_x_r, crtc_y, crtc_w, crtc_h, + crtc_x + crtc_w, crtc_y, crtc_w, crtc_h, src_img_w, src_img_h, - src_x_r, src_y, src_w, src_h); + src_x + src_w, src_y, src_w, src_h); plane->fb = fb; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c646843d8822..0a653dd2e618 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -303,7 +303,8 @@ int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } -static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} +static inline void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...) {} static inline void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) {} static inline void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) {} #endif diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 81fe6d6740ce..07376de9ff4c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -93,14 +93,17 @@ static struct page **get_pages(struct drm_gem_object *obj) return p; } + msm_obj->pages = p; + msm_obj->sgt = drm_prime_pages_to_sg(p, npages); if (IS_ERR(msm_obj->sgt)) { + void *ptr = ERR_CAST(msm_obj->sgt); + dev_err(dev->dev, "failed to allocate sgt\n"); - return ERR_CAST(msm_obj->sgt); + msm_obj->sgt = NULL; + return ptr; } - msm_obj->pages = p; - /* For non-cached buffers, ensure the new pages are clean * because display controller, GPU, etc. are not coherent: */ @@ -135,7 +138,10 @@ static void put_pages(struct drm_gem_object *obj) if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, msm_obj->sgt->nents, DMA_BIDIRECTIONAL); - sg_free_table(msm_obj->sgt); + + if (msm_obj->sgt) + sg_free_table(msm_obj->sgt); + kfree(msm_obj->sgt); if (use_pages(obj)) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 232201403439..bd376f9e18a7 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -21,42 +21,90 @@ #include "msm_fence.h" #include <linux/string_helpers.h> +#include <linux/pm_opp.h> +#include <linux/devfreq.h> /* * Power Management: */ -#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING -#include <mach/board.h> -static void bs_init(struct msm_gpu *gpu) +static int msm_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) { - if (gpu->bus_scale_table) { - gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table); - DBG("bus scale client: %08x", gpu->bsc); - } + struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); + struct dev_pm_opp *opp; + + opp = devfreq_recommended_opp(dev, freq, flags); + + if (IS_ERR(opp)) + return PTR_ERR(opp); + + clk_set_rate(gpu->core_clk, *freq); + dev_pm_opp_put(opp); + + return 0; } -static void bs_fini(struct msm_gpu *gpu) +static int msm_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) { - if (gpu->bsc) { - msm_bus_scale_unregister_client(gpu->bsc); - gpu->bsc = 0; - } + struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); + u64 cycles; + u32 freq = ((u32) status->current_frequency) / 1000000; + ktime_t time; + + status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk); + gpu->funcs->gpu_busy(gpu, &cycles); + + status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq; + + gpu->devfreq.busy_cycles = cycles; + + time = ktime_get(); + status->total_time = ktime_us_delta(time, gpu->devfreq.time); + gpu->devfreq.time = time; + + return 0; +} + +static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); + + *freq = (unsigned long) clk_get_rate(gpu->core_clk); + + return 0; } -static void bs_set(struct msm_gpu *gpu, int idx) +static struct devfreq_dev_profile msm_devfreq_profile = { + .polling_ms = 10, + .target = msm_devfreq_target, + .get_dev_status = msm_devfreq_get_dev_status, + .get_cur_freq = msm_devfreq_get_cur_freq, +}; + +static void msm_devfreq_init(struct msm_gpu *gpu) { - if (gpu->bsc) { - DBG("set bus scaling: %d", idx); - msm_bus_scale_client_update_request(gpu->bsc, idx); + /* We need target support to do devfreq */ + if (!gpu->funcs->gpu_busy) + return; + + msm_devfreq_profile.initial_freq = gpu->fast_rate; + + /* + * Don't set the freq_table or max_state and let devfreq build the table + * from OPP + */ + + gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, + &msm_devfreq_profile, "simple_ondemand", NULL); + + if (IS_ERR(gpu->devfreq.devfreq)) { + dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); + gpu->devfreq.devfreq = NULL; } } -#else -static void bs_init(struct msm_gpu *gpu) {} -static void bs_fini(struct msm_gpu *gpu) {} -static void bs_set(struct msm_gpu *gpu, int idx) {} -#endif static int enable_pwrrail(struct msm_gpu *gpu) { @@ -143,8 +191,6 @@ static int enable_axi(struct msm_gpu *gpu) { if (gpu->ebi1_clk) clk_prepare_enable(gpu->ebi1_clk); - if (gpu->bus_freq) - bs_set(gpu, gpu->bus_freq); return 0; } @@ -152,8 +198,6 @@ static int disable_axi(struct msm_gpu *gpu) { if (gpu->ebi1_clk) clk_disable_unprepare(gpu->ebi1_clk); - if (gpu->bus_freq) - bs_set(gpu, 0); return 0; } @@ -175,6 +219,13 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu) if (ret) return ret; + if (gpu->devfreq.devfreq) { + gpu->devfreq.busy_cycles = 0; + gpu->devfreq.time = ktime_get(); + + devfreq_resume_device(gpu->devfreq.devfreq); + } + gpu->needs_hw_init = true; return 0; @@ -186,6 +237,9 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) DBG("%s", gpu->name); + if (gpu->devfreq.devfreq) + devfreq_suspend_device(gpu->devfreq.devfreq); + ret = disable_axi(gpu); if (ret) return ret; @@ -294,6 +348,8 @@ static void recover_worker(struct work_struct *work) msm_rd_dump_submit(priv->hangrd, submit, "offending task: %s (%s)", task->comm, cmd); + + kfree(cmd); } else { msm_rd_dump_submit(priv->hangrd, submit, NULL); } @@ -306,7 +362,7 @@ static void recover_worker(struct work_struct *work) * needs to happen after msm_rd_dump_submit() to ensure that the * bo's referenced by the offending submit are still around. */ - for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + for (i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i]; uint32_t fence = ring->memptrs->fence; @@ -753,7 +809,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->pdev = pdev; platform_set_drvdata(pdev, gpu); - bs_init(gpu); + msm_devfreq_init(gpu); gpu->aspace = msm_gpu_create_address_space(gpu, pdev, config->va_start, config->va_end); @@ -824,8 +880,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) WARN_ON(!list_empty(&gpu->active_list)); - bs_fini(gpu); - for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { msm_ringbuffer_destroy(gpu->rb[i]); gpu->rb[i] = NULL; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index e113d64574d3..fccfccd303af 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -66,6 +66,7 @@ struct msm_gpu_funcs { /* show GPU status in debugfs: */ void (*show)(struct msm_gpu *gpu, struct seq_file *m); #endif + int (*gpu_busy)(struct msm_gpu *gpu, uint64_t *value); }; struct msm_gpu { @@ -108,12 +109,7 @@ struct msm_gpu { struct clk **grp_clks; int nr_clocks; struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk; - uint32_t fast_rate, bus_freq; - -#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING - struct msm_bus_scale_pdata *bus_scale_table; - uint32_t bsc; -#endif + uint32_t fast_rate; /* Hang and Inactivity Detection: */ @@ -125,6 +121,12 @@ struct msm_gpu { struct work_struct recover_work; struct drm_gem_object *memptrs_bo; + + struct { + struct devfreq *devfreq; + u64 busy_cycles; + ktime_t time; + } devfreq; }; /* It turns out that all targets use the same ringbuffer size */ |