diff options
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r-- | drivers/gpu/drm/vc4/Kconfig | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_bo.c | 116 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_crtc.c | 54 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_dpi.c | 180 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 82 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_dsi.c | 294 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_fence.c | 56 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 189 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hdmi.c | 202 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hvs.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_irq.c | 65 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_kms.c | 101 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 40 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_regs.h | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_render_cl.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 185 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate.c | 57 |
19 files changed, 1056 insertions, 606 deletions
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 973b4203c0b2..4361bdcfd28a 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -1,13 +1,13 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" - depends on ARCH_BCM2835 || COMPILE_TEST + depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST depends on DRM depends on SND && SND_SOC depends on COMMON_CLK select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER - select DRM_PANEL + select DRM_PANEL_BRIDGE select SND_PCM select SND_PCM_ELD select SND_SOC_GENERIC_DMAENGINE_PCM diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 61f45d122bd0..25bd5d30415d 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -1,5 +1,3 @@ -ccflags-y := -Iinclude/drm - # Please keep these build lists sorted! # core driver code @@ -9,6 +7,7 @@ vc4-y := \ vc4_drv.o \ vc4_dpi.o \ vc4_dsi.o \ + vc4_fence.o \ vc4_kms.o \ vc4_gem.o \ vc4_hdmi.o \ diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index af29432a6471..487f96412d35 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -19,6 +19,8 @@ * rendering can return quickly. */ +#include <linux/dma-buf.h> + #include "vc4_drv.h" #include "uapi/drm/vc4_drm.h" @@ -88,6 +90,9 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4->bo_stats.num_allocated--; vc4->bo_stats.size_allocated -= obj->size; + + reservation_object_fini(&bo->_resv); + drm_gem_cma_free_object(obj); } @@ -206,6 +211,8 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) vc4->bo_stats.num_allocated++; vc4->bo_stats.size_allocated += size; mutex_unlock(&vc4->bo_lock); + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); return &bo->base.base; } @@ -244,7 +251,6 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-ENOMEM); } } - return to_vc4_bo(&cma_obj->base); } @@ -337,6 +343,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); list_add(&bo->unref_head, &vc4->bo_cache.time_list); @@ -369,6 +376,13 @@ static void vc4_bo_cache_time_timer(unsigned long data) schedule_work(&vc4->bo_cache.time_work); } +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + return bo->resv; +} + struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { @@ -440,6 +454,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj) return drm_gem_cma_prime_vmap(obj); } +struct drm_gem_object * +vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct vc4_bo *bo; + + obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) + return obj; + + bo = to_vc4_bo(obj); + bo->resv = attach->dmabuf->resv; + + return obj; +} + int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -537,6 +569,88 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, return ret; } +/** + * vc4_set_tiling_ioctl() - Sets the tiling modifier for a BO. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * The tiling state of the BO decides the default modifier of an fb if + * no specific modifier was set by userspace, and the return value of + * vc4_get_tiling_ioctl() (so that userspace can treat a BO it + * received from dmabuf as the same tiling format as the producer + * used). + */ +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_set_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + bool t_format; + + if (args->flags != 0) + return -EINVAL; + + switch (args->modifier) { + case DRM_FORMAT_MOD_NONE: + t_format = false; + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + t_format = true; + break; + default: + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + bo->t_format = t_format; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + +/** + * vc4_get_tiling_ioctl() - Gets the tiling modifier for a BO. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Returns the tiling modifier for a BO as set by vc4_set_tiling_ioctl(). + */ +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_get_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + if (args->flags != 0 || args->modifier != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + + if (bo->t_format) + args->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + else + args->modifier = DRM_FORMAT_MOD_NONE; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + void vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index d86c8cce3182..403bbd5f99a9 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -32,13 +32,13 @@ * ones that set the clock. */ -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "linux/clk.h" -#include "drm_fb_cma_helper.h" -#include "linux/component.h" -#include "linux/of_device.h" +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <linux/clk.h> +#include <drm/drm_fb_cma_helper.h> +#include <linux/component.h> +#include <linux/of_device.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -151,10 +151,10 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused) } #endif -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id); @@ -162,7 +162,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, u32 val; int fifo_lines; int vblank_lines; - int ret = 0; + bool ret = false; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -198,7 +198,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay; if (fifo_lines > 0) - ret |= DRM_SCANOUTPOS_VALID; + ret = true; /* HVS more than fifo_lines into frame for compositing? */ if (*vpos > fifo_lines) { @@ -216,7 +216,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, */ *vpos -= fifo_lines + 1; - ret |= DRM_SCANOUTPOS_ACCURATE; return ret; } @@ -229,10 +228,9 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * We can't get meaningful readings wrt. scanline position of the PV * and need to make things up in a approximative but consistent way. */ - ret |= DRM_SCANOUTPOS_IN_VBLANK; vblank_lines = mode->vtotal - mode->vdisplay; - if (flags & DRM_CALLED_FROM_VBLIRQ) { + if (in_vblank_irq) { /* * Assume the irq handler got called close to first * line of vblank, so PV has about a full vblank @@ -254,9 +252,10 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * we are at the very beginning of vblank, as the hvs just * started refilling, and the stime and etime timestamps * truly correspond to start of vblank. + * + * Unfortunately there's no way to report this to upper levels + * and make it more useful. */ - if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL) - ret |= DRM_SCANOUTPOS_ACCURATE; } else { /* * No clue where we are inside vblank. Return a vpos of zero, @@ -270,19 +269,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, return ret; } -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id); - struct drm_crtc_state *state = crtc->state; - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error, - vblank_time, flags, - &state->adjusted_mode); -} - static void vc4_crtc_destroy(struct drm_crtc *crtc) { drm_crtc_cleanup(crtc); @@ -359,12 +345,16 @@ static u32 vc4_get_fifo_full_level(u32 format) static struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc) { struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; - drm_for_each_connector(connector, crtc->dev) { + drm_connector_list_iter_begin(crtc->dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->state->crtc == crtc) { + drm_connector_list_iter_end(&conn_iter); return connector->encoder; } } + drm_connector_list_iter_end(&conn_iter); return NULL; } diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index c6d703903fd9..2e0fe46aeb2e 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -22,14 +22,16 @@ * ALT2 function. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "drm_panel.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/of_graph.h" -#include "linux/of_platform.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/of_graph.h> +#include <linux/of_platform.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -95,7 +97,8 @@ struct vc4_dpi { struct drm_encoder *encoder; struct drm_connector *connector; - struct drm_panel *panel; + struct drm_bridge *bridge; + bool is_panel_bridge; void __iomem *regs; @@ -118,24 +121,6 @@ to_vc4_dpi_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_dpi_encoder, base.base); } -/* VC4 DPI connector KMS struct */ -struct vc4_dpi_connector { - struct drm_connector base; - struct vc4_dpi *dpi; - - /* Since the connector is attached to just the one encoder, - * this is the reference to it so we can do the best_encoder() - * hook. - */ - struct drm_encoder *encoder; -}; - -static inline struct vc4_dpi_connector * -to_vc4_dpi_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_dpi_connector, base); -} - #define DPI_REG(reg) { reg, #reg } static const struct { u32 reg; @@ -167,80 +152,6 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused) } #endif -static enum drm_connector_status -vc4_dpi_connector_detect(struct drm_connector *connector, bool force) -{ - struct vc4_dpi_connector *vc4_connector = - to_vc4_dpi_connector(connector); - struct vc4_dpi *dpi = vc4_connector->dpi; - - if (dpi->panel) - return connector_status_connected; - else - return connector_status_disconnected; -} - -static void vc4_dpi_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static int vc4_dpi_connector_get_modes(struct drm_connector *connector) -{ - struct vc4_dpi_connector *vc4_connector = - to_vc4_dpi_connector(connector); - struct vc4_dpi *dpi = vc4_connector->dpi; - - if (dpi->panel) - return drm_panel_get_modes(dpi->panel); - - return 0; -} - -static const struct drm_connector_funcs vc4_dpi_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, - .detect = vc4_dpi_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = vc4_dpi_connector_destroy, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static const struct drm_connector_helper_funcs vc4_dpi_connector_helper_funcs = { - .get_modes = vc4_dpi_connector_get_modes, -}; - -static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev, - struct vc4_dpi *dpi) -{ - struct drm_connector *connector = NULL; - struct vc4_dpi_connector *dpi_connector; - - dpi_connector = devm_kzalloc(dev->dev, sizeof(*dpi_connector), - GFP_KERNEL); - if (!dpi_connector) - return ERR_PTR(-ENOMEM); - - connector = &dpi_connector->base; - - dpi_connector->encoder = dpi->encoder; - dpi_connector->dpi = dpi; - - drm_connector_init(dev, connector, &vc4_dpi_connector_funcs, - DRM_MODE_CONNECTOR_DPI); - drm_connector_helper_add(connector, &vc4_dpi_connector_helper_funcs); - - connector->polled = 0; - connector->interlace_allowed = 0; - connector->doublescan_allowed = 0; - - drm_mode_connector_attach_encoder(connector, dpi->encoder); - - return connector; -} - static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -250,11 +161,7 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder) struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); struct vc4_dpi *dpi = vc4_encoder->dpi; - drm_panel_disable(dpi->panel); - clk_disable_unprepare(dpi->pixel_clock); - - drm_panel_unprepare(dpi->panel); } static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) @@ -265,12 +172,6 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE; int ret; - ret = drm_panel_prepare(dpi->panel); - if (ret) { - DRM_ERROR("Panel failed to prepare\n"); - return; - } - if (dpi->connector->display_info.num_bus_formats) { u32 bus_format = dpi->connector->display_info.bus_formats[0]; @@ -321,13 +222,6 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) ret = clk_prepare_enable(dpi->pixel_clock); if (ret) DRM_ERROR("Failed to set clock rate: %d\n", ret); - - ret = drm_panel_enable(dpi->panel); - if (ret) { - DRM_ERROR("Panel failed to enable\n"); - drm_panel_unprepare(dpi->panel); - return; - } } static bool vc4_dpi_encoder_mode_fixup(struct drm_encoder *encoder, @@ -351,24 +245,34 @@ static const struct of_device_id vc4_dpi_dt_match[] = { {} }; -/* Walks the OF graph to find the panel node and then asks DRM to look - * up the panel. +/* Sets up the next link in the display chain, whether it's a panel or + * a bridge. */ -static struct drm_panel *vc4_dpi_get_panel(struct device *dev) +static int vc4_dpi_init_bridge(struct vc4_dpi *dpi) { - struct device_node *panel_node; - struct device_node *np = dev->of_node; + struct device *dev = &dpi->pdev->dev; struct drm_panel *panel; + int ret; - /* don't proceed if we have an endpoint but no panel_node tied to it */ - panel_node = of_graph_get_remote_node(np, 0, 0); - if (!panel_node) - return NULL; + ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, + &panel, &dpi->bridge); + if (ret) { + /* If nothing was connected in the DT, that's not an + * error. + */ + if (ret == -ENODEV) + return 0; + else + return ret; + } - panel = of_drm_find_panel(panel_node); - of_node_put(panel_node); + if (panel) { + dpi->bridge = drm_panel_bridge_add(panel, + DRM_MODE_CONNECTOR_DPI); + dpi->is_panel_bridge = true; + } - return panel; + return drm_bridge_attach(dpi->encoder, dpi->bridge, NULL); } static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) @@ -422,20 +326,13 @@ static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) if (ret) DRM_ERROR("Failed to turn on core clock: %d\n", ret); - dpi->panel = vc4_dpi_get_panel(dev); - drm_encoder_init(drm, dpi->encoder, &vc4_dpi_encoder_funcs, DRM_MODE_ENCODER_DPI, NULL); drm_encoder_helper_add(dpi->encoder, &vc4_dpi_encoder_helper_funcs); - dpi->connector = vc4_dpi_connector_init(drm, dpi); - if (IS_ERR(dpi->connector)) { - ret = PTR_ERR(dpi->connector); + ret = vc4_dpi_init_bridge(dpi); + if (ret) goto err_destroy_encoder; - } - - if (dpi->panel) - drm_panel_attach(dpi->panel, dpi->connector); dev_set_drvdata(dev, dpi); @@ -456,10 +353,9 @@ static void vc4_dpi_unbind(struct device *dev, struct device *master, struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_dpi *dpi = dev_get_drvdata(dev); - if (dpi->panel) - drm_panel_detach(dpi->panel); + if (dpi->is_panel_bridge) + drm_panel_bridge_remove(dpi->bridge); - vc4_dpi_connector_destroy(dpi->connector); drm_encoder_cleanup(dpi->encoder); clk_disable_unprepare(dpi->core_clock); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 61e674baf3a6..c6b487c3d2b7 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -31,7 +31,7 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include "drm_fb_cma_helper.h" +#include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> #include "uapi/drm/vc4_drm.h" @@ -138,6 +138,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -154,7 +156,7 @@ static struct drm_driver vc4_drm_driver = { .irq_uninstall = vc4_irq_uninstall, .get_scanout_position = vc4_crtc_get_scanoutpos, - .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, @@ -168,8 +170,9 @@ static struct drm_driver vc4_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, .gem_prime_export = vc4_prime_export, + .gem_prime_res_obj = vc4_prime_res_obj, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = vc4_prime_import_sg_table, .gem_prime_vmap = vc4_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = vc4_prime_mmap, @@ -334,6 +337,7 @@ static int vc4_platform_drm_remove(struct platform_device *pdev) static const struct of_device_id vc4_of_match[] = { { .compatible = "brcm,bcm2835-vc4", }, + { .compatible = "brcm,cygnus-vc4", }, {}, }; MODULE_DEVICE_TABLE(of, vc4_of_match); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index dffce6293d87..df22698d62ee 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -6,10 +6,10 @@ * published by the Free Software Foundation. */ -#include "drmP.h" -#include "drm_gem_cma_helper.h" - +#include <linux/reservation.h> +#include <drm/drmP.h> #include <drm/drm_encoder.h> +#include <drm/drm_gem_cma_helper.h> struct vc4_dev { struct drm_device *dev; @@ -56,6 +56,8 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. */ struct mutex bo_lock; + uint64_t dma_fence_context; + /* Sequence number for the last job queued in bin_job_list. * Starts at 0 (no jobs emitted). */ @@ -95,12 +97,23 @@ struct vc4_dev { */ struct list_head seqno_cb_list; - /* The binner overflow memory that's currently set up in - * BPOA/BPOS registers. When overflow occurs and a new one is - * allocated, the previous one will be moved to - * vc4->current_exec's free list. + /* The memory used for storing binner tile alloc, tile state, + * and overflow memory allocations. This is freed when V3D + * powers down. + */ + struct vc4_bo *bin_bo; + + /* Size of blocks allocated within bin_bo. */ + uint32_t bin_alloc_size; + + /* Bitmask of the bin_alloc_size chunks in bin_bo that are + * used. */ - struct vc4_bo *overflow_mem; + uint32_t bin_alloc_used; + + /* Bitmask of the current bin_alloc used for overflow memory. */ + uint32_t bin_alloc_overflow; + struct work_struct overflow_mem_work; int power_refcount; @@ -135,6 +148,8 @@ struct vc4_bo { */ uint64_t write_seqno; + bool t_format; + /* List entry for the BO's position in either * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list */ @@ -150,6 +165,10 @@ struct vc4_bo { * DRM_IOCTL_VC4_CREATE_SHADER_BO. */ struct vc4_validated_shader_info *validated_shader; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; }; static inline struct vc4_bo * @@ -158,6 +177,19 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; } +struct vc4_fence { + struct dma_fence base; + struct drm_device *dev; + /* vc4 seqno for signaled() test */ + uint64_t seqno; +}; + +static inline struct vc4_fence * +to_vc4_fence(struct dma_fence *fence) +{ + return (struct vc4_fence *)fence; +} + struct vc4_seqno_cb { struct work_struct work; uint64_t seqno; @@ -168,6 +200,7 @@ struct vc4_v3d { struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; + struct clk *clk; }; struct vc4_hvs { @@ -230,6 +263,8 @@ struct vc4_exec_info { /* Latest write_seqno of any BO that binning depends on. */ uint64_t bin_dep_seqno; + struct dma_fence *fence; + /* Last current addresses the hardware was processing when the * hangcheck timer checked on us. */ @@ -293,8 +328,12 @@ struct vc4_exec_info { bool found_increment_semaphore_packet; bool found_flush; uint8_t bin_tiles_x, bin_tiles_y; - struct drm_gem_cma_object *tile_bo; + /* Physical address of the start of the tile alloc array + * (where each tile's binned CL will start) + */ uint32_t tile_alloc_offset; + /* Bitmask of which binner slots are freed when this job completes. */ + uint32_t bin_slots; /** * Computed addresses pointing into exec_bo where we start the @@ -433,10 +472,18 @@ int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); void *vc4_prime_vmap(struct drm_gem_object *obj); void vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); @@ -446,13 +493,10 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); extern struct platform_driver vc4_crtc_driver; bool vc4_event_pending(struct drm_crtc *crtc); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags); +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); /* vc4_debugfs.c */ int vc4_debugfs_init(struct drm_minor *minor); @@ -468,6 +512,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); extern struct platform_driver vc4_dsi_driver; int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); +/* vc4_fence.c */ +extern const struct dma_fence_ops vc4_fence_ops; + /* vc4_gem.c */ void vc4_gem_init(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev); @@ -491,7 +538,7 @@ int vc4_queue_seqno_cb(struct drm_device *dev, extern struct platform_driver vc4_hdmi_driver; int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused); -/* vc4_hdmi.c */ +/* vc4_vec.c */ extern struct platform_driver vc4_vec_driver; int vc4_vec_debugfs_regs(struct seq_file *m, void *unused); @@ -522,6 +569,7 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, extern struct platform_driver vc4_v3d_driver; int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4); /* vc4_validate.c */ int diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 160f981d1cf4..5e8b81eaa168 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -29,20 +29,20 @@ * hopefully present. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "drm_mipi_dsi.h" -#include "drm_panel.h" -#include "linux/clk.h" -#include "linux/clk-provider.h" -#include "linux/completion.h" -#include "linux/component.h" -#include "linux/dmaengine.h" -#include "linux/i2c.h" -#include "linux/of_address.h" -#include "linux/of_platform.h" -#include "linux/pm_runtime.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/completion.h> +#include <linux/component.h> +#include <linux/dmaengine.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -503,8 +503,8 @@ struct vc4_dsi { struct mipi_dsi_host dsi_host; struct drm_encoder *encoder; - struct drm_connector *connector; - struct drm_panel *panel; + struct drm_bridge *bridge; + bool is_panel_bridge; void __iomem *regs; @@ -519,7 +519,8 @@ struct vc4_dsi { /* DSI channel for the panel we're connected to. */ u32 channel; u32 lanes; - enum mipi_dsi_pixel_format format; + u32 format; + u32 divider; u32 mode_flags; /* Input clock from CPRMAN to the digital PHY, for the DSI @@ -604,18 +605,6 @@ to_vc4_dsi_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_dsi_encoder, base.base); } -/* VC4 DSI connector KMS struct */ -struct vc4_dsi_connector { - struct drm_connector base; - struct vc4_dsi *dsi; -}; - -static inline struct vc4_dsi_connector * -to_vc4_dsi_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_dsi_connector, base); -} - #define DSI_REG(reg) { reg, #reg } static const struct { u32 reg; @@ -723,79 +712,6 @@ int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused) } #endif -static enum drm_connector_status -vc4_dsi_connector_detect(struct drm_connector *connector, bool force) -{ - struct vc4_dsi_connector *vc4_connector = - to_vc4_dsi_connector(connector); - struct vc4_dsi *dsi = vc4_connector->dsi; - - if (dsi->panel) - return connector_status_connected; - else - return connector_status_disconnected; -} - -static void vc4_dsi_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static int vc4_dsi_connector_get_modes(struct drm_connector *connector) -{ - struct vc4_dsi_connector *vc4_connector = - to_vc4_dsi_connector(connector); - struct vc4_dsi *dsi = vc4_connector->dsi; - - if (dsi->panel) - return drm_panel_get_modes(dsi->panel); - - return 0; -} - -static const struct drm_connector_funcs vc4_dsi_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, - .detect = vc4_dsi_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = vc4_dsi_connector_destroy, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static const struct drm_connector_helper_funcs vc4_dsi_connector_helper_funcs = { - .get_modes = vc4_dsi_connector_get_modes, -}; - -static struct drm_connector *vc4_dsi_connector_init(struct drm_device *dev, - struct vc4_dsi *dsi) -{ - struct drm_connector *connector; - struct vc4_dsi_connector *dsi_connector; - - dsi_connector = devm_kzalloc(dev->dev, sizeof(*dsi_connector), - GFP_KERNEL); - if (!dsi_connector) - return ERR_PTR(-ENOMEM); - - connector = &dsi_connector->base; - - dsi_connector->dsi = dsi; - - drm_connector_init(dev, connector, &vc4_dsi_connector_funcs, - DRM_MODE_CONNECTOR_DSI); - drm_connector_helper_add(connector, &vc4_dsi_connector_helper_funcs); - - connector->polled = 0; - connector->interlace_allowed = 0; - connector->doublescan_allowed = 0; - - drm_mode_connector_attach_encoder(connector, dsi->encoder); - - return connector; -} - static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder) { drm_encoder_cleanup(encoder); @@ -893,12 +809,8 @@ static void vc4_dsi_encoder_disable(struct drm_encoder *encoder) struct vc4_dsi *dsi = vc4_encoder->dsi; struct device *dev = &dsi->pdev->dev; - drm_panel_disable(dsi->panel); - vc4_dsi_ulps(dsi, true); - drm_panel_unprepare(dsi->panel); - clk_disable_unprepare(dsi->pll_phy_clock); clk_disable_unprepare(dsi->escape_clock); clk_disable_unprepare(dsi->pixel_clock); @@ -906,13 +818,67 @@ static void vc4_dsi_encoder_disable(struct drm_encoder *encoder) pm_runtime_put(dev); } +/* Extends the mode's blank intervals to handle BCM2835's integer-only + * DSI PLL divider. + * + * On 2835, PLLD is set to 2Ghz, and may not be changed by the display + * driver since most peripherals are hanging off of the PLLD_PER + * divider. PLLD_DSI1, which drives our DSI bit clock (and therefore + * the pixel clock), only has an integer divider off of DSI. + * + * To get our panel mode to refresh at the expected 60Hz, we need to + * extend the horizontal blank time. This means we drive a + * higher-than-expected clock rate to the panel, but that's what the + * firmware does too. + */ +static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); + struct vc4_dsi *dsi = vc4_encoder->dsi; + struct clk *phy_parent = clk_get_parent(dsi->pll_phy_clock); + unsigned long parent_rate = clk_get_rate(phy_parent); + unsigned long pixel_clock_hz = mode->clock * 1000; + unsigned long pll_clock = pixel_clock_hz * dsi->divider; + int divider; + + /* Find what divider gets us a faster clock than the requested + * pixel clock. + */ + for (divider = 1; divider < 8; divider++) { + if (parent_rate / divider < pll_clock) { + divider--; + break; + } + } + + /* Now that we've picked a PLL divider, calculate back to its + * pixel clock. + */ + pll_clock = parent_rate / divider; + pixel_clock_hz = pll_clock / dsi->divider; + + /* Round up the clk_set_rate() request slightly, since + * PLLD_DSI1 is an integer divider and its rate selection will + * never round up. + */ + adjusted_mode->clock = pixel_clock_hz / 1000 + 1; + + /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */ + adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); + adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; + adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; + + return true; +} + static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) { - struct drm_display_mode *mode = &encoder->crtc->mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); struct vc4_dsi *dsi = vc4_encoder->dsi; struct device *dev = &dsi->pdev->dev; - u32 format = 0, divider = 0; bool debug_dump_regs = false; unsigned long hs_clock; u32 ui_ns; @@ -929,37 +895,12 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) return; } - ret = drm_panel_prepare(dsi->panel); - if (ret) { - DRM_ERROR("Panel failed to prepare\n"); - return; - } - if (debug_dump_regs) { DRM_INFO("DSI regs before:\n"); vc4_dsi_dump_regs(dsi); } - switch (dsi->format) { - case MIPI_DSI_FMT_RGB888: - format = DSI_PFORMAT_RGB888; - divider = 24 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB666: - format = DSI_PFORMAT_RGB666; - divider = 24 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB666_PACKED: - format = DSI_PFORMAT_RGB666_PACKED; - divider = 18 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB565: - format = DSI_PFORMAT_RGB565; - divider = 16 / dsi->lanes; - break; - } - - phy_clock = pixel_clock_hz * divider; + phy_clock = pixel_clock_hz * dsi->divider; ret = clk_set_rate(dsi->pll_phy_clock, phy_clock); if (ret) { dev_err(&dsi->pdev->dev, @@ -1134,8 +1075,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { DSI_PORT_WRITE(DISP0_CTRL, - VC4_SET_FIELD(divider, DSI_DISP0_PIX_CLK_DIV) | - VC4_SET_FIELD(format, DSI_DISP0_PFORMAT) | + VC4_SET_FIELD(dsi->divider, + DSI_DISP0_PIX_CLK_DIV) | + VC4_SET_FIELD(dsi->format, DSI_DISP0_PFORMAT) | VC4_SET_FIELD(DSI_DISP0_LP_STOP_PERFRAME, DSI_DISP0_LP_STOP_CTRL) | DSI_DISP0_ST_END | @@ -1174,13 +1116,6 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) DRM_INFO("DSI regs after:\n"); vc4_dsi_dump_regs(dsi); } - - ret = drm_panel_enable(dsi->panel); - if (ret) { - DRM_ERROR("Panel failed to enable\n"); - drm_panel_unprepare(dsi->panel); - return; - } } static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host, @@ -1347,26 +1282,53 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, dsi->lanes = device->lanes; dsi->channel = device->channel; - dsi->format = device->format; dsi->mode_flags = device->mode_flags; + switch (device->format) { + case MIPI_DSI_FMT_RGB888: + dsi->format = DSI_PFORMAT_RGB888; + dsi->divider = 24 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB666: + dsi->format = DSI_PFORMAT_RGB666; + dsi->divider = 24 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + dsi->format = DSI_PFORMAT_RGB666_PACKED; + dsi->divider = 18 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB565: + dsi->format = DSI_PFORMAT_RGB565; + dsi->divider = 16 / dsi->lanes; + break; + default: + dev_err(&dsi->pdev->dev, "Unknown DSI format: %d.\n", + dsi->format); + return 0; + } + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) { dev_err(&dsi->pdev->dev, "Only VIDEO mode panels supported currently.\n"); return 0; } - dsi->panel = of_drm_find_panel(device->dev.of_node); - if (!dsi->panel) - return 0; - - ret = drm_panel_attach(dsi->panel, dsi->connector); - if (ret != 0) - return ret; + dsi->bridge = of_drm_find_bridge(device->dev.of_node); + if (!dsi->bridge) { + struct drm_panel *panel = + of_drm_find_panel(device->dev.of_node); - drm_helper_hpd_irq_event(dsi->connector->dev); + dsi->bridge = drm_panel_bridge_add(panel, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(dsi->bridge)) { + ret = PTR_ERR(dsi->bridge); + dsi->bridge = NULL; + return ret; + } + dsi->is_panel_bridge = true; + } - return 0; + return drm_bridge_attach(dsi->encoder, dsi->bridge, NULL); } static int vc4_dsi_host_detach(struct mipi_dsi_host *host, @@ -1374,15 +1336,9 @@ static int vc4_dsi_host_detach(struct mipi_dsi_host *host, { struct vc4_dsi *dsi = host_to_dsi(host); - if (dsi->panel) { - int ret = drm_panel_detach(dsi->panel); - - if (ret) - return ret; - - dsi->panel = NULL; - - drm_helper_hpd_irq_event(dsi->connector->dev); + if (dsi->is_panel_bridge) { + drm_panel_bridge_remove(dsi->bridge); + dsi->bridge = NULL; } return 0; @@ -1397,6 +1353,7 @@ static const struct mipi_dsi_host_ops vc4_dsi_host_ops = { static const struct drm_encoder_helper_funcs vc4_dsi_encoder_helper_funcs = { .disable = vc4_dsi_encoder_disable, .enable = vc4_dsi_encoder_enable, + .mode_fixup = vc4_dsi_encoder_mode_fixup, }; static const struct of_device_id vc4_dsi_dt_match[] = { @@ -1648,12 +1605,6 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs); - dsi->connector = vc4_dsi_connector_init(drm, dsi); - if (IS_ERR(dsi->connector)) { - ret = PTR_ERR(dsi->connector); - goto err_destroy_encoder; - } - dsi->dsi_host.ops = &vc4_dsi_host_ops; dsi->dsi_host.dev = dev; @@ -1664,11 +1615,6 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) pm_runtime_enable(dev); return 0; - -err_destroy_encoder: - vc4_dsi_encoder_destroy(dsi->encoder); - - return ret; } static void vc4_dsi_unbind(struct device *dev, struct device *master, @@ -1680,7 +1626,7 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master, pm_runtime_disable(dev); - vc4_dsi_connector_destroy(dsi->connector); + drm_bridge_remove(dsi->bridge); vc4_dsi_encoder_destroy(dsi->encoder); mipi_dsi_host_unregister(&dsi->dsi_host); diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c new file mode 100644 index 000000000000..dbf5a5a5d5f5 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_fence.c @@ -0,0 +1,56 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vc4_drv.h" + +static const char *vc4_fence_get_driver_name(struct dma_fence *fence) +{ + return "vc4"; +} + +static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) +{ + return "vc4-v3d"; +} + +static bool vc4_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool vc4_fence_signaled(struct dma_fence *fence) +{ + struct vc4_fence *f = to_vc4_fence(fence); + struct vc4_dev *vc4 = to_vc4_dev(f->dev); + + return vc4->finished_seqno >= f->seqno; +} + +const struct dma_fence_ops vc4_fence_ops = { + .get_driver_name = vc4_fence_get_driver_name, + .get_timeline_name = vc4_fence_get_timeline_name, + .enable_signaling = vc4_fence_enable_signaling, + .signaled = vc4_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e9c381c42139..d5b821ad06af 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -111,8 +111,8 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, &handle); if (ret) { - state->bo_count = i - 1; - goto err; + state->bo_count = i; + goto err_delete_handle; } bo_state[i].handle = handle; bo_state[i].paddr = vc4_bo->base.paddr; @@ -124,13 +124,16 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, state->bo_count * sizeof(*bo_state))) ret = -EFAULT; - kfree(bo_state); +err_delete_handle: + if (ret) { + for (i = 0; i < state->bo_count; i++) + drm_gem_handle_delete(file_priv, bo_state[i].handle); + } err_free: - vc4_free_hang_state(dev, kernel_state); + kfree(bo_state); -err: return ret; } @@ -463,6 +466,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->bo_count; i++) { bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno; + + reservation_object_add_shared_fence(bo->resv, exec->fence); } list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -472,7 +477,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->rcl_write_bo_count; i++) { bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno; + + reservation_object_add_excl_fence(bo->resv, exec->fence); + } +} + +static void +vc4_unlock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to vc4, so we don't attach dma-buf fences to them. + */ +static int +vc4_lock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct vc4_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_vc4_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } + } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } } + + return 0; } /* Queues a struct vc4_exec_info for execution. If no job is @@ -484,19 +585,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) * then bump the end address. That's a change for a later date, * though. */ -static void -vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) +static int +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint64_t seqno; unsigned long irqflags; + struct vc4_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return -ENOMEM; + fence->dev = dev; spin_lock_irqsave(&vc4->job_lock, irqflags); seqno = ++vc4->emit_seqno; exec->seqno = seqno; + + dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, + vc4->dma_fence_context, exec->seqno); + fence->seqno = exec->seqno; + exec->fence = &fence->base; + vc4_update_bo_seqnos(exec, seqno); + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + list_add_tail(&exec->head, &vc4->bin_job_list); /* If no job was executing, kick ours off. Otherwise, it'll @@ -509,6 +625,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) } spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + return 0; } /** @@ -545,14 +663,15 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = drm_calloc_large(exec->bo_count, - sizeof(struct drm_gem_cma_object *)); + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; } - handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); + handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_ERROR("Failed to allocate incoming GEM handles\n"); @@ -584,7 +703,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - drm_free_large(handles); + kvfree(handles); return ret; } @@ -622,7 +741,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = drm_malloc_ab(temp_size, 1); + temp = kvmalloc_array(temp_size, 1, GFP_KERNEL); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -697,7 +816,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); fail: - drm_free_large(temp); + kvfree(temp); return ret; } @@ -705,12 +824,19 @@ static void vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) { struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; unsigned i; + /* If we got force-completed because of GPU reset rather than + * through our IRQ handler, signal the fence now. + */ + if (exec->fence) + dma_fence_signal(exec->fence); + if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base); - drm_free_large(exec->bo); + kvfree(exec->bo); } while (!list_empty(&exec->unref_list)) { @@ -720,6 +846,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) drm_gem_object_unreference_unlocked(&bo->base.base); } + /* Free up the allocation of any bin slots we used. */ + spin_lock_irqsave(&vc4->job_lock, irqflags); + vc4->bin_alloc_used &= ~exec->bin_slots; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -874,6 +1005,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; int ret = 0; if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { @@ -888,13 +1020,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } mutex_lock(&vc4->power_lock); - if (vc4->power_refcount++ == 0) + if (vc4->power_refcount++ == 0) { ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - mutex_unlock(&vc4->power_lock); - if (ret < 0) { - kfree(exec); - return ret; + if (ret < 0) { + mutex_unlock(&vc4->power_lock); + vc4->power_refcount--; + kfree(exec); + return ret; + } } + mutex_unlock(&vc4->power_lock); exec->args = args; INIT_LIST_HEAD(&exec->unref_list); @@ -916,12 +1051,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - vc4_queue_submit(dev, exec); + ret = vc4_queue_submit(dev, exec, &acquire_ctx); + if (ret) + goto fail; /* Return the seqno for our job. */ args->seqno = vc4->emit_seqno; @@ -939,6 +1080,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + vc4->dma_fence_context = dma_fence_context_alloc(1); + INIT_LIST_HEAD(&vc4->bin_job_list); INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list); @@ -968,9 +1111,9 @@ vc4_gem_destroy(struct drm_device *dev) /* V3D should already have disabled its interrupt and cleared * the overflow allocation registers. Now free the object. */ - if (vc4->overflow_mem) { - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = NULL; + if (vc4->bin_bo) { + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; } if (vc4->hang_state) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index e9cbe269710b..ed63d4e85762 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -42,20 +42,21 @@ * encoder block has CEC support. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/i2c.h" -#include "linux/of_address.h" -#include "linux/of_gpio.h" -#include "linux/of_platform.h" -#include "linux/rational.h" -#include "sound/dmaengine_pcm.h" -#include "sound/pcm_drm_eld.h" -#include "sound/pcm_params.h" -#include "sound/soc.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_gpio.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> +#include <linux/rational.h> +#include <sound/dmaengine_pcm.h> +#include <sound/pcm_drm_eld.h> +#include <sound/pcm_params.h> +#include <sound/soc.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -449,13 +450,38 @@ static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder) vc4_hdmi_set_spd_infoframe(encoder); } -static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *unadjusted_mode, - struct drm_display_mode *mode) +static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; + int ret; + + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + HD_WRITE(VC4_HD_VID_CTL, + HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + clk_disable_unprepare(hdmi->hsm_clock); + clk_disable_unprepare(hdmi->pixel_clock); + + ret = pm_runtime_put(&hdmi->pdev->dev); + if (ret < 0) + DRM_ERROR("Failed to release power domain: %d\n", ret); +} + +static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) { + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; bool debug_dump_regs = false; bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC; @@ -475,6 +501,64 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, interlaced, VC4_HDMI_VERTB_VBP)); u32 csc_ctl; + int ret; + + ret = pm_runtime_get_sync(&hdmi->pdev->dev); + if (ret < 0) { + DRM_ERROR("Failed to retain power domain: %d\n", ret); + return; + } + + /* This is the rate that is set by the firmware. The number + * needs to be a bit higher than the pixel clock rate + * (generally 148.5Mhz). + */ + ret = clk_set_rate(hdmi->hsm_clock, 163682864); + if (ret) { + DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); + return; + } + + ret = clk_set_rate(hdmi->pixel_clock, + mode->clock * 1000 * + ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); + if (ret) { + DRM_ERROR("Failed to set pixel clock rate: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->pixel_clock); + if (ret) { + DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->hsm_clock); + if (ret) { + DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", + ret); + clk_disable_unprepare(hdmi->pixel_clock); + return; + } + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, + VC4_HDMI_SW_RESET_HDMI | + VC4_HDMI_SW_RESET_FORMAT_DETECT); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); + + /* PHY should be in reset, like + * vc4_hdmi_encoder_disable() does. + */ + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); if (debug_dump_regs) { DRM_INFO("HDMI regs before:\n"); @@ -483,9 +567,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, HD_WRITE(VC4_HD_VID_CTL, 0); - clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000 * - ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); - HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL, HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) | VC4_HDMI_SCHEDULER_CONTROL_MANUAL_FORMAT | @@ -559,28 +640,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, DRM_INFO("HDMI regs after:\n"); vc4_hdmi_dump_regs(dev); } -} - -static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - - HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - HD_WRITE(VC4_HD_VID_CTL, - HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); -} - -static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) -{ - struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - int ret; - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); HD_WRITE(VC4_HD_VID_CTL, HD_READ(VC4_HD_VID_CTL) | @@ -646,7 +705,6 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) } static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = { - .mode_set = vc4_hdmi_encoder_mode_set, .disable = vc4_hdmi_encoder_disable, .enable = vc4_hdmi_encoder_enable, }; @@ -1147,33 +1205,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) return -EPROBE_DEFER; } - /* Enable the clocks at startup. We can't quite recover from - * turning off the pixel clock during disable/enables yet, so - * it's always running. - */ - ret = clk_prepare_enable(hdmi->pixel_clock); - if (ret) { - DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); - goto err_put_i2c; - } - - /* This is the rate that is set by the firmware. The number - * needs to be a bit higher than the pixel clock rate - * (generally 148.5Mhz). - */ - ret = clk_set_rate(hdmi->hsm_clock, 163682864); - if (ret) { - DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); - goto err_unprepare_pix; - } - - ret = clk_prepare_enable(hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", - ret); - goto err_unprepare_pix; - } - /* Only use the GPIO HPD pin if present in the DT, otherwise * we'll use the HDMI core's register. */ @@ -1185,7 +1216,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) &hpd_gpio_flags); if (hdmi->hpd_gpio < 0) { ret = hdmi->hpd_gpio; - goto err_unprepare_hsm; + goto err_put_i2c; } hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; @@ -1193,25 +1224,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4->hdmi = hdmi; - /* HDMI core must be enabled. */ - if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) { - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); - udelay(1); - HD_WRITE(VC4_HD_M_CTL, 0); - - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, - VC4_HDMI_SW_RESET_HDMI | - VC4_HDMI_SW_RESET_FORMAT_DETECT); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); - - /* PHY should be in reset, like - * vc4_hdmi_encoder_disable() does. - */ - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - } + pm_runtime_enable(dev); drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); @@ -1231,10 +1244,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) err_destroy_encoder: vc4_hdmi_encoder_destroy(hdmi->encoder); -err_unprepare_hsm: - clk_disable_unprepare(hdmi->hsm_clock); -err_unprepare_pix: - clk_disable_unprepare(hdmi->pixel_clock); + pm_runtime_disable(dev); err_put_i2c: put_device(&hdmi->ddc->dev); @@ -1253,8 +1263,8 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, vc4_hdmi_connector_destroy(hdmi->connector); vc4_hdmi_encoder_destroy(hdmi->encoder); - clk_disable_unprepare(hdmi->pixel_clock); - clk_disable_unprepare(hdmi->hsm_clock); + pm_runtime_disable(dev); + put_device(&hdmi->ddc->dev); vc4->hdmi = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index fd421ba3c5d7..2b62fc5b8d85 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -22,7 +22,7 @@ * each CRTC. */ -#include "linux/component.h" +#include <linux/component.h> #include "vc4_drv.h" #include "vc4_regs.h" diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index cdc6e6760705..7d7af3a93d94 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -59,50 +59,45 @@ vc4_overflow_mem_work(struct work_struct *work) { struct vc4_dev *vc4 = container_of(work, struct vc4_dev, overflow_mem_work); - struct drm_device *dev = vc4->dev; - struct vc4_bo *bo; + struct vc4_bo *bo = vc4->bin_bo; + int bin_bo_slot; + struct vc4_exec_info *exec; + unsigned long irqflags; - bo = vc4_bo_create(dev, 256 * 1024, true); - if (IS_ERR(bo)) { + bin_bo_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_bo_slot < 0) { DRM_ERROR("Couldn't allocate binner overflow mem\n"); return; } - /* If there's a job executing currently, then our previous - * overflow allocation is getting used in that job and we need - * to queue it to be released when the job is done. But if no - * job is executing at all, then we can free the old overflow - * object direcctly. - * - * No lock necessary for this pointer since we're the only - * ones that update the pointer, and our workqueue won't - * reenter. - */ - if (vc4->overflow_mem) { - struct vc4_exec_info *current_exec; - unsigned long irqflags; - - spin_lock_irqsave(&vc4->job_lock, irqflags); - current_exec = vc4_first_bin_job(vc4); - if (!current_exec) - current_exec = vc4_last_render_job(vc4); - if (current_exec) { - vc4->overflow_mem->seqno = current_exec->seqno; - list_add_tail(&vc4->overflow_mem->unref_head, - ¤t_exec->unref_list); - vc4->overflow_mem = NULL; + spin_lock_irqsave(&vc4->job_lock, irqflags); + + if (vc4->bin_alloc_overflow) { + /* If we had overflow memory allocated previously, + * then that chunk will free when the current bin job + * is done. If we don't have a bin job running, then + * the chunk will be done whenever the list of render + * jobs has drained. + */ + exec = vc4_first_bin_job(vc4); + if (!exec) + exec = vc4_last_render_job(vc4); + if (exec) { + exec->bin_slots |= vc4->bin_alloc_overflow; + } else { + /* There's nothing queued in the hardware, so + * the old slot is free immediately. + */ + vc4->bin_alloc_used &= ~vc4->bin_alloc_overflow; } - spin_unlock_irqrestore(&vc4->job_lock, irqflags); } + vc4->bin_alloc_overflow = BIT(bin_bo_slot); - if (vc4->overflow_mem) - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = bo; - - V3D_WRITE(V3D_BPOA, bo->base.paddr); + V3D_WRITE(V3D_BPOA, bo->base.paddr + bin_bo_slot * vc4->bin_alloc_size); V3D_WRITE(V3D_BPOS, bo->base.base.size); V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM); V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); } static void @@ -142,6 +137,10 @@ vc4_irq_finish_render_job(struct drm_device *dev) vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); + if (exec->fence) { + dma_fence_signal_locked(exec->fence); + exec->fence = NULL; + } vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ad7925a9e0ea..bc6ecdc6f104 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -14,12 +14,12 @@ * crtc, HDMI encoder). */ -#include "drm_crtc.h" -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_plane_helper.h" -#include "drm_fb_cma_helper.h" +#include <drm/drm_crtc.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_fb_cma_helper.h> #include "vc4_drv.h" static void vc4_output_poll_changed(struct drm_device *dev) @@ -42,6 +42,10 @@ vc4_atomic_complete_commit(struct vc4_commit *c) struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + drm_atomic_helper_wait_for_fences(dev, state, false); + + drm_atomic_helper_wait_for_dependencies(state); + drm_atomic_helper_commit_modeset_disables(dev, state); drm_atomic_helper_commit_planes(dev, state, 0); @@ -57,10 +61,14 @@ vc4_atomic_complete_commit(struct vc4_commit *c) */ state->legacy_cursor_update = false; + drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_commit_cleanup_done(state); + drm_atomic_state_put(state); up(&vc4->async_modeset); @@ -117,32 +125,10 @@ static int vc4_atomic_commit(struct drm_device *dev, if (!c) return -ENOMEM; - /* Make sure that any outstanding modesets have finished. */ - if (nonblock) { - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned long flags; - bool busy = false; - - /* - * If there's an undispatched event to send then we're - * obviously still busy. If there isn't, then we can - * unconditionally wait for the semaphore because it - * shouldn't be contended (for long). - * - * This is to prevent a race where queuing a new flip - * from userspace immediately on receipt of an event - * beats our clean-up and returns EBUSY. - */ - spin_lock_irqsave(&dev->event_lock, flags); - for_each_crtc_in_state(state, crtc, crtc_state, i) - busy |= vc4_event_pending(crtc); - spin_unlock_irqrestore(&dev->event_lock, flags); - if (busy) { - kfree(c); - return -EBUSY; - } - } + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (ret) + return ret; + ret = down_interruptible(&vc4->async_modeset); if (ret) { kfree(c); @@ -202,11 +188,50 @@ static int vc4_atomic_commit(struct drm_device *dev, return 0; } +static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_mode_fb_cmd2 mode_cmd_local; + + /* If the user didn't specify a modifier, use the + * vc4_set_tiling_ioctl() state for the BO. + */ + if (!(mode_cmd->flags & DRM_MODE_FB_MODIFIERS)) { + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, + mode_cmd->handles[0]); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", + mode_cmd->handles[0]); + return ERR_PTR(-ENOENT); + } + bo = to_vc4_bo(gem_obj); + + mode_cmd_local = *mode_cmd; + + if (bo->t_format) { + mode_cmd_local.modifier[0] = + DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + } else { + mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE; + } + + drm_gem_object_unreference_unlocked(gem_obj); + + mode_cmd = &mode_cmd_local; + } + + return drm_fb_cma_create(dev, file_priv, mode_cmd); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = vc4_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = vc4_atomic_commit, - .fb_create = drm_fb_cma_create, + .fb_create = vc4_fb_create, }; int vc4_kms_load(struct drm_device *dev) @@ -230,10 +255,12 @@ int vc4_kms_load(struct drm_device *dev) drm_mode_config_reset(dev); - vc4->fbdev = drm_fbdev_cma_init(dev, 32, - dev->mode_config.num_connector); - if (IS_ERR(vc4->fbdev)) - vc4->fbdev = NULL; + if (dev->mode_config.num_connector) { + vc4->fbdev = drm_fbdev_cma_init(dev, 32, + dev->mode_config.num_connector); + if (IS_ERR(vc4->fbdev)) + vc4->fbdev = NULL; + } drm_kms_helper_poll_init(dev); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d34cd5393a9b..fa6809d8b0fe 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -18,12 +18,13 @@ * into the region of the HVS that it has allocated for us. */ +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_plane_helper.h> + #include "vc4_drv.h" #include "vc4_regs.h" -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_fb_cma_helper.h" -#include "drm_plane_helper.h" enum vc4_scaling_mode { VC4_SCALING_NONE, @@ -499,8 +500,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); - u32 scl0, scl1; - u32 lbm_size; + u32 scl0, scl1, pitch0; + u32 lbm_size, tiling; unsigned long irqflags; int ret, i; @@ -541,11 +542,31 @@ static int vc4_plane_mode_set(struct drm_plane *plane, scl1 = vc4_get_scl_field(state, 0); } + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + tiling = SCALER_CTL0_TILING_LINEAR; + pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + tiling = SCALER_CTL0_TILING_256B_OR_T; + + pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET), + VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L), + VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5, + SCALER_PITCH0_TILE_WIDTH_R)); + break; + default: + DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", + (long long)fb->modifier); + return -EINVAL; + } + /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | + VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); @@ -599,8 +620,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, for (i = 0; i < num_planes; i++) vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pitch word 0/1/2 */ - for (i = 0; i < num_planes; i++) { + /* Pitch word 0 */ + vc4_dlist_write(vc4_state, pitch0); + + /* Pitch word 1/2 */ + for (i = 1; i < num_planes; i++) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 932093936178..d382c34c1b9e 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -709,6 +709,13 @@ enum hvs_pixel_format { #define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24) #define SCALER_CTL0_SIZE_SHIFT 24 +#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20) +#define SCALER_CTL0_TILING_SHIFT 20 +#define SCALER_CTL0_TILING_LINEAR 0 +#define SCALER_CTL0_TILING_64B 1 +#define SCALER_CTL0_TILING_128B 2 +#define SCALER_CTL0_TILING_256B_OR_T 3 + #define SCALER_CTL0_HFLIP BIT(16) #define SCALER_CTL0_VFLIP BIT(15) @@ -838,7 +845,19 @@ enum hvs_pixel_format { #define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 #define SCALER_PPF_KERNEL_UNCACHED BIT(31) +/* PITCH0/1/2 fields for raster. */ #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) #define SCALER_SRC_PITCH_SHIFT 0 +/* PITCH0 fields for T-tiled. */ +#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16) +#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16 +#define SCALER_PITCH0_TILE_LINE_DIR BIT(15) +#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14) +/* Y offset within a tile. */ +#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7) +#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7 +#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0) +#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0 + #endif /* VC4_REGS_H */ diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 4339471f517f..5dc19429d4ae 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -182,8 +182,7 @@ static void emit_tile(struct vc4_exec_info *exec, if (has_bin) { rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); - rcl_u32(setup, (exec->tile_bo->paddr + - exec->tile_alloc_offset + + rcl_u32(setup, (exec->tile_alloc_offset + (y * exec->bin_tiles_x + x) * 32)); } diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..8c723da71f66 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -16,8 +16,9 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "linux/component.h" -#include "linux/pm_runtime.h" +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/pm_runtime.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev) V3D_WRITE(V3D_VPMBASE, 0); } +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) +{ + struct drm_device *dev = vc4->dev; + unsigned long irqflags; + int slot; + uint64_t seqno = 0; + struct vc4_exec_info *exec; + +try_again: + spin_lock_irqsave(&vc4->job_lock, irqflags); + slot = ffs(~vc4->bin_alloc_used); + if (slot != 0) { + /* Switch from ffs() bit index to a 0-based index. */ + slot--; + vc4->bin_alloc_used |= BIT(slot); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return slot; + } + + /* Couldn't find an open slot. Wait for render to complete + * and try again. + */ + exec = vc4_last_render_job(vc4); + if (exec) + seqno = exec->seqno; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + if (seqno) { + int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true); + + if (ret == 0) + goto try_again; + + return ret; + } + + return -ENOMEM; +} + +/** + * vc4_allocate_bin_bo() - allocates the memory that will be used for + * tile binning. + * + * The binner has a limitation that the addresses in the tile state + * buffer that point into the tile alloc buffer or binner overflow + * memory only have 28 bits (256MB), and the top 4 on the bus for + * tile alloc references end up coming from the tile state buffer's + * address. + * + * To work around this, we allocate a single large buffer while V3D is + * in use, make sure that it has the top 4 bits constant across its + * entire extent, and then put the tile state, tile alloc, and binner + * overflow memory inside that buffer. + * + * This creates a limitation where we may not be able to execute a job + * if it doesn't fit within the buffer that we allocated up front. + * However, it turns out that 16MB is "enough for anybody", and + * real-world applications run into allocation failures from the + * overall CMA pool before they make scenes complicated enough to run + * out of bin space. + */ +int +vc4_allocate_bin_bo(struct drm_device *drm) +{ + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_v3d *v3d = vc4->v3d; + uint32_t size = 16 * 1024 * 1024; + int ret = 0; + struct list_head list; + + /* We may need to try allocating more than once to get a BO + * that doesn't cross 256MB. Track the ones we've allocated + * that failed so far, so that we can free them when we've got + * one that succeeded (if we freed them right away, our next + * allocation would probably be the same chunk of memory). + */ + INIT_LIST_HEAD(&list); + + while (true) { + struct vc4_bo *bo = vc4_bo_create(drm, size, true); + + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + + dev_err(&v3d->pdev->dev, + "Failed to allocate memory for tile binning: " + "%d. You may need to enable CMA or give it " + "more memory.", + ret); + break; + } + + /* Check if this BO won't trigger the addressing bug. */ + if ((bo->base.paddr & 0xf0000000) == + ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) { + vc4->bin_bo = bo; + + /* Set up for allocating 512KB chunks of + * binner memory. The biggest allocation we + * need to do is for the initial tile alloc + + * tile state buffer. We can render to a + * maximum of ((2048*2048) / (32*32) = 4096 + * tiles in a frame (until we do floating + * point rendering, at which point it would be + * 8192). Tile state is 48b/tile (rounded to + * a page), and tile alloc is 32b/tile + * (rounded to a page), plus a page of extra, + * for a total of 320kb for our worst-case. + * We choose 512kb so that it divides evenly + * into our 16MB, and the rest of the 512kb + * will be used as storage for the overflow + * from the initial 32b CL per bin. + */ + vc4->bin_alloc_size = 512 * 1024; + vc4->bin_alloc_used = 0; + vc4->bin_alloc_overflow = 0; + WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != + bo->base.base.size / vc4->bin_alloc_size); + + break; + } + + /* Put it on the list to free later, and try again. */ + list_add(&bo->unref_head, &list); + } + + /* Free all the BOs we allocated but didn't choose. */ + while (!list_empty(&list)) { + struct vc4_bo *bo = list_last_entry(&list, + struct vc4_bo, unref_head); + + list_del(&bo->unref_head); + drm_gem_object_put_unlocked(&bo->base.base); + } + + return ret; +} + #ifdef CONFIG_PM static int vc4_v3d_runtime_suspend(struct device *dev) { @@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev) vc4_irq_uninstall(vc4->dev); + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; + + clk_disable_unprepare(v3d->clk); + return 0; } @@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev) { struct vc4_v3d *v3d = dev_get_drvdata(dev); struct vc4_dev *vc4 = v3d->vc4; + int ret; + + ret = vc4_allocate_bin_bo(vc4->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; vc4_v3d_init_hw(vc4->dev); vc4_irq_postinstall(vc4->dev); @@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) vc4->v3d = v3d; v3d->vc4 = vc4; + v3d->clk = devm_clk_get(dev, NULL); + if (IS_ERR(v3d->clk)) { + int ret = PTR_ERR(v3d->clk); + + if (ret == -ENOENT) { + /* bcm2835 didn't have a clock reference in the DT. */ + ret = 0; + v3d->clk = NULL; + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get V3D clock: %d\n", + ret); + return ret; + } + } + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + ret = vc4_allocate_bin_bo(drm); + if (ret) { + clk_disable_unprepare(v3d->clk); + return ret; + } + /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. */ @@ -222,6 +401,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return ret; } + pm_runtime_set_active(dev); pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ pm_runtime_enable(dev); @@ -271,6 +451,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) static const struct of_device_id vc4_v3d_dt_match[] = { { .compatible = "brcm,bcm2835-v3d" }, + { .compatible = "brcm,cygnus-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index da6f1e138e8d..814b512c6b9a 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -172,7 +172,8 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, * our math. */ if (width > 4096 || height > 4096) { - DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); + DRM_ERROR("Surface dimensions (%d,%d) too large", + width, height); return false; } @@ -348,10 +349,11 @@ static int validate_tile_binning_config(VALIDATE_ARGS) { struct drm_device *dev = exec->exec_bo->base.dev; - struct vc4_bo *tile_bo; + struct vc4_dev *vc4 = to_vc4_dev(dev); uint8_t flags; - uint32_t tile_state_size, tile_alloc_size; - uint32_t tile_count; + uint32_t tile_state_size; + uint32_t tile_count, bin_addr; + int bin_slot; if (exec->found_tile_binning_mode_config_packet) { DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); @@ -377,13 +379,28 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } + bin_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_slot < 0) { + if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) { + DRM_ERROR("Failed to allocate binner memory: %d\n", + bin_slot); + } + return bin_slot; + } + + /* The slot we allocated will only be used by this job, and is + * free when the job completes rendering. + */ + exec->bin_slots |= BIT(bin_slot); + bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size; + /* The tile state data array is 48 bytes per tile, and we put it at * the start of a BO containing both it and the tile alloc. */ tile_state_size = 48 * tile_count; /* Since the tile alloc array will follow us, align. */ - exec->tile_alloc_offset = roundup(tile_state_size, 4096); + exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096); *(uint8_t *)(validated + 14) = ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | @@ -394,35 +411,13 @@ validate_tile_binning_config(VALIDATE_ARGS) VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); - /* Initial block size. */ - tile_alloc_size = 32 * tile_count; - - /* - * The initial allocation gets rounded to the next 256 bytes before - * the hardware starts fulfilling further allocations. - */ - tile_alloc_size = roundup(tile_alloc_size, 256); - - /* Add space for the extra allocations. This is what gets used first, - * before overflow memory. It must have at least 4096 bytes, but we - * want to avoid overflow memory usage if possible. - */ - tile_alloc_size += 1024 * 1024; - - tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, - true); - exec->tile_bo = &tile_bo->base; - if (IS_ERR(exec->tile_bo)) - return PTR_ERR(exec->tile_bo); - list_add_tail(&tile_bo->unref_head, &exec->unref_list); - /* tile alloc address. */ - *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + - exec->tile_alloc_offset); + *(uint32_t *)(validated + 0) = exec->tile_alloc_offset; /* tile alloc size. */ - *(uint32_t *)(validated + 4) = tile_alloc_size; + *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size - + exec->tile_alloc_offset); /* tile state address. */ - *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; + *(uint32_t *)(validated + 8) = bin_addr; return 0; } |