diff options
Diffstat (limited to 'drivers/gpu/drm')
739 files changed, 25379 insertions, 13100 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 9bb456b49b59..b1f22e457fd0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -8,6 +8,7 @@ menuconfig DRM tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && HAS_DMA + select DRM_NOMODESET select DRM_PANEL_ORIENTATION_QUIRKS select HDMI select FB_CMDLINE @@ -216,13 +217,6 @@ config DRM_GEM_CMA_HELPER help Choose this if you need the GEM CMA helper functions -config DRM_KMS_CMA_HELPER - bool - depends on DRM - select DRM_GEM_CMA_HELPER - help - Choose this if you need the KMS CMA helper functions - config DRM_GEM_SHMEM_HELPER tristate depends on DRM && MMU @@ -394,6 +388,8 @@ source "drivers/gpu/drm/xlnx/Kconfig" source "drivers/gpu/drm/gud/Kconfig" +source "drivers/gpu/drm/sprd/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" depends on DRM && PCI && MMU && HYPERV @@ -492,6 +488,11 @@ config DRM_EXPORT_FOR_TESTS config DRM_PANEL_ORIENTATION_QUIRKS tristate +# Separate option because nomodeset parameter is global and expected built-in +config DRM_NOMODESET + bool + default n + config DRM_LIB_RANDOM bool default n diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 1c41156deb5f..301a44dc18e3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -6,7 +6,7 @@ drm-y := drm_aperture.o drm_auth.o drm_cache.o \ drm_file.o drm_gem.o drm_ioctl.o \ drm_drv.o \ - drm_sysfs.o drm_hashtab.o drm_mm.o \ + drm_sysfs.o drm_mm.o \ drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o drm_displayid.o \ drm_trace_points.o drm_prime.o \ drm_vma_manager.o \ @@ -20,8 +20,8 @@ drm-y := drm_aperture.o drm_auth.o drm_cache.o \ drm_managed.o drm_vblank_work.o drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \ - drm_irq.o drm_legacy_misc.o drm_lock.o drm_memory.o \ - drm_scatter.o drm_vm.o + drm_hashtab.o drm_irq.o drm_legacy_misc.o drm_lock.o \ + drm_memory.o drm_scatter.o drm_vm.o drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_PANEL) += drm_panel.o @@ -33,7 +33,10 @@ drm-$(CONFIG_DRM_PRIVACY_SCREEN) += drm_privacy_screen.o drm_privacy_screen_x86. obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o +obj-$(CONFIG_DRM_NOMODESET) += drm_nomodeset.o + drm_cma_helper-y := drm_gem_cma_helper.o +drm_cma_helper-$(CONFIG_DRM_KMS_HELPER) += drm_fb_cma_helper.o obj-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_cma_helper.o drm_shmem_helper-y := drm_gem_shmem_helper.o @@ -58,7 +61,6 @@ drm_kms_helper-y := drm_bridge_connector.o drm_crtc_helper.o drm_dp_helper.o \ drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o -drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o drm_kms_helper-$(CONFIG_DRM_DP_CEC) += drm_dp_cec.o @@ -132,3 +134,4 @@ obj-$(CONFIG_DRM_TIDSS) += tidss/ obj-y += xlnx/ obj-y += gud/ obj-$(CONFIG_DRM_HYPERV) += hyperv/ +obj-$(CONFIG_DRM_SPRD) += sprd/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 653726588956..7fedbb725e17 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -45,7 +45,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ - amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \ + amdgpu_gem.o amdgpu_ring.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9f017663ac50..9eb7afa276f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1095,7 +1095,9 @@ struct amdgpu_device { pci_channel_state_t pci_channel_state; struct amdgpu_reset_control *reset_cntl; - uint32_t ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE]; + uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE]; + + bool ram_is_direct_mapped; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1316,6 +1318,8 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); +void amdgpu_device_halt(struct amdgpu_device *adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); @@ -1359,8 +1363,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); -long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg); int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 7077f21f0021..46cf48b3904a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -72,7 +72,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) if (!kfd_initialized) return; - adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, vf); + adev->kfd.dev = kgd2kfd_probe(adev, vf); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -233,19 +233,16 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) return r; } -void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) +void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (amdgpu_device_should_recover_gpu(adev)) amdgpu_device_gpu_recover(adev, NULL); } -int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; struct amdgpu_bo_param bp; int r; @@ -314,7 +311,7 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; @@ -325,10 +322,9 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } -int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; struct amdgpu_bo_user *ubo; struct amdgpu_bo_param bp; @@ -355,18 +351,16 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, return 0; } -void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; amdgpu_bo_unref(&bo); } -uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, +uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - switch (type) { case KGD_ENGINE_PFP: return adev->gfx.pfp_fw_version; @@ -399,11 +393,9 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, return 0; } -void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, +void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, struct kfd_local_mem_info *mem_info) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - memset(mem_info, 0, sizeof(*mem_info)); mem_info->local_mem_size_public = adev->gmc.visible_vram_size; @@ -428,19 +420,15 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, mem_info->mem_clk_max = 100; } -uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->gfx.funcs->get_gpu_clock_counter) return adev->gfx.funcs->get_gpu_clock_counter(adev); return 0; } -uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - /* the sclk is in quantas of 10kHz */ if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; @@ -450,9 +438,8 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return 100; } -void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_cu_info acu_info = adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -473,13 +460,12 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } -int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - struct kgd_dev **dma_buf_kgd, +int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, + struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct dma_buf *dma_buf; struct drm_gem_object *obj; struct amdgpu_bo *bo; @@ -507,8 +493,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, goto out_put; r = 0; - if (dma_buf_kgd) - *dma_buf_kgd = (struct kgd_dev *)adev; + if (dmabuf_adev) + *dmabuf_adev = adev; if (bo_size) *bo_size = amdgpu_bo_size(bo); if (metadata_buffer) @@ -528,32 +514,18 @@ out_put: return r; } -uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) +uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return amdgpu_vram_mgr_usage(vram_man); } -uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gmc.xgmi.hive_id; -} - -uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->unique_id; -} - -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, + struct amdgpu_device *src) { - struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; - struct amdgpu_device *adev = (struct amdgpu_device *)dst; + struct amdgpu_device *peer_adev = src; + struct amdgpu_device *adev = dst; int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); if (ret < 0) { @@ -565,16 +537,18 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s return (uint8_t)ret; } -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, + struct amdgpu_device *src, + bool is_min) { - struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; + struct amdgpu_device *adev = dst, *peer_adev; int num_links; if (adev->asic_type != CHIP_ALDEBARAN) return 0; if (src) - peer_adev = (struct amdgpu_device *)src; + peer_adev = src; /* num links returns 0 for indirect peers since indirect route is unknown. */ num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); @@ -589,9 +563,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev return (num_links * 16 * 25000)/BITS_PER_BYTE; } -int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev; int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : fls(adev->pm.pcie_mlw_mask)) - 1; int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & @@ -647,39 +620,11 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; } -uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->rmmio_remap.bus_addr; -} - -uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gds.gws_size; -} - -uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->rev_id; -} - -int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - return adev->gmc.noretry; -} - -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, +int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, + enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_job *job; struct amdgpu_ib *ib; struct amdgpu_ring *ring; @@ -730,10 +675,8 @@ err: return ret; } -void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) +void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -747,10 +690,9 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, + uint16_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->family == AMDGPU_FAMILY_AI) { int i; @@ -763,10 +705,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type) +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; bool all_hub = false; if (adev->family == AMDGPU_FAMILY_AI) @@ -775,21 +716,18 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); } -bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) +bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - return adev->have_atomics_support; } -void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd) +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct ras_err_data err_data = {0, 0, 0, NULL}; /* CPU MCA will handle page retirement if connected_to_cpu is 1 */ if (!adev->gmc.xgmi.connected_to_cpu) amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL); else - amdgpu_amdkfd_gpu_reset(kgd); + amdgpu_amdkfd_gpu_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a15a4787c7ee..fcbc8a9c9e06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -144,14 +144,16 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, +int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, + enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); -void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); -bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type); +void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle); +bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, + uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -159,7 +161,7 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); -void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); +void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev); int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); @@ -198,37 +200,36 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) } #endif /* Shared API */ -int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, +int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); -void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, + void **mem_obj); +void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); -uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, +uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type); -void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, +void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, struct kfd_local_mem_info *mem_info); -uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); -uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); -void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); -int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, - struct kgd_dev **dmabuf_kgd, +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); +void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, + struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, + struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags); -uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd); -uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); -uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); -uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); -int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); -int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); +uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, + struct amdgpu_device *src); +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, + struct amdgpu_device *src, + bool is_min); +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -258,45 +259,54 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); (&((struct amdgpu_fpriv *) \ ((struct drm_file *)(drm_priv))->driver_priv)->vm) -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct file *filp, u32 pasid, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, + void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, + struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed); + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, + bool *table_freed); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, struct kgd_mem *mem, void **kptr, uint64_t *size); -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem); +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, + struct kgd_mem *mem); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, struct dma_buf *dmabuf, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); -int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); -void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd); +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); + +/** + * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released + * + * Allows KFD to release its resources associated with the GEM object. + */ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo); void amdgpu_amdkfd_reserve_system_mem(uint64_t size); #else @@ -324,7 +334,7 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, #if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf); +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); @@ -348,7 +358,7 @@ static inline void kgd2kfd_exit(void) } static inline -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 5a7f680bcb3f..abe93b3ff765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -57,11 +57,6 @@ (*dump)[i++][1] = RREG32(addr); \ } while (0) -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) { return (struct v9_sdma_mqd *)mqd; @@ -123,10 +118,9 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, return sdma_rlc_reg_offset; } -int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -193,11 +187,10 @@ int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, +int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -225,9 +218,9 @@ int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev, + void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -244,10 +237,9 @@ bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h index ce08131b7b5f..756c1a5679c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h @@ -20,11 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); -int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, +int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); -bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev, + void *mqd); +int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 960acf68150a..7b7f4b2764c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -39,37 +39,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -81,33 +70,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -150,22 +135,21 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -218,12 +202,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v10_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_compute_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; @@ -231,7 +214,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -296,16 +279,15 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -313,7 +295,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -349,16 +331,15 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -372,13 +353,13 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -386,10 +367,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -456,11 +436,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -488,15 +467,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -506,13 +485,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -529,12 +507,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -548,7 +525,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; #endif - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -633,20 +610,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -683,11 +659,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -696,12 +671,12 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -710,11 +685,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -735,18 +709,16 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -757,12 +729,10 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } -static void program_trap_handler_settings(struct kgd_dev *kgd, +static void program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers @@ -781,7 +751,7 @@ static void program_trap_handler_settings(struct kgd_dev *kgd, WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } const struct kfd2kgd_calls gfx_v10_kfd2kgd = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index dac0d751d5af..1f37d3574001 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -38,37 +38,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -80,34 +69,30 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid, +static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } /* ATC is defeatured on Sienna_Cichlid */ -static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid, +static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; /* Mapping vmid to pasid also for IH block */ @@ -118,22 +103,21 @@ static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id) +static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -188,12 +172,11 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v10_sdma_mqd *)mqd; } -static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_compute_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; @@ -201,7 +184,7 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HIQ is set during driver init period with vmid set to 0*/ if (m->cp_hqd_vmid == 0) { @@ -281,16 +264,15 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, +static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v10_compute_mqd *m; uint32_t mec, pipe; @@ -298,7 +280,7 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -334,16 +316,15 @@ static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -static int hqd_dump_v10_3(struct kgd_dev *kgd, +static int hqd_dump_v10_3(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -357,13 +338,13 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -371,10 +352,9 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd, return 0; } -static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -441,11 +421,10 @@ static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd, return 0; } -static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, +static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -473,15 +452,15 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd, return 0; } -static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -491,13 +470,13 @@ static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd) +static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev, + void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -514,18 +493,17 @@ static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd) return false; } -static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; struct v10_compute_mqd *m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -555,20 +533,19 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, if (time_after(jiffies, end_jiffies)) { pr_err("cp queue pipe %d queue %d preemption failed\n", pipe_id, queue_id); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd, +static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -606,12 +583,12 @@ static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd, } -static int address_watch_disable_v10_3(struct kgd_dev *kgd) +static int address_watch_disable_v10_3(struct amdgpu_device *adev) { return 0; } -static int address_watch_execute_v10_3(struct kgd_dev *kgd, +static int address_watch_execute_v10_3(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -620,11 +597,10 @@ static int address_watch_execute_v10_3(struct kgd_dev *kgd, return 0; } -static int wave_control_execute_v10_3(struct kgd_dev *kgd, +static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -645,28 +621,24 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd, return 0; } -static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd, +static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* SDMA is on gfxhub as well for Navi1* series */ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } -static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, +static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers @@ -685,15 +657,14 @@ static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } #if 0 -uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, +uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev, uint32_t trap_debug_wave_launch_mode, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; uint32_t orig_wave_cntl_value; uint32_t orig_stall_vmid; @@ -720,10 +691,8 @@ uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, return 0; } -uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd) +uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->grbm_idx_mutex); WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); @@ -733,11 +702,10 @@ uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd) return 0; } -uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd, +uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev, uint32_t trap_override, uint32_t trap_mask) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -762,11 +730,10 @@ uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd, return 0; } -uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd, +uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev, uint8_t wave_launch_mode, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; bool is_stall_mode; bool is_mode_set; @@ -805,16 +772,14 @@ uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd, * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. * deq_retry_wait_time -- Wait Count for Global Wave Syncs. */ -void get_iq_wait_times_v10_3(struct kgd_dev *kgd, +void get_iq_wait_times_v10_3(struct amdgpu_device *adev, uint32_t *wait_times) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); } -void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd, +void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b91d27e39bad..36528dad7684 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -82,68 +82,54 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); mutex_lock(&adev->srbm_mutex); WREG32(mmSRBM_GFX_CNTL, value); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - WREG32(mmSRBM_GFX_CNTL, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_MEM_CONFIG, sh_mem_config); WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); WREG32(mmSH_MEM_BASES, sh_mem_bases); - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -165,21 +151,20 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, return 0; } -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -207,12 +192,11 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) return (struct cik_sdma_rlc_registers *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; @@ -220,7 +204,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -239,25 +223,24 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ - release_queue(kgd); + release_queue(adev); valid_wptr = read_user_wptr(mm, wptr, wptr_val); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS (35+4) #define DUMP_REG(addr) do { \ @@ -271,7 +254,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); @@ -281,7 +264,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -289,10 +272,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; uint32_t sdma_rlc_reg_offset; @@ -345,11 +327,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; uint32_t i = 0, reg; @@ -372,15 +353,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32(mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -390,13 +371,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32(mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -412,12 +392,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; enum hqd_dequeue_request_type type; unsigned long flags, end_jiffies; @@ -426,7 +405,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); switch (reset_type) { @@ -504,20 +483,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -551,9 +529,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); union TCP_WATCH_CNTL_BITS cntl; unsigned int i; @@ -571,13 +548,12 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd) return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); union TCP_WATCH_CNTL_BITS cntl; cntl.u32All = cntl_val; @@ -602,11 +578,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data; mutex_lock(&adev->grbm_idx_mutex); @@ -627,18 +602,17 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; @@ -646,21 +620,17 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static void set_scratch_backing_va(struct kgd_dev *kgd, +static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); + unlock_srbm(adev); } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID\n"); return; @@ -676,10 +646,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * @vmid: vmid pointer * read vmid from register (CIK). */ -static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) +static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 5ce0ce704a21..52832cd69a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -39,68 +39,54 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); mutex_lock(&adev->srbm_mutex); WREG32(mmSRBM_GFX_CNTL, value); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - WREG32(mmSRBM_GFX_CNTL, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_MEM_CONFIG, sh_mem_config); WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); WREG32(mmSH_MEM_BASES, sh_mem_bases); - unlock_srbm(kgd); + unlock_srbm(adev); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -123,21 +109,20 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, return 0; } -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -165,12 +150,11 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) return (struct vi_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; @@ -178,7 +162,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HIQ is set during driver init period with vmid set to 0*/ if (m->cp_hqd_vmid == 0) { @@ -206,7 +190,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * on ASICs that do not support context-save. * EOP writes/reads can start anywhere in the ring. */ - if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { + if (adev->asic_type != CHIP_TONGA) { WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); @@ -226,25 +210,24 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ - release_queue(kgd); + release_queue(adev); valid_wptr = read_user_wptr(mm, wptr, wptr_val); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS (54+4) #define DUMP_REG(addr) do { \ @@ -258,7 +241,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); @@ -268,7 +251,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -276,10 +259,9 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; uint32_t sdma_rlc_reg_offset; @@ -331,11 +313,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + queue_id * KFD_VI_SDMA_QUEUE_OFFSET; uint32_t i = 0, reg; @@ -367,15 +348,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32(mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -385,13 +366,12 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32(mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -407,12 +387,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; enum hqd_dequeue_request_type type; unsigned long flags, end_jiffies; @@ -422,7 +401,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); @@ -502,20 +481,19 @@ loop: break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -549,11 +527,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; @@ -561,12 +538,12 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +static int kgd_address_watch_disable(struct amdgpu_device *adev) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +static int kgd_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -575,11 +552,10 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -600,28 +576,24 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, +static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); - unlock_srbm(kgd); + unlock_srbm(adev); } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void set_vm_context_page_table_base(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID\n"); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index bcc1cbeb8799..ddfe7aff919d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -46,37 +46,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -88,33 +77,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -171,22 +156,21 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -233,19 +217,18 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -308,16 +291,15 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); - release_queue(kgd); + release_queue(adev); return 0; } -int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v9_mqd *m; uint32_t mec, pipe; @@ -325,7 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -361,16 +343,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -384,13 +365,13 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -398,10 +379,9 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -468,11 +448,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -500,15 +479,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); if (act) { low = lower_32_bits(queue_address >> 8); @@ -518,13 +497,12 @@ bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -541,12 +519,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -555,7 +532,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -584,20 +561,19 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -634,11 +610,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -647,12 +622,12 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) { return 0; } -int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -661,11 +636,10 @@ int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, return 0; } -int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); @@ -686,18 +660,16 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -804,7 +776,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu) { int qidx; @@ -818,10 +790,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int pasid_tmp; int max_queue_cnt; int vmid_wave_cnt = 0; - struct amdgpu_device *adev; DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); - adev = get_amdgpu_device(kgd); lock_spi_csq_mutexes(adev); soc15_grbm_select(adev, 1, 0, 0, 0); @@ -882,12 +852,10 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, adev->gfx.cu_info.max_waves_per_simd; } -void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, +void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers @@ -905,7 +873,7 @@ void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index c63591106879..24be49df26fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -22,48 +22,49 @@ -void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); -int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid); -int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off); -int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); -bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); -int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev); +int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo); -int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); -uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset); -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu); -void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, +void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 71acd577803e..f9bab963a948 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -60,12 +60,6 @@ static const char * const domain_bit_to_string[] = { static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); - -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - static bool kfd_mem_is_attached(struct amdgpu_vm *avm, struct kgd_mem *mem) { @@ -126,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size) PAGE_ALIGN(size); } +/** + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size + * of buffer including any reserved for control structures + * + * @adev: Device to which allocated BO belongs to + * @size: Size of buffer, in bytes, encapsulated by B0. This should be + * equivalent to amdgpu_bo_size(BO) + * @alloc_flag: Flag used in allocating a BO as noted above + * + * Return: returns -ENOMEM in case of error, ZERO otherwise + */ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain, bool sg) + uint64_t size, u32 alloc_flag) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); @@ -137,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, acc_size = amdgpu_amdkfd_acc_size(size); vram_needed = 0; - if (domain == AMDGPU_GEM_DOMAIN_GTT) { - /* TTM GTT memory */ + if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size + size; - } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { - /* Userptr */ + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; + vram_needed = size; + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; - } else { - /* VRAM and SG */ + } else if (alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - if (domain == AMDGPU_GEM_DOMAIN_VRAM) - vram_needed = size; + } else { + pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); + return -ENOMEM; } spin_lock(&kfd_mem_limit.mem_limit_lock); @@ -166,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (adev->kfd.vram_used + vram_needed > adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; - } else { - kfd_mem_limit.system_mem_used += system_mem_needed; - kfd_mem_limit.ttm_mem_used += ttm_mem_needed; - adev->kfd.vram_used += vram_needed; + goto release; } + /* Update memory accounting by decreasing available system + * memory, TTM memory and GPU memory as computed above + */ + adev->kfd.vram_used += vram_needed; + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + +release: spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } static void unreserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain, bool sg) + uint64_t size, u32 alloc_flag) { size_t acc_size; acc_size = amdgpu_amdkfd_acc_size(size); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) { + + if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.ttm_mem_used -= acc_size; + adev->kfd.vram_used -= size; + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + } else if (alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - adev->kfd.vram_used -= size; - WARN_ONCE(adev->kfd.vram_used < 0, - "kfd VRAM memory accounting unbalanced"); - } + } else { + pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); + goto release; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); + + WARN_ONCE(adev->kfd.vram_used < 0, + "KFD VRAM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "kfd TTM memory accounting unbalanced"); + "KFD TTM memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "KFD system memory accounting unbalanced"); +release: spin_unlock(&kfd_mem_limit.mem_limit_lock); } void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u32 domain = bo->preferred_domains; - bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); - - if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { - domain = AMDGPU_GEM_DOMAIN_CPU; - sg = false; - } + u32 alloc_flags = bo->kfd_bo->alloc_flags; + u64 size = amdgpu_bo_size(bo); - unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); + unreserve_mem_limit(adev, size, alloc_flags); kfree(bo->kfd_bo); } - /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -646,12 +663,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, if (IS_ERR(gobj)) return PTR_ERR(gobj); - /* Import takes an extra reference on the dmabuf. Drop it now to - * avoid leaking it. We only need the one reference in - * kgd_mem->dmabuf. - */ - dma_buf_put(mem->dmabuf); - *bo = gem_to_amdgpu_bo(gobj); (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE; (*bo)->parent = amdgpu_bo_ref(mem->bo); @@ -697,10 +708,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, va + bo_size, vm); - if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && - amdgpu_xgmi_same_hive(adev, bo_adev))) { - /* Mappings on the local GPU and VRAM mappings in the - * local hive share the original BO + if (adev == bo_adev || + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || + (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) { + /* Mappings on the local GPU, or VRAM mappings in the + * local hive, or userptr mapping IOMMU direct map mode + * share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; bo[i] = mem->bo; @@ -1278,12 +1291,60 @@ create_evict_fence_fail: return ret; } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, +/** + * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria + * @bo: Handle of buffer object being pinned + * @domain: Domain into which BO should be pinned + * + * - USERPTR BOs are UNPINNABLE and will return error + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count incremented. It is valid to PIN a BO multiple times + * + * Return: ZERO if successful in pinning, Non-Zero in case of error. + */ +static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) +{ + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret)) + return ret; + + ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); + if (ret) + pr_err("Error in Pinning BO to domain: %d\n", domain); + + amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); + amdgpu_bo_unreserve(bo); + + return ret; +} + +/** + * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria + * @bo: Handle of buffer object being unpinned + * + * - Is a illegal request for USERPTR BOs and is ignored + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count decremented. Calls to UNPIN must balance calls to PIN + */ +static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) +{ + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret)) + return; + + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); +} + +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct file *filp, u32 pasid, void **process_info, struct dma_fence **ef) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; int ret; @@ -1359,12 +1420,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, + void *drm_priv) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm; - if (WARN_ON(!kgd || !drm_priv)) + if (WARN_ON(!adev || !drm_priv)) return; avm = drm_priv_to_vm(drm_priv); @@ -1392,17 +1453,16 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - struct kgd_dev *kgd, uint64_t va, uint64_t size, + struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; - struct drm_gem_object *gobj; + struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; u64 alloc_flags; int ret; @@ -1460,7 +1520,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags); if (ret) { pr_debug("Insufficient memory\n"); goto err_reserve_limit; @@ -1501,6 +1561,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; + } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n"); + goto err_pin_bo; + } + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; } if (offset) @@ -1509,17 +1578,20 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: +err_pin_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: - drm_gem_object_put(gobj); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_mem_limit(adev, size, alloc_domain, !!sg); + unreserve_mem_limit(adev, size, flags); err_reserve_limit: mutex_destroy(&(*mem)->lock); - kfree(*mem); + if (gobj) + drm_gem_object_put(gobj); + else + kfree(*mem); err: if (sg) { sg_free_table(sg); @@ -1529,7 +1601,7 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; @@ -1542,6 +1614,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( bool is_imported = false; mutex_lock(&mem->lock); + + /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + if (mem->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo); + } + mapped_to_gpu_memory = mem->mapped_to_gpu_memory; is_imported = mem->is_imported; mutex_unlock(&mem->lock); @@ -1621,10 +1701,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, bool *table_freed) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; struct amdgpu_bo *bo; @@ -1751,7 +1830,7 @@ out: } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) + struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdkfd_process_info *process_info = avm->process_info; @@ -1812,7 +1891,7 @@ out: } int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) + struct amdgpu_device *adev, struct kgd_mem *mem, bool intr) { struct amdgpu_sync sync; int ret; @@ -1828,7 +1907,7 @@ int amdgpu_amdkfd_gpuvm_sync_memory( return ret; } -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, struct kgd_mem *mem, void **kptr, uint64_t *size) { int ret; @@ -1884,7 +1963,8 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem) +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, + struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -1894,12 +1974,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kg amdgpu_bo_unreserve(bo); } -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, - struct kfd_vm_fault_info *mem) +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, + struct kfd_vm_fault_info *mem) { - struct amdgpu_device *adev; - - adev = (struct amdgpu_device *)kgd; if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; mb(); @@ -1908,13 +1985,12 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, struct dma_buf *dma_buf, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; @@ -2541,11 +2617,9 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) } /* Returns GPU-specific tiling mode information */ -int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - config->gb_addr_config = adev->gfx.config.gb_addr_config; config->tile_config_ptr = adev->gfx.config.tile_mode_array; config->num_tile_configs = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 96b7bb13a2dd..12a6b1c99c93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1569,6 +1569,18 @@ void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, WREG32(adev->bios_scratch_reg_offset + 3, tmp); } +void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev, + u32 backlight_level) +{ + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2); + + tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK; + tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) & + ATOM_S2_CURRENT_BL_LEVEL_MASK; + + WREG32(adev->bios_scratch_reg_offset + 2, tmp); +} + bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) { u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 8cc0222dba19..27e74b1fc260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -185,6 +185,8 @@ bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung); +void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev, + u32 backlight_level); bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev); void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 97178b307ed6..4d4ddf026faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -470,8 +470,8 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade /** * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS - * adev: amdgpu_device pointer - * i2c_address: pointer to u8; if not NULL, will contain + * @adev: amdgpu_device pointer + * @i2c_address: pointer to u8; if not NULL, will contain * the RAS EEPROM address if the function returns true * * Return true if VBIOS supports RAS EEPROM address reporting, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 7abe9500c0c6..d6d986be906a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include "amdgpu.h" #include "amd_acpi.h" #define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0) @@ -165,7 +166,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas } /** - * amdgpu_atpx_validate_functions - validate ATPX functions + * amdgpu_atpx_validate - validate ATPX functions * * @atpx: amdgpu atpx struct * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 0de66f59adb8..c16a2704ced6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -108,7 +108,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -116,7 +116,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -125,7 +125,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) dig_connector = amdgpu_connector->con_priv; if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) || - drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + connector->display_info.is_hdmi) { if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -149,7 +149,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; } - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { /* * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at @@ -315,8 +315,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector) if (!amdgpu_connector->edid) { /* some laptops provide a hardcoded edid in rom for LCDs */ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) || - (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) + (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) { amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev); + drm_connector_update_edid_property(connector, amdgpu_connector->edid); + } } } @@ -326,6 +328,7 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; + drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) @@ -387,6 +390,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -401,6 +407,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } @@ -1171,7 +1180,7 @@ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) || (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) { return MODE_OK; - } else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + } else if (connector->display_info.is_hdmi) { /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; @@ -1463,7 +1472,7 @@ static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) { return amdgpu_atombios_dp_mode_valid_helper(connector, mode); } else { - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 164d6a9e9fbb..25e2e5bf90eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1618,6 +1618,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (!debugfs_initialized()) return 0; + debugfs_create_x32("amdgpu_smu_debug", 0600, root, + &adev->pm.smu_debug_mask); + ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); if (IS_ERR(ent)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 188accb71249..8219e40240ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/iommu.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> @@ -331,7 +332,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_vram_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperature * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -550,11 +551,11 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, trace_amdgpu_device_wreg(adev->pdev->device, reg, v); } -/* +/** * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range * * this function is invoked only the debugfs register access - * */ + */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { @@ -1100,7 +1101,7 @@ static void amdgpu_device_wb_fini(struct amdgpu_device *adev) } /** - * amdgpu_device_wb_init- Init Writeback driver info and allocate memory + * amdgpu_device_wb_init - Init Writeback driver info and allocate memory * * @adev: amdgpu_device pointer * @@ -2657,6 +2658,36 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_device_smu_fini_early - smu hw_fini wrapper + * + * @adev: amdgpu_device pointer + * + * For ASICs need to disable SMC first + */ +static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) +{ + int i, r; + + if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) + return; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); + /* XXX handle errors */ + if (r) { + DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + adev->ip_blocks[i].status.hw = false; + break; + } + } +} + static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) { int i, r; @@ -2677,21 +2708,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - /* need to disable SMC first */ - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.hw) - continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); - /* XXX handle errors */ - if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - adev->ip_blocks[i].status.hw = false; - break; - } - } + /* Workaroud for ASICs need to disable SMC first */ + amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) @@ -2733,8 +2751,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); - amdgpu_ras_pre_fini(adev); - if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); @@ -3367,6 +3383,22 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) return ret; } +/** + * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU + * + * @adev: amdgpu_device pointer + * + * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode + */ +static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) + adev->ram_is_direct_mapped = true; +} + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_product_name.attr, &dev_attr_product_number.attr, @@ -3687,8 +3719,6 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - amdgpu_fbdev_init(adev); - r = amdgpu_pm_sysfs_init(adev); if (r) { adev->pm_sysfs_en = false; @@ -3772,6 +3802,8 @@ fence_driver_init: queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); + amdgpu_device_check_iommu_direct_map(adev); + return 0; release_ras_con: @@ -3805,7 +3837,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) } /** - * amdgpu_device_fini - tear down the driver + * amdgpu_device_fini_hw - tear down the driver * * @adev: amdgpu_device pointer * @@ -3833,7 +3865,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ - if (!amdgpu_device_has_dc_support(adev)) + if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) drm_helper_force_disable_all(adev_to_drm(adev)); else drm_atomic_helper_shutdown(adev_to_drm(adev)); @@ -3846,7 +3878,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ucode_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); - amdgpu_fbdev_fini(adev); + /* disable ras feature must before hw fini */ + amdgpu_ras_pre_fini(adev); amdgpu_device_ip_fini_early(adev); @@ -3942,7 +3975,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_kms_helper_poll_disable(dev); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4019,7 +4052,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) flush_delayed_work(&adev->delayed_init_work); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); drm_kms_helper_poll_enable(dev); @@ -4288,6 +4321,11 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor) { int r; + struct amdgpu_hive_info *hive = NULL; + + amdgpu_amdkfd_pre_reset(adev); + + amdgpu_amdkfd_pre_reset(adev); if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); @@ -4314,9 +4352,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) goto error; - amdgpu_irq_gpu_reset_resume_helper(adev); - r = amdgpu_ib_ring_tests(adev); - amdgpu_amdkfd_post_reset(adev); + hive = amdgpu_get_xgmi_hive(adev); + /* Update PSP FW topology after reset */ + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + r = amdgpu_xgmi_update_topology(hive, adev); + + if (hive) + amdgpu_put_xgmi_hive(hive); + + if (!r) { + amdgpu_irq_gpu_reset_resume_helper(adev); + r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); + } error: if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { @@ -4649,7 +4697,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; - amdgpu_fbdev_set_suspend(tmp_adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); /* * The GPU enters bad state once faulty pages @@ -4748,7 +4796,7 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp { struct amdgpu_device *tmp_adev = NULL; - if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { if (!hive) { dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); return -ENODEV; @@ -4960,7 +5008,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * We always reset all schedulers for device and all devices for XGMI * hive so that should take care of them too. */ - hive = amdgpu_get_xgmi_hive(adev); + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); if (hive) { if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", @@ -5001,7 +5050,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * to put adev in the 1st position. */ INIT_LIST_HEAD(&device_list); - if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) list_add_tail(&tmp_adev->reset_list, &device_list); if (!list_is_first(&adev->reset_list, &device_list)) @@ -5031,7 +5080,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, cancel_delayed_work_sync(&tmp_adev->delayed_init_work); - amdgpu_amdkfd_pre_reset(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); /* * Mark these ASICs to be reseted as untracked first @@ -5039,7 +5089,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - amdgpu_fbdev_set_suspend(tmp_adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && @@ -5089,7 +5139,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter)); /* Actual ASIC resets if needed.*/ - /* TODO Implement XGMI hive reset logic for SRIOV */ + /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { r = amdgpu_device_reset_sriov(adev, job ? false : true); if (r) @@ -5130,7 +5180,7 @@ skip_hw_reset: drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } - if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { + if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } @@ -5151,7 +5201,7 @@ skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_post_reset(tmp_adev); + amdgpu_amdkfd_post_reset(tmp_adev); /* kfd_post_reset will do nothing if kfd device is not initialized, * need to bring up kfd here if it's not be initialized before @@ -5634,3 +5684,42 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, amdgpu_asic_invalidate_hdp(adev, ring); } + +/** + * amdgpu_device_halt() - bring hardware to some kind of halt state + * + * @adev: amdgpu_device pointer + * + * Bring hardware to some kind of halt state so that no one can touch it + * any more. It will help to maintain error context when error occurred. + * Compare to a simple hang, the system will keep stable at least for SSH + * access. Then it should be trivial to inspect the hardware state and + * see what's going on. Implemented as following: + * + * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc), + * clears all CPU mappings to device, disallows remappings through page faults + * 2. amdgpu_irq_disable_all() disables all interrupts + * 3. amdgpu_fence_driver_hw_fini() signals all HW fences + * 4. set adev->no_hw_access to avoid potential crashes after setp 5 + * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings + * 6. pci_disable_device() and pci_wait_for_pending_transaction() + * flush any in flight DMA operations + */ +void amdgpu_device_halt(struct amdgpu_device *adev) +{ + struct pci_dev *pdev = adev->pdev; + struct drm_device *ddev = adev_to_drm(adev); + + drm_dev_unplug(ddev); + + amdgpu_irq_disable_all(adev); + + amdgpu_fence_driver_hw_fini(adev); + + adev->no_hw_access = true; + + amdgpu_device_unmap_mmio(adev); + + pci_disable_device(pdev); + pci_wait_for_pending_transaction(pdev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 4e3669407518..5bc072220f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -67,7 +67,8 @@ #include "smuio_v11_0_6.h" #include "smuio_v13_0.h" -MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); +#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" +MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 #define mmMM_INDEX 0x0 @@ -157,6 +158,8 @@ static int hw_id_map[MAX_HWIP] = { [HDP_HWIP] = HDP_HWID, [SDMA0_HWIP] = SDMA0_HWID, [SDMA1_HWIP] = SDMA1_HWID, + [SDMA2_HWIP] = SDMA2_HWID, + [SDMA3_HWIP] = SDMA3_HWID, [MMHUB_HWIP] = MMHUB_HWID, [ATHUB_HWIP] = ATHUB_HWID, [NBIO_HWIP] = NBIF_HWID, @@ -177,7 +180,7 @@ static int hw_id_map[MAX_HWIP] = { [DCI_HWIP] = DCI_HWID, }; -static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -187,6 +190,34 @@ static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *bin return 0; } +static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) +{ + const struct firmware *fw; + const char *fw_name; + int r; + + switch (amdgpu_discovery) { + case 2: + fw_name = FIRMWARE_IP_DISCOVERY; + break; + default: + dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); + return -EINVAL; + } + + r = request_firmware(&fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "can't load firmware \"%s\"\n", + fw_name); + return r; + } + + memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size); + release_firmware(fw); + + return 0; +} + static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) { uint16_t checksum = 0; @@ -204,13 +235,20 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); } +static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary) +{ + struct binary_header *bhdr; + bhdr = (struct binary_header *)binary; + + return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE); +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct gpu_info_header *ghdr; - const struct firmware *fw; uint16_t offset; uint16_t size; uint16_t checksum; @@ -221,39 +259,40 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (!adev->mman.discovery_bin) return -ENOMEM; - if (amdgpu_discovery == 2) { - r = request_firmware(&fw, "amdgpu/ip_discovery.bin", adev->dev); - if (r) - goto get_from_vram; - dev_info(adev->dev, "Using IP discovery from file\n"); - memcpy((u8 *)adev->mman.discovery_bin, (u8 *)fw->data, - adev->mman.discovery_tmr_size); - release_firmware(fw); - } else { -get_from_vram: - r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin); + r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin); + if (r) { + dev_err(adev->dev, "failed to read ip discovery binary from vram\n"); + r = -EINVAL; + goto out; + } + + if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); + /* retry read ip discovery binary from file */ + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); if (r) { - DRM_ERROR("failed to read ip discovery binary\n"); + dev_err(adev->dev, "failed to read ip discovery binary from file\n"); + r = -EINVAL; + goto out; + } + /* check the ip discovery binary signature */ + if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n"); + r = -EINVAL; goto out; } } bhdr = (struct binary_header *)adev->mman.discovery_bin; - if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { - DRM_ERROR("invalid ip discovery binary signature\n"); - r = -EINVAL; - goto out; - } - offset = offsetof(struct binary_header, binary_checksum) + sizeof(bhdr->binary_checksum); - size = bhdr->binary_size - offset; - checksum = bhdr->binary_checksum; + size = le16_to_cpu(bhdr->binary_size) - offset; + checksum = le16_to_cpu(bhdr->binary_checksum); if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, size, checksum)) { - DRM_ERROR("invalid ip discovery binary checksum\n"); + dev_err(adev->dev, "invalid ip discovery binary checksum\n"); r = -EINVAL; goto out; } @@ -264,14 +303,14 @@ get_from_vram: ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset); if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { - DRM_ERROR("invalid ip discovery data table signature\n"); + dev_err(adev->dev, "invalid ip discovery data table signature\n"); r = -EINVAL; goto out; } if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - ihdr->size, checksum)) { - DRM_ERROR("invalid ip discovery data table checksum\n"); + le16_to_cpu(ihdr->size), checksum)) { + dev_err(adev->dev, "invalid ip discovery data table checksum\n"); r = -EINVAL; goto out; } @@ -282,8 +321,8 @@ get_from_vram: ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset); if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - ghdr->size, checksum)) { - DRM_ERROR("invalid gc data table checksum\n"); + le32_to_cpu(ghdr->size), checksum)) { + dev_err(adev->dev, "invalid gc data table checksum\n"); r = -EINVAL; goto out; } @@ -377,8 +416,18 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) ip->major, ip->minor, ip->revision); - if (le16_to_cpu(ip->hw_id) == VCN_HWID) + if (le16_to_cpu(ip->hw_id) == VCN_HWID) { + /* Bit [5:0]: original revision value + * Bit [7:6]: en/decode capability: + * 0b00 : VCN function normally + * 0b10 : encode is disabled + * 0b01 : decode is disabled + */ + adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + ip->revision & 0xc0; + ip->revision &= ~0xc0; adev->vcn.num_vcn_inst++; + } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || @@ -470,14 +519,6 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n return -EINVAL; } - -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, - int *major, int *minor, int *revision) -{ - return amdgpu_discovery_get_ip_version(adev, VCN_HWID, - vcn_instance, major, minor, revision); -} - void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { struct binary_header *bhdr; @@ -489,10 +530,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset)); for (i = 0; i < 32; i++) { - if (le32_to_cpu(harvest_info->list[i].hw_id) == 0) + if (le16_to_cpu(harvest_info->list[i].hw_id) == 0) break; - switch (le32_to_cpu(harvest_info->list[i].hw_id)) { + switch (le16_to_cpu(harvest_info->list[i].hw_id)) { case VCN_HWID: vcn_harvest_count++; if (harvest_info->list[i].number_instance == 0) @@ -915,9 +956,9 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 16): - case IP_VERSION(3, 0, 64): case IP_VERSION(3, 1, 1): case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 192): amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 0ea029e3b850..14537cec19db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -33,8 +33,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, int *major, int *minor, int *revision); -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, - int *major, int *minor, int *revision); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 68108f151dad..82011e75ed85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1360,7 +1360,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && - drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + connector->display_info.is_hdmi && amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; @@ -1599,13 +1599,10 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - /* don't unpin kernel fb objects */ - if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); - } + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ae6ab93c868b..4896c876ffec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -61,9 +61,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0) attach->peer2peer = false; - if (attach->dev->driver == adev->dev->driver) - return 0; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad95de6399af..02099058d0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -31,7 +31,6 @@ #include "amdgpu_drv.h" #include <drm/drm_pciids.h> -#include <linux/console.h> #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> @@ -314,9 +313,12 @@ module_param_named(dpm, amdgpu_dpm, int, 0444); /** * DOC: fw_load_type (int) - * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto). + * Set different firmware loading type for debugging, if supported. + * Set to 0 to force direct loading if supported by the ASIC. Set + * to -1 to select the default loading mode for the ASIC, as defined + * by the driver. The default is -1 (auto). */ -MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)"); +MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = force direct if supported, -1 = auto)"); module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); /** @@ -2002,6 +2004,19 @@ retry_init: goto err_pci; } + /* + * 1. don't init fbdev on hw without DCE + * 2. don't init fbdev if there are no connectors + */ + if (adev->mode_info.mode_config_initialized && + !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { + /* select 8 bpp console on low vram cards */ + if (adev->gmc.real_vram_size <= (32*1024*1024)) + drm_fbdev_generic_setup(adev_to_drm(adev), 8); + else + drm_fbdev_generic_setup(adev_to_drm(adev), 32); + } + ret = amdgpu_debugfs_init(adev); if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); @@ -2516,10 +2531,8 @@ static int __init amdgpu_init(void) { int r; - if (vgacon_text_force()) { - DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); + if (drm_firmware_drivers_only()) return -EINVAL; - } r = amdgpu_sync_init(); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index e3a4f7048042..8178323e4bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -45,4 +45,7 @@ long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long amdgpu_kms_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index af4ef84e27a7..c96e458ed088 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -222,7 +222,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (pixel_clock > 340000) return true; else @@ -244,7 +244,7 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, return false; else { /* HDMI 1.3 supports up to 340 Mhz over single link */ - if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector))) { + if (connector->display_info.is_hdmi) { if (pixel_clock > 340000) return true; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c deleted file mode 100644 index cd0acbea75da..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright © 2007 David Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * David Airlie - */ - -#include <linux/module.h> -#include <linux/pm_runtime.h> -#include <linux/slab.h> -#include <linux/vga_switcheroo.h> - -#include <drm/amdgpu_drm.h> -#include <drm/drm_crtc.h> -#include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_fourcc.h> - -#include "amdgpu.h" -#include "cikd.h" -#include "amdgpu_gem.h" - -#include "amdgpu_display.h" - -/* object hierarchy - - this contains a helper + a amdgpu fb - the helper contains a pointer to amdgpu framebuffer baseclass. -*/ - -static int -amdgpufb_open(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - int ret = pm_runtime_get_sync(fb_helper->dev->dev); - if (ret < 0 && ret != -EACCES) { - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return ret; - } - return 0; -} - -static int -amdgpufb_release(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return 0; -} - -static const struct fb_ops amdgpufb_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = amdgpufb_open, - .fb_release = amdgpufb_release, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - - -int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled) -{ - int aligned = width; - int pitch_mask = 0; - - switch (cpp) { - case 1: - pitch_mask = 255; - break; - case 2: - pitch_mask = 127; - break; - case 3: - case 4: - pitch_mask = 63; - break; - } - - aligned += pitch_mask; - aligned &= ~pitch_mask; - return aligned * cpp; -} - -static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) -{ - struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - int ret; - - ret = amdgpu_bo_reserve(abo, true); - if (likely(ret == 0)) { - amdgpu_bo_kunmap(abo); - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - drm_gem_object_put(gobj); -} - -static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object **gobj_p) -{ - const struct drm_format_info *info; - struct amdgpu_device *adev = rfbdev->adev; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0, domain; - int ret; - int aligned_size, size; - int height = mode_cmd->height; - u32 cpp; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; - - info = drm_get_format_info(adev_to_drm(adev), mode_cmd); - cpp = info->cpp[0]; - - /* need to align pitch with crtc limits */ - mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, - fb_tiled); - domain = amdgpu_display_supported_domains(adev, flags); - height = ALIGN(mode_cmd->height, 8); - size = mode_cmd->pitches[0] * height; - aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, - ttm_bo_type_device, NULL, &gobj); - if (ret) { - pr_err("failed to allocate framebuffer (%d)\n", aligned_size); - return -ENOMEM; - } - abo = gem_to_amdgpu_bo(gobj); - - if (fb_tiled) - tiling_flags = AMDGPU_TILING_SET(ARRAY_MODE, GRPH_ARRAY_2D_TILED_THIN1); - - ret = amdgpu_bo_reserve(abo, false); - if (unlikely(ret != 0)) - goto out_unref; - - if (tiling_flags) { - ret = amdgpu_bo_set_tiling_flags(abo, - tiling_flags); - if (ret) - dev_err(adev->dev, "FB failed to set tiling flags\n"); - } - - ret = amdgpu_bo_pin(abo, domain); - if (ret) { - amdgpu_bo_unreserve(abo); - goto out_unref; - } - - ret = amdgpu_ttm_alloc_gart(&abo->tbo); - if (ret) { - amdgpu_bo_unreserve(abo); - dev_err(adev->dev, "%p bind failed\n", abo); - goto out_unref; - } - - ret = amdgpu_bo_kmap(abo, NULL); - amdgpu_bo_unreserve(abo); - if (ret) { - goto out_unref; - } - - *gobj_p = gobj; - return 0; -out_unref: - amdgpufb_destroy_pinned_object(gobj); - *gobj_p = NULL; - return ret; -} - -static int amdgpufb_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper; - struct amdgpu_device *adev = rfbdev->adev; - struct fb_info *info; - struct drm_framebuffer *fb = NULL; - struct drm_mode_fb_cmd2 mode_cmd; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - int ret; - - memset(&mode_cmd, 0, sizeof(mode_cmd)); - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj); - if (ret) { - DRM_ERROR("failed to create fbcon object %d\n", ret); - return ret; - } - - abo = gem_to_amdgpu_bo(gobj); - - /* okay we have an object now allocate the framebuffer */ - info = drm_fb_helper_alloc_fbi(helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out; - } - - ret = amdgpu_display_gem_fb_init(adev_to_drm(adev), &rfbdev->rfb, - &mode_cmd, gobj); - if (ret) { - DRM_ERROR("failed to initialize framebuffer %d\n", ret); - goto out; - } - - fb = &rfbdev->rfb.base; - - /* setup helper */ - rfbdev->helper.fb = fb; - - info->fbops = &amdgpufb_ops; - - info->fix.smem_start = amdgpu_gmc_vram_cpu_pa(adev, abo); - info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = amdgpu_bo_kptr(abo); - info->screen_size = amdgpu_bo_size(abo); - - drm_fb_helper_fill_info(info, &rfbdev->helper, sizes); - - /* setup aperture base/size for vesafb takeover */ - info->apertures->ranges[0].base = adev_to_drm(adev)->mode_config.fb_base; - info->apertures->ranges[0].size = adev->gmc.aper_size; - - /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - - if (info->screen_base == NULL) { - ret = -ENOSPC; - goto out; - } - - DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); - DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base); - DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); - DRM_INFO("fb depth is %d\n", fb->format->depth); - DRM_INFO(" pitch is %d\n", fb->pitches[0]); - - vga_switcheroo_client_fb_set(adev->pdev, info); - return 0; - -out: - if (fb && ret) { - drm_gem_object_put(gobj); - drm_framebuffer_unregister_private(fb); - drm_framebuffer_cleanup(fb); - kfree(fb); - } - return ret; -} - -static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev) -{ - struct amdgpu_framebuffer *rfb = &rfbdev->rfb; - int i; - - drm_fb_helper_unregister_fbi(&rfbdev->helper); - - if (rfb->base.obj[0]) { - for (i = 0; i < rfb->base.format->num_planes; i++) - drm_gem_object_put(rfb->base.obj[0]); - amdgpufb_destroy_pinned_object(rfb->base.obj[0]); - rfb->base.obj[0] = NULL; - drm_framebuffer_unregister_private(&rfb->base); - drm_framebuffer_cleanup(&rfb->base); - } - drm_fb_helper_fini(&rfbdev->helper); - - return 0; -} - -static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .fb_probe = amdgpufb_create, -}; - -int amdgpu_fbdev_init(struct amdgpu_device *adev) -{ - struct amdgpu_fbdev *rfbdev; - int bpp_sel = 32; - int ret; - - /* don't init fbdev on hw without DCE */ - if (!adev->mode_info.mode_config_initialized) - return 0; - - /* don't init fbdev if there are no connectors */ - if (list_empty(&adev_to_drm(adev)->mode_config.connector_list)) - return 0; - - /* select 8 bpp console on low vram cards */ - if (adev->gmc.real_vram_size <= (32*1024*1024)) - bpp_sel = 8; - - rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); - if (!rfbdev) - return -ENOMEM; - - rfbdev->adev = adev; - adev->mode_info.rfbdev = rfbdev; - - drm_fb_helper_prepare(adev_to_drm(adev), &rfbdev->helper, - &amdgpu_fb_helper_funcs); - - ret = drm_fb_helper_init(adev_to_drm(adev), &rfbdev->helper); - if (ret) { - kfree(rfbdev); - return ret; - } - - /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display) - drm_helper_disable_unused_functions(adev_to_drm(adev)); - - drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); - return 0; -} - -void amdgpu_fbdev_fini(struct amdgpu_device *adev) -{ - if (!adev->mode_info.rfbdev) - return; - - amdgpu_fbdev_destroy(adev_to_drm(adev), adev->mode_info.rfbdev); - kfree(adev->mode_info.rfbdev); - adev->mode_info.rfbdev = NULL; -} - -void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state) -{ - if (adev->mode_info.rfbdev) - drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper, - state); -} - -int amdgpu_fbdev_total_size(struct amdgpu_device *adev) -{ - struct amdgpu_bo *robj; - int size = 0; - - if (!adev->mode_info.rfbdev) - return 0; - - robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); - size += amdgpu_bo_size(robj); - return size; -} - -bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) -{ - if (!adev->mode_info.rfbdev) - return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) - return true; - return false; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a1e63ba4c54a..c0d8f40a5b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -877,6 +877,32 @@ out: return r; } +static int amdgpu_gem_align_pitch(struct amdgpu_device *adev, + int width, + int cpp, + bool tiled) +{ + int aligned = width; + int pitch_mask = 0; + + switch (cpp) { + case 1: + pitch_mask = 255; + break; + case 2: + pitch_mask = 127; + break; + case 3: + case 4: + pitch_mask = 63; + break; + } + + aligned += pitch_mask; + aligned &= ~pitch_mask; + return aligned * cpp; +} + int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) @@ -885,7 +911,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_gem_object *gobj; uint32_t handle; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; u32 domain; int r; @@ -897,8 +924,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, if (adev->mman.buffer_funcs_enabled) flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; - args->pitch = amdgpu_align_pitch(adev, args->width, - DIV_ROUND_UP(args->bpp, 8), 0); + args->pitch = amdgpu_gem_align_pitch(adev, args->width, + DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_domain(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 08478fce00f2..2430d6223c2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -350,6 +350,7 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) * amdgpu_gmc_filter_faults - filter VM faults * * @adev: amdgpu device structure + * @ih: interrupt ring that the fault received from * @addr: address of the VM fault * @pasid: PASID of the process causing the fault * @timestamp: timestamp of the fault @@ -358,7 +359,8 @@ static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) * True if the fault was filtered and should not be processed further. * False if the fault is a new one and needs to be handled. */ -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, uint64_t addr, uint16_t pasid, uint64_t timestamp) { struct amdgpu_gmc *gmc = &adev->gmc; @@ -366,6 +368,10 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, struct amdgpu_gmc_fault *fault; uint32_t hash; + /* Stale retry fault if timestamp goes backward */ + if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp)) + return true; + /* If we don't have space left in the ring buffer return immediately */ stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) - AMDGPU_GMC_FAULT_TIMEOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e55201134a01..8458cebc6d5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -316,7 +316,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); -bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, uint64_t addr, uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3d62e196901..3df146579ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -164,52 +164,32 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, } } -/* Waiter helper that checks current rptr matches or passes checkpoint wptr */ -static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih, - uint32_t checkpoint_wptr, - uint32_t *prev_rptr) -{ - uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask); - - /* rptr has wrapped. */ - if (cur_rptr < *prev_rptr) - cur_rptr += ih->ptr_mask + 1; - *prev_rptr = cur_rptr; - - /* check ring is empty to workaround missing wptr overflow flag */ - return cur_rptr >= checkpoint_wptr || - (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); -} - /** - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint * * @adev: amdgpu_device pointer * @ih: ih ring to process * * Used to ensure ring has processed IVs up to the checkpoint write pointer. */ -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - uint32_t checkpoint_wptr, rptr; + uint32_t checkpoint_wptr; + uint64_t checkpoint_ts; + long timeout = HZ; if (!ih->enabled || adev->shutdown) return -ENODEV; checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); - /* Order wptr with rptr. */ + /* Order wptr with ring data. */ rmb(); - rptr = READ_ONCE(ih->rptr); - - /* wptr has wrapped. */ - if (rptr > checkpoint_wptr) - checkpoint_wptr += ih->ptr_mask + 1; + checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); - return wait_event_interruptible(ih->wait_process, - amdgpu_ih_has_checkpoint_processed(adev, ih, - checkpoint_wptr, &rptr)); + return wait_event_interruptible_timeout(ih->wait_process, + amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) || + ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout); } /** @@ -223,7 +203,7 @@ int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, */ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - unsigned int count = AMDGPU_IH_MAX_NUM_IVS; + unsigned int count; u32 wptr; if (!ih->enabled || adev->shutdown) @@ -232,6 +212,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) wptr = amdgpu_ih_get_wptr(adev, ih); restart_ih: + count = AMDGPU_IH_MAX_NUM_IVS; DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ @@ -298,3 +279,18 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, /* wptr/rptr are in bytes! */ ih->rptr += 32; } + +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset) +{ + uint32_t iv_size = 32; + uint32_t ring_index; + uint32_t dw1, dw2; + + rptr += iv_size * offset; + ring_index = (rptr & ih->ptr_mask) >> 2; + + dw1 = le32_to_cpu(ih->ring[ring_index + 1]); + dw2 = le32_to_cpu(ih->ring[ring_index + 2]); + return dw1 | ((u64)(dw2 & 0xffff) << 32); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 0649b59830a5..dd1c2eded6b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -68,20 +68,30 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; + uint64_t processed_timestamp; }; +/* return true if time stamp t2 is after t1 with 48bit wrap around */ +#define amdgpu_ih_ts_after(t1, t2) \ + (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL) + /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry); + uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset); void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); }; #define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih)) #define amdgpu_ih_decode_iv(adev, iv) \ (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv)) +#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \ + (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \ + (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset))) #define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih)) int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, @@ -89,10 +99,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, unsigned int num_dw); -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih); +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry); +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, + signed int offset); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c index 5cf142e849bb..a1cbd7c3deb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c @@ -1,4 +1,4 @@ -/** +/* * \file amdgpu_ioc32.c * * 32-bit ioctl compatibility routines for the AMDGPU DRM. @@ -37,12 +37,9 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { unsigned int nr = DRM_IOCTL_NR(cmd); - int ret; if (nr < DRM_COMMAND_BASE) return drm_compat_ioctl(filp, cmd, arg); - ret = amdgpu_drm_ioctl(filp, cmd, arg); - - return ret; + return amdgpu_drm_ioctl(filp, cmd, arg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index cc2e0c9cfe0a..f5cbc2747ac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -333,7 +333,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); @@ -391,7 +390,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev) } /** - * amdgpu_irq_fini - shut down interrupt handling + * amdgpu_irq_fini_sw - shut down interrupt handling * * @adev: amdgpu device pointer * @@ -529,6 +528,9 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, /* Send it to amdkfd as well if it isn't already handled */ if (!handled) amdgpu_amdkfd_interrupt(adev, entry.iv_entry); + + if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp)) + ih->processed_timestamp = entry.timestamp; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 89fb372ed49c..6043bf6fd414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -232,8 +232,6 @@ struct amdgpu_i2c_chan { struct mutex mutex; }; -struct amdgpu_fbdev; - struct amdgpu_afmt { bool enabled; int offset; @@ -309,13 +307,6 @@ struct amdgpu_framebuffer { uint64_t address; }; -struct amdgpu_fbdev { - struct drm_fb_helper helper; - struct amdgpu_framebuffer rfb; - struct list_head fbdev_list; - struct amdgpu_device *adev; -}; - struct amdgpu_mode_info { struct atom_context *atom_context; struct card_info *atom_card_info; @@ -341,8 +332,6 @@ struct amdgpu_mode_info { struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; - /* pointer to fbdev info structure */ - struct amdgpu_fbdev *rfbdev; /* firmware flags */ u32 firmware_flags; /* pointer to backlight encoder */ @@ -631,15 +620,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -/* fbdev layer */ -int amdgpu_fbdev_init(struct amdgpu_device *adev); -void amdgpu_fbdev_fini(struct amdgpu_device *adev); -void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state); -int amdgpu_fbdev_total_size(struct amdgpu_device *adev); -bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj); - -int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); - /* amdgpu_display.c */ void amdgpu_display_print_display_setup(struct drm_device *dev); int amdgpu_display_modeset_create_props(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4fcfc2313b8c..3a7b56e57cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1032,9 +1032,14 @@ int amdgpu_bo_init(struct amdgpu_device *adev) /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu) { /* reserve PAT memory space to WC for VRAM */ - arch_io_reserve_memtype_wc(adev->gmc.aper_base, + int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + if (r) { + DRM_ERROR("Unable to set WC memtype for the aperture base\n"); + return r; + } + /* Add an MTRR for the VRAM */ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base, adev->gmc.aper_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 4eaec446b49d..0bb2466d539a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -69,6 +69,7 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den, /** * amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation * + * @adev: amdgpu_device pointer * @nom: nominator * @den: denominator * @post_div: post divider @@ -106,6 +107,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n /** * amdgpu_pll_compute - compute PLL paramaters * + * @adev: amdgpu_device pointer * @pll: information about the PLL * @freq: requested frequency * @dot_clock_p: resulting pixel clock diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 82e9ecf84352..71ee361d0972 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -233,6 +233,10 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_start)) + return; + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; @@ -268,6 +272,10 @@ static void amdgpu_perf_read(struct perf_event *event) pmu); u64 count, prev; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_get_count)) + return; + do { prev = local64_read(&hwc->prev_count); @@ -297,6 +305,10 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_stop)) + return; + switch (hwc->config_base) { case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: @@ -326,6 +338,10 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) struct amdgpu_pmu_entry, pmu); + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_start)) + return -EINVAL; + switch (pe->pmu_perf_type) { case AMDGPU_PMU_PERF_TYPE_DF: hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF; @@ -371,6 +387,9 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) struct amdgpu_pmu_entry *pe = container_of(event->pmu, struct amdgpu_pmu_entry, pmu); + if ((!pe->adev->df.funcs) || + (!pe->adev->df.funcs->pmc_stop)) + return; amdgpu_perf_stop(event, PERF_EF_UPDATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index d02c8637f909..786afe4f58f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -59,7 +59,7 @@ static DEVICE_ATTR_RO(mem_info_preempt_used); * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: TTM memory object * * Dummy, just count the space used without allocating resources or any limit. */ @@ -85,7 +85,7 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, * amdgpu_preempt_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated GTT again. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c641f84649d6..dee17a0e1187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -518,7 +518,7 @@ static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp) return cmd; } -void release_psp_cmd_buf(struct psp_context *psp) +static void release_psp_cmd_buf(struct psp_context *psp) { mutex_unlock(&psp->mutex); } @@ -2017,12 +2017,16 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + ret = psp_tmr_init(psp); if (ret) { DRM_ERROR("PSP tmr init failed!\n"); return ret; } +skip_pin_bo: /* * For ASICs with DF Cstate management centralized * to PMFW, TMR setup should be performed after PMFW @@ -2452,6 +2456,18 @@ skip_memalloc: return ret; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp, false, true); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + } + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 08133de21fdd..cd9e5914944b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -867,9 +867,9 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* feature ctl end */ -void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_err_data *err_data) +static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_err_data *err_data) { switch (ras_block->sub_block_index) { case AMDGPU_RAS_MCA_BLOCK__MP0: @@ -892,6 +892,38 @@ void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev, } } +static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; + + /* + * choosing right query method according to + * whether smu support query error information + */ + ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, err_data); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address) + adev->umc.ras_funcs->query_ras_error_address(adev, err_data); + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address) + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); + } +} + /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) @@ -905,15 +937,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, switch (info->head.block) { case AMDGPU_RAS_BLOCK__UMC: - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, &err_data); - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address) - adev->umc.ras_funcs->query_ras_error_address(adev, &err_data); + amdgpu_ras_get_ecc_info(adev, &err_data); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->query_ras_error_count) { @@ -1137,9 +1161,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, /** * amdgpu_ras_query_error_count -- Get error counts of all IPs - * adev: pointer to AMD GPU device - * ce_count: pointer to an integer to be set to the count of correctible errors. - * ue_count: pointer to an integer to be set to the count of uncorrectible + * @adev: pointer to AMD GPU device + * @ce_count: pointer to an integer to be set to the count of correctible errors. + * @ue_count: pointer to an integer to be set to the count of uncorrectible * errors. * * If set, @ce_count or @ue_count, count and return the corresponding @@ -1723,6 +1747,16 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF) continue; + /* + * this is a workaround for aldebaran, skip send msg to + * smu to get ecc_info table due to smu handle get ecc + * info table failed temporarily. + * should be removed until smu fix handle ecc_info table. + */ + if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))) + continue; + amdgpu_ras_query_error_status(adev, &info); } } @@ -1935,9 +1969,11 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) if (!con || !con->eh_data) return 0; + mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; save_count = data->count - control->ras_num_recs; + mutex_unlock(&con->recovery_lock); /* only new entries are saved */ if (save_count > 0) { if (amdgpu_ras_eeprom_append(control, @@ -2336,7 +2372,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } /* Init poison supported flag, the default value is false */ - if (adev->df.funcs && + if (adev->gmc.xgmi.connected_to_cpu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } + else if (adev->df.funcs && adev->df.funcs->query_ras_poison_mode && adev->umc.ras_funcs && adev->umc.ras_funcs->query_ras_poison_mode) { @@ -2477,7 +2517,6 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev, amdgpu_ras_sysfs_remove(adev, ras_block); if (ih_info->cb) amdgpu_ras_interrupt_remove_handler(adev, ih_info); - amdgpu_ras_feature_enable(adev, ras_block, 0); } /* do some init work after IP late init as dependence. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e36f4de9fa55..1c708122d492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -319,6 +319,19 @@ struct ras_common_if { char name[32]; }; +#define MAX_UMC_CHANNEL_NUM 32 + +struct ecc_info_per_ch { + uint16_t ce_count_lo_chip; + uint16_t ce_count_hi_chip; + uint64_t mca_umc_status; + uint64_t mca_umc_addr; +}; + +struct umc_ecc_info { + struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM]; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -358,6 +371,9 @@ struct amdgpu_ras { struct delayed_work ras_counte_delay_work; atomic_t ras_ue_count; atomic_t ras_ce_count; + + /* record umc error info queried from smu */ + struct umc_ecc_info umc_ecc; }; struct ras_fs_data { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index eab4380f28e5..fb0d8bffdce2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -116,17 +116,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { - struct dma_fence *fence; - struct dma_resv *resv = &bo->base._resv; - - rcu_read_lock(); - fence = rcu_dereference(resv->fence_excl); - if (fence && !fence->ops->signaled) - dma_fence_enable_sw_signaling(fence); - placement->num_placement = 0; placement->num_busy_placement = 0; - rcu_read_unlock(); return; } @@ -922,11 +913,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, ttm->num_pages, bo_mem, ttm); } - if (bo_mem->mem_type == AMDGPU_PL_GDS || - bo_mem->mem_type == AMDGPU_PL_GWS || - bo_mem->mem_type == AMDGPU_PL_OA) - return -EINVAL; - if (bo_mem->mem_type != TTM_PL_TT || !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index a90029ee9733..6e4bea012ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -94,30 +94,58 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); - - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - dev_warn(adev->dev, "Failed to alloc memory for " - "umc error address record!\n"); - - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc)); + if (ret == -EOPNOTSUPP) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + } + } else if (!ret) { + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_count) + adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); + + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ecc_info_query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + dev_warn(adev->dev, "Failed to alloc memory for " + "umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); + } } /* only uncorrectable error needs gpu reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 1f5fe2315236..9e40bade0a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -49,6 +49,10 @@ struct amdgpu_umc_ras_funcs { void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); + void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); }; struct amdgpu_umc_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 688bef1649b5..344f711ad144 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -434,7 +434,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * @ring: ring we should submit the msg to * @handle: VCE session handle to use - * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 4f7c70845785..9a19a6a57b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -135,6 +135,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) fw_name = FIRMWARE_SIENNA_CICHLID; else @@ -285,20 +286,13 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; + int vcn_config = adev->vcn.vcn_config[vcn_instance]; - int major; - int minor; - int revision; - - /* if cannot find IP data, then this VCN does not exist */ - if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0) - return true; - - if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) { + if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) { ret = true; - } else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) { + } else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) { ret = true; - } else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) { + } else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) { ret = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index bfa27ea94804..5d3728b027d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -235,6 +235,7 @@ struct amdgpu_vcn { uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; struct mutex vcn_pg_lock; struct mutex vcn1_jpeg1_workaround; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 04cf9b207e62..3fc49823f527 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -283,17 +283,15 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL); if (!*data) - return -ENOMEM; + goto data_failure; bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); - bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + if (!bps) + goto bps_failure; - if (!bps || !bps_bo) { - kfree(bps); - kfree(bps_bo); - kfree(*data); - return -ENOMEM; - } + bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + if (!bps_bo) + goto bps_bo_failure; (*data)->bps = bps; (*data)->bps_bo = bps_bo; @@ -303,6 +301,13 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) virt->ras_init_done = true; return 0; + +bps_bo_failure: + kfree(bps); +bps_failure: + kfree(*data); +data_failure: + return -ENOMEM; } static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index ce982afeff91..2dcc68e04e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -16,6 +16,8 @@ #include "ivsrcid/ivsrcid_vislands30.h" #include "amdgpu_vkms.h" #include "amdgpu_display.h" +#include "atom.h" +#include "amdgpu_irq.h" /** * DOC: amdgpu_vkms @@ -41,16 +43,16 @@ static const u32 amdgpu_vkms_formats[] = { static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) { - struct amdgpu_vkms_output *output = container_of(timer, - struct amdgpu_vkms_output, - vblank_hrtimer); - struct drm_crtc *crtc = &output->crtc; + struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer); + struct drm_crtc *crtc = &amdgpu_crtc->base; + struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); u64 ret_overrun; bool ret; - ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, + ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer, output->period_ns); - WARN_ON(ret_overrun != 1); + if (ret_overrun != 1) + DRM_WARN("%s: vblank timer overrun\n", __func__); ret = drm_crtc_handle_vblank(crtc); if (!ret) @@ -65,22 +67,21 @@ static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc) unsigned int pipe = drm_crtc_index(crtc); struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); drm_calc_timestamping_constants(crtc, &crtc->mode); - hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate; out->period_ns = ktime_set(0, vblank->framedur_ns); - hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); + hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL); return 0; } static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc) { - struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - hrtimer_cancel(&out->vblank_hrtimer); + hrtimer_cancel(&amdgpu_crtc->vblank_timer); } static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, @@ -92,13 +93,14 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, unsigned int pipe = crtc->index; struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); if (!READ_ONCE(vblank->enabled)) { *vblank_time = ktime_get(); return true; } - *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); + *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires); if (WARN_ON(*vblank_time == vblank->time)) return true; @@ -165,6 +167,8 @@ static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = { static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor) { + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int ret; ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor, @@ -176,6 +180,17 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs); + amdgpu_crtc->crtc_id = drm_crtc_index(crtc); + adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc; + + amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; + amdgpu_crtc->encoder = NULL; + amdgpu_crtc->connector = NULL; + amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; + + hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate; + return ret; } @@ -375,6 +390,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, int index) { struct drm_plane *plane; + uint64_t modifiers[] = {DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID}; int ret; plane = kzalloc(sizeof(*plane), GFP_KERNEL); @@ -385,7 +401,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, &amdgpu_vkms_plane_funcs, amdgpu_vkms_formats, ARRAY_SIZE(amdgpu_vkms_formats), - NULL, type, NULL); + modifiers, type, NULL); if (ret) { kfree(plane); return ERR_PTR(ret); @@ -396,12 +412,12 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, return plane; } -int amdgpu_vkms_output_init(struct drm_device *dev, - struct amdgpu_vkms_output *output, int index) +static int amdgpu_vkms_output_init(struct drm_device *dev, struct + amdgpu_vkms_output *output, int index) { struct drm_connector *connector = &output->connector; struct drm_encoder *encoder = &output->encoder; - struct drm_crtc *crtc = &output->crtc; + struct drm_crtc *crtc = &output->crtc.base; struct drm_plane *primary, *cursor = NULL; int ret; @@ -465,6 +481,11 @@ static int amdgpu_vkms_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, + sizeof(struct amdgpu_vkms_output), GFP_KERNEL); + if (!adev->amdgpu_vkms_output) + return -ENOMEM; + adev_to_drm(adev)->max_vblank_count = 0; adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs; @@ -481,10 +502,6 @@ static int amdgpu_vkms_sw_init(void *handle) if (r) return r; - adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL); - if (!adev->amdgpu_vkms_output) - return -ENOMEM; - /* allocate crtcs, encoders, connectors */ for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i); @@ -507,12 +524,13 @@ static int amdgpu_vkms_sw_fini(void *handle) if (adev->mode_info.crtcs[i]) hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); - kfree(adev->mode_info.bios_hardcoded_edid); - kfree(adev->amdgpu_vkms_output); - drm_kms_helper_poll_fini(adev_to_drm(adev)); + drm_mode_config_cleanup(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = false; + + kfree(adev->mode_info.bios_hardcoded_edid); + kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h index 97f1b79c0724..4f8722ff37c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h @@ -10,15 +10,14 @@ #define YRES_MAX 16384 #define drm_crtc_to_amdgpu_vkms_output(target) \ - container_of(target, struct amdgpu_vkms_output, crtc) + container_of(target, struct amdgpu_vkms_output, crtc.base) extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block; struct amdgpu_vkms_output { - struct drm_crtc crtc; + struct amdgpu_crtc crtc; struct drm_encoder encoder; struct drm_connector connector; - struct hrtimer vblank_hrtimer; ktime_t period_ns; struct drm_pending_vblank_event *event; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a96ae4c0e040..b37fc7d7d2c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -53,7 +53,7 @@ * can be mapped as snooped (cached system pages) or unsnooped * (uncached system pages). * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, + * associated with each VMID. When executing a command buffer, * the kernel tells the the ring what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 567df2db23ac..a38c6a747fa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -265,6 +265,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); + if ((!adev->df.funcs) || + (!adev->df.funcs->get_fica) || + (!adev->df.funcs->set_fica)) + return -EINVAL; + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 6134ed964027..a92d86e12718 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -469,7 +469,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_connector->use_digital && (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else if (amdgpu_connector->use_digital) @@ -488,7 +488,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else @@ -506,7 +506,7 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) } else if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; - else if (drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && + else if (connector->display_info.is_hdmi && (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index b200b9e722d9..8318ee8339f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2092,22 +2092,18 @@ static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder) return 1; else return 0; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: if (dig->linkb) return 3; else return 2; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: if (dig->linkb) return 5; else return 4; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: return 6; - break; default: DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e7dfeb466a0e..dbe7442fb25c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7707,8 +7707,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); break; default: preempt_disable(); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b4b80f27b894..edb3e3b08eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -140,6 +140,11 @@ MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 + enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -3065,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } @@ -4055,9 +4060,10 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); - /* Skip suspend with A+A reset */ - if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { - dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); + /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ + if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) { + dev_dbg(adev->dev, "Skipping RLC halt\n"); return 0; } @@ -4238,19 +4244,38 @@ failed_kiq_read: static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(&adev->gfx.gpu_clock_mutex); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) { - clock = gfx_v9_0_kiq_read_clock(adev); - } else { - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 3, 0): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; + default: + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->gfx.gpu_clock_mutex); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) { + clock = gfx_v9_0_kiq_read_clock(adev); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } + mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); + break; } - mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 480e41847d7c..ec4d5e15b766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 14c1c1a297dd..6e0ace2fbfab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index e80d1dc43079..b4eddf6e98a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3ec5ff5a6dbe..ae46eb35b3d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -107,7 +107,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Process it onyl if it's the first fault for this address */ if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) return 1; @@ -992,10 +992,14 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + r = amdgpu_gart_table_vram_pin(adev); if (r) return r; +skip_pin_bo: r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df534..a5471923b3f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -523,7 +523,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Process it onyl if it's the first fault for this address */ if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) return 1; @@ -1294,7 +1294,8 @@ static int gmc_v9_0_late_init(void *handle) if (!amdgpu_sriov_vf(adev) && (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) { if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { - if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + if (adev->df.funcs && + adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } } @@ -1505,9 +1506,11 @@ static int gmc_v9_0_sw_init(void *handle) chansize = 64; else chansize = 128; - - numchan = adev->df.funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; + if (adev->df.funcs && + adev->df.funcs->get_hbm_channel_number) { + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } } adev->gmc.vram_type = vram_type; @@ -1714,10 +1717,14 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + goto skip_pin_bo; + r = amdgpu_gart_table_vram_pin(adev); if (r) return r; +skip_pin_bo: r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1742,7 +1749,7 @@ static int gmc_v9_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool value; - int r, i; + int i; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -1777,9 +1784,7 @@ static int gmc_v9_0_hw_init(void *handle) if (adev->umc.funcs && adev->umc.funcs->init_registers) adev->umc.funcs->init_registers(adev); - r = gmc_v9_0_gart_enable(adev); - - return r; + return gmc_v9_0_gart_enable(adev); } /** @@ -1808,6 +1813,14 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + /* + * Pair the operations did in gmc_v9_0_hw_init and thus maintain + * a correct cached state for GMC. Otherwise, the "gate" again + * operation on S3 resuming will fail due to wrong cached state. + */ + if (adev->mmhub.funcs->update_power_gating) + adev->mmhub.funcs->update_power_gating(adev, false); + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a99953833820..1da2ec692057 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); @@ -302,10 +301,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return; - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - - } + if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) + amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GMC, + enable); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f80a14a1b82d..f5f7181f9af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 25f8e93e5ec3..3718ff610ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -267,7 +267,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a11d60ec6321..9e16da28505a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c4ef822bbe8c..ff49eeaf7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -190,8 +190,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, 0); - tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 23b066bcffb2..0077e738db31 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -252,11 +252,12 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index bd3b23171579..f9aa4d0bb638 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -26,7 +26,7 @@ #define AI_MAILBOX_POLL_ACK_TIMEDOUT 500 #define AI_MAILBOX_POLL_MSG_TIMEDOUT 6000 -#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000 +#define AI_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define AI_MAILBOX_POLL_MSG_REP_MAX 11 enum idh_request { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index a35e6d87e537..477d0dde19c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -281,11 +281,12 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_write_trylock(&adev->reset_sem)) + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return; + down_write(&adev->reset_sem); + amdgpu_virt_fini_data_exchange(adev); - atomic_set(&adev->in_gpu_reset, 1); xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 1d8414c3fadb..8ce5b8ca1fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -160,6 +160,7 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); @@ -275,10 +276,8 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev, tmp = navi10_ih_rb_cntl(ih, tmp); if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - if (ih == &adev->irq.ih1) { - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + if (ih == &adev->irq.ih1) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - } if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { @@ -319,7 +318,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; u32 ih_chicken; - u32 tmp; int ret; int i; @@ -363,15 +361,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, ih[0]->doorbell_index); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); - tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, - CLIENT18_IS_STORM_CLIENT, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp); - - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL); - tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp); - pci_set_master(adev->pdev); /* enable interrupts */ @@ -420,12 +409,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - wptr = le32_to_cpu(*ih->wptr_cpu); - ih_regs = &ih->ih_regs; + if (ih == &adev->irq.ih) { + /* Only ring0 supports writeback. On other rings fall back + * to register-based code with overflow checking below. + */ + wptr = le32_to_cpu(*ih->wptr_cpu); - if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) - goto out; + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + } + ih_regs = &ih->ih_regs; + + /* Double check that the overflow wasn't already cleared. */ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; @@ -513,15 +509,11 @@ static int navi10_ih_self_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t wptr = cpu_to_le32(entry->src_data[0]); - switch (entry->ring_id) { case 1: - *adev->irq.ih1.wptr_cpu = wptr; schedule_work(&adev->irq.ih1_work); break; case 2: - *adev->irq.ih2.wptr_cpu = wptr; schedule_work(&adev->irq.ih2_work); break; default: break; @@ -724,6 +716,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { static const struct amdgpu_ih_funcs navi10_ih_funcs = { .get_wptr = navi10_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = navi10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 4ecd2b5808ce..ee7cab37dfd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -359,6 +359,10 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); + + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1 diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 0d2d629e2d6a..4bbacf1be25a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -276,6 +276,10 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CI_CNTL, data); + + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 3c00666a13e1..37a4039fdfc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -273,7 +273,9 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { static void nbio_v7_0_init_registers(struct amdgpu_device *adev) { - + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 8f2a315e7c73..3444332ea110 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -371,6 +371,10 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data); } + + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index b8bd03d16dba..dc5e93756fea 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -362,7 +362,9 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { - + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) @@ -692,6 +694,9 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) { uint32_t def, data; + if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) + return; + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 59eafa31c626..2ec1ffb36b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -183,6 +183,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, switch (adev->ip_versions[UVD_HWIP][0]) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (amdgpu_sriov_vf(adev)) { if (encode) *codecs = &sriov_sc_video_codecs_encode; @@ -731,8 +732,10 @@ static int nv_common_early_init(void *handle) #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; @@ -1032,7 +1035,7 @@ static int nv_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ nv_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 853d1511b889..81e033549dda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -481,8 +481,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 4d4d1aa51b8a..4f546f632223 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -368,8 +368,6 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse * * @ring: amdgpu ring pointer - * @job: job to retrieve vmid from - * @ib: IB object to schedule * * flush the IB by graphics cache rinse. */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 0c316a2d42ed..0fc1747e4a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -744,7 +744,7 @@ static void soc15_reg_base_init(struct amdgpu_device *adev) vega10_reg_base_init(adev); break; case CHIP_RENOIR: - /* It's safe to do ip discovery here for Renior, + /* It's safe to do ip discovery here for Renoir, * it doesn't support SRIOV. */ if (amdgpu_discovery) { r = amdgpu_discovery_reg_base_init(adev); @@ -971,8 +971,10 @@ static int soc15_common_early_init(void *handle) #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; @@ -1236,7 +1238,9 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df.funcs->sw_init(adev); + if (adev->df.funcs && + adev->df.funcs->sw_init) + adev->df.funcs->sw_init(adev); return 0; } @@ -1248,7 +1252,10 @@ static int soc15_common_sw_fini(void *handle) if (adev->nbio.ras_funcs && adev->nbio.ras_funcs->ras_fini) adev->nbio.ras_funcs->ras_fini(adev); - adev->df.funcs->sw_fini(adev); + + if (adev->df.funcs && + adev->df.funcs->sw_fini) + adev->df.funcs->sw_fini(adev); return 0; } @@ -1285,7 +1292,7 @@ static int soc15_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index f7ec3fe134e5..6dd1e19e8d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -50,6 +50,165 @@ static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst; } +static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; +} + +static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t channel_index, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt; + uint64_t mc_umc_status; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + /* + * select the lower chip and check the error count + * skip add error count, calc error counter only from mca_umc_status + */ + ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_lo_chip; + + /* + * select the higher chip and check the err counter + * skip add error count, calc error counter only from mca_umc_status + */ + ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_hi_chip; + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t channel_index, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + /* check the MCUMC_STATUS */ + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + uint32_t channel_index = 0; + + /*TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC registers. Will add the protection */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + channel_index = get_umc_v6_7_channel_index(adev, + umc_inst, + ch_inst); + umc_v6_7_ecc_info_query_correctable_error_count(adev, + channel_index, + &(err_data->ce_count)); + umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, + channel_index, + &(err_data->ue_count)); + } +} + +static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status, err_addr, retired_page; + struct eeprom_table_record *err_rec; + uint32_t channel_index; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) + return; + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + + err_addr = ras->umc_ecc.ecc[channel_index].mca_umc_addr; + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) { + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_inst; + + err_data->err_addr_cnt++; + } + } +} + +static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + /*TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC resgisters. Will add the protection + * when firmware interface is ready */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + umc_v6_7_ecc_info_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } +} + static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -327,4 +486,6 @@ const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { .query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_address = umc_v6_7_query_ras_error_address, .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, + .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a9ca6988009e..3070466f54e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = vega10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index f51dfc38ac65..3b4eb8285943 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega20_ih_funcs = { .get_wptr = vega20_ih_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = vega20_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index f6233019f042..d60576ce10cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, */ if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && - dev->device_info->asic_family == CHIP_HAWAII) { + dev->adev->asic_type == CHIP_HAWAII) { struct cik_ih_ring_entry *tmp_ihre = (struct cik_ih_ring_entry *)patched_ihre; *patched_flag = true; *tmp_ihre = *ihre; - vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); - ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); + vmid = f2g->read_vmid_from_vmfault_reg(dev->adev); + ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid); tmp_ihre->ring_id &= 0x000000ff; tmp_ihre->ring_id |= vmid << 8; @@ -113,7 +113,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); - amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info); + amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info); if (!info.page_addr && !info.status) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 24ebd61395d8..4bfc0c8ab764 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -321,7 +321,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); - if (KFD_IS_SOC15(dev->device_info->asic_family)) + if (KFD_IS_SOC15(dev)) /* On SOC15 ASICs, include the doorbell offset within the * process doorbell frame, which is 2 pages. */ @@ -580,7 +580,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); return -EINVAL; } @@ -631,7 +631,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, if (!dev || !dev->dbgmgr) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); return -EINVAL; } @@ -676,7 +676,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); return -EINVAL; } @@ -784,7 +784,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep, if (!dev) return -EINVAL; - if (dev->device_info->asic_family == CHIP_CARRIZO) { + if (dev->adev->asic_type == CHIP_CARRIZO) { pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); return -EINVAL; } @@ -851,7 +851,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, dev = kfd_device_by_id(args->gpu_id); if (dev) /* Reading GPU clock counter from KGD */ - args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->adev); else /* Node without GPU resource */ args->gpu_clock_counter = 0; @@ -1041,7 +1041,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, goto out_unlock; } - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev, mem, &kern_addr, &size); if (err) { pr_err("Failed to map event page to kernel\n"); @@ -1051,7 +1051,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, err = kfd_event_page_set(p, kern_addr, size); if (err) { pr_err("Failed to set event page\n"); - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem); goto out_unlock; } @@ -1137,7 +1137,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) dev->kfd2kgd->set_scratch_backing_va( - dev->kgd, args->va_addr, pdd->qpd.vmid); + dev->adev, args->va_addr, pdd->qpd.vmid); return 0; @@ -1158,7 +1158,7 @@ static int kfd_ioctl_get_tile_config(struct file *filep, if (!dev) return -EINVAL; - amdgpu_amdkfd_get_tile_config(dev->kgd, &config); + amdgpu_amdkfd_get_tile_config(dev->adev, &config); args->gb_addr_config = config.gb_addr_config; args->num_banks = config.num_banks; @@ -1244,7 +1244,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) if (dev->use_iommu_v2) return false; - amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info); if (mem_info.local_mem_size_private == 0 && mem_info.local_mem_size_public > 0) return true; @@ -1313,7 +1313,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = -EINVAL; goto err_unlock; } - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + offset = dev->adev->rmmio_remap.bus_addr; if (!offset) { err = -ENOMEM; goto err_unlock; @@ -1321,7 +1321,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, } err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( - dev->kgd, args->va_addr, args->size, + dev->adev, args->va_addr, args->size, pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags); @@ -1353,7 +1353,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); @@ -1399,7 +1399,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, goto err_unlock; } - ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, + ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, &size); /* If freeing the buffer failed, leave the handle in place for @@ -1484,7 +1484,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, + peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv, &table_freed); if (err) { pr_err("Failed to map to gpu %d/%d\n", @@ -1496,7 +1496,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, mutex_unlock(&p->mutex); - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; @@ -1593,7 +1593,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); + peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to unmap from gpu %d/%d\n", i, args->n_devices); @@ -1603,8 +1603,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } mutex_unlock(&p->mutex); - if (dev->device_info->asic_family == CHIP_ALDEBARAN) { - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, + if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) { + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); @@ -1680,7 +1680,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, { struct kfd_ioctl_get_dmabuf_info_args *args = data; struct kfd_dev *dev = NULL; - struct kgd_dev *dma_buf_kgd; + struct amdgpu_device *dmabuf_adev; void *metadata_buffer = NULL; uint32_t flags; unsigned int i; @@ -1700,15 +1700,15 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, } /* Get dmabuf info from KGD */ - r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, - &dma_buf_kgd, &args->size, + r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, + &dmabuf_adev, &args->size, metadata_buffer, args->metadata_size, &args->metadata_size, &flags); if (r) goto exit; /* Reverse-lookup gpu_id from kgd pointer */ - dev = kfd_device_by_kgd(dma_buf_kgd); + dev = kfd_device_by_adev(dmabuf_adev); if (!dev) { r = -EINVAL; goto exit; @@ -1758,7 +1758,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, goto err_unlock; } - r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, + r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->adev, dmabuf, args->va_addr, pdd->drm_priv, (struct kgd_mem **)&mem, &size, NULL); @@ -1779,7 +1779,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); @@ -2066,7 +2066,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; - address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + address = dev->adev->rmmio_remap.bus_addr; vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cfedfb1e8596..f187596faf66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1340,7 +1340,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, int ret; unsigned int num_cu_shared; - switch (kdev->device_info->asic_family) { + switch (kdev->adev->asic_type) { case CHIP_KAVERI: pcache_info = kaveri_cache_info; num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); @@ -1377,67 +1377,71 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = vegam_cache_info; num_of_cache_types = ARRAY_SIZE(vegam_cache_info); break; - case CHIP_VEGA10: - pcache_info = vega10_cache_info; - num_of_cache_types = ARRAY_SIZE(vega10_cache_info); - break; - case CHIP_VEGA12: - pcache_info = vega12_cache_info; - num_of_cache_types = ARRAY_SIZE(vega12_cache_info); - break; - case CHIP_VEGA20: - case CHIP_ARCTURUS: - pcache_info = vega20_cache_info; - num_of_cache_types = ARRAY_SIZE(vega20_cache_info); - break; - case CHIP_ALDEBARAN: - pcache_info = aldebaran_cache_info; - num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); - break; - case CHIP_RAVEN: - pcache_info = raven_cache_info; - num_of_cache_types = ARRAY_SIZE(raven_cache_info); - break; - case CHIP_RENOIR: - pcache_info = renoir_cache_info; - num_of_cache_types = ARRAY_SIZE(renoir_cache_info); - break; - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_CYAN_SKILLFISH: - pcache_info = navi10_cache_info; - num_of_cache_types = ARRAY_SIZE(navi10_cache_info); - break; - case CHIP_NAVI14: - pcache_info = navi14_cache_info; - num_of_cache_types = ARRAY_SIZE(navi14_cache_info); - break; - case CHIP_SIENNA_CICHLID: - pcache_info = sienna_cichlid_cache_info; - num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); - break; - case CHIP_NAVY_FLOUNDER: - pcache_info = navy_flounder_cache_info; - num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); - break; - case CHIP_DIMGREY_CAVEFISH: - pcache_info = dimgrey_cavefish_cache_info; - num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); - break; - case CHIP_VANGOGH: - pcache_info = vangogh_cache_info; - num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); - break; - case CHIP_BEIGE_GOBY: - pcache_info = beige_goby_cache_info; - num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); - break; - case CHIP_YELLOW_CARP: - pcache_info = yellow_carp_cache_info; - num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); - break; default: - return -EINVAL; + switch(KFD_GC_VERSION(kdev)) { + case IP_VERSION(9, 0, 1): + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case IP_VERSION(9, 2, 1): + pcache_info = vega12_cache_info; + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); + break; + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + pcache_info = vega20_cache_info; + num_of_cache_types = ARRAY_SIZE(vega20_cache_info); + break; + case IP_VERSION(9, 4, 2): + pcache_info = aldebaran_cache_info; + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; + case IP_VERSION(9, 3, 0): + pcache_info = renoir_cache_info; + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + pcache_info = navi10_cache_info; + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + break; + case IP_VERSION(10, 1, 1): + pcache_info = navi14_cache_info; + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); + break; + case IP_VERSION(10, 3, 0): + pcache_info = sienna_cichlid_cache_info; + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); + break; + case IP_VERSION(10, 3, 2): + pcache_info = navy_flounder_cache_info; + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); + break; + case IP_VERSION(10, 3, 4): + pcache_info = dimgrey_cavefish_cache_info; + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); + break; + case IP_VERSION(10, 3, 1): + pcache_info = vangogh_cache_info; + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); + break; + case IP_VERSION(10, 3, 5): + pcache_info = beige_goby_cache_info; + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); + break; + case IP_VERSION(10, 3, 3): + pcache_info = yellow_carp_cache_info; + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); + break; + default: + return -EINVAL; + } } *size_filled = 0; @@ -1963,8 +1967,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain) { - struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd; - *avail_size -= sizeof(struct crat_subtype_iolink); if (*avail_size < 0) return -ENOMEM; @@ -1981,7 +1983,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, /* Fill in IOLINK subtype. * TODO: Fill-in other fields of iolink subtype */ - if (adev->gmc.xgmi.connected_to_cpu) { + if (kdev->adev->gmc.xgmi.connected_to_cpu) { /* * with host gpu xgmi link, host can access gpu memory whether * or not pcie bar type is large, so always create bidirectional @@ -1990,19 +1992,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->num_hops_xgmi = 1; - if (adev->asic_type == CHIP_ALDEBARAN) { + if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) { sub_type_hdr->minimum_bandwidth_mbs = amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( - kdev->kgd, NULL, true); + kdev->adev, NULL, true); sub_type_hdr->maximum_bandwidth_mbs = sub_type_hdr->minimum_bandwidth_mbs; } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->minimum_bandwidth_mbs = - amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true); + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false); + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); } sub_type_hdr->proximity_domain_from = proximity_domain; @@ -2044,11 +2046,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; sub_type_hdr->num_hops_xgmi = - amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); + amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false); sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; return 0; } @@ -2114,7 +2116,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; cu->proximity_domain = proximity_domain; - amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; cu->max_waves_simd = cu_info.max_waves_per_simd; @@ -2145,7 +2147,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info); sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 159add0f5aaa..1e30717b5253 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -41,7 +41,7 @@ static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { - dev->kfd2kgd->address_watch_disable(dev->kgd); + dev->kfd2kgd->address_watch_disable(dev->adev); } static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, @@ -322,7 +322,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); pdd->dev->kfd2kgd->address_watch_execute( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, cntl.u32All, addrHi.u32All, @@ -420,7 +420,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_CNTL); @@ -431,7 +431,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_ADDR_HI); @@ -441,7 +441,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_ADDR_LO); @@ -457,7 +457,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword = dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->kgd, + dbgdev->dev->adev, i, ADDRESS_WATCH_REG_CNTL); @@ -752,7 +752,7 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev, reg_gfx_index.u32All, reg_sq_cmd.u32All); } @@ -784,7 +784,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->kgd, vmid, &queried_pasid); + (dev->adev, vmid, &queried_pasid); if (status && queried_pasid == p->pasid) { pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", @@ -811,7 +811,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) /* for non DIQ we need to patch the VMID: */ reg_sq_cmd.bits.vm_id = vmid; - dev->kfd2kgd->wave_control_execute(dev->kgd, + dev->kfd2kgd->wave_control_execute(dev->adev, reg_gfx_index.u32All, reg_sq_cmd.u32All); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3b119db16003..facc28f58c1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -53,770 +53,248 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; -#ifdef KFD_SUPPORT_IOMMU_V2 -static const struct kfd_device_info kaveri_device_info = { - .asic_family = CHIP_KAVERI, - .asic_name = "kaveri", - .gfx_target_version = 70000, - .max_pasid_bits = 16, - /* max num of queues for KV.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info carrizo_device_info = { - .asic_family = CHIP_CARRIZO, - .asic_name = "carrizo", - .gfx_target_version = 80001, - .max_pasid_bits = 16, - /* max num of queues for CZ.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info raven_device_info = { - .asic_family = CHIP_RAVEN, - .asic_name = "raven", - .gfx_target_version = 90002, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = true, - .needs_pci_atomics = true, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; -#endif - -#ifdef CONFIG_DRM_AMDGPU_CIK -static const struct kfd_device_info hawaii_device_info = { - .asic_family = CHIP_HAWAII, - .asic_name = "hawaii", - .gfx_target_version = 70001, - .max_pasid_bits = 16, - /* max num of queues for KV.TODO should be a dynamic value */ - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; -#endif - -static const struct kfd_device_info tonga_device_info = { - .asic_family = CHIP_TONGA, - .asic_name = "tonga", - .gfx_target_version = 80002, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_device_info = { - .asic_family = CHIP_FIJI, - .asic_name = "fiji", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_vf_device_info = { - .asic_family = CHIP_FIJI, - .asic_name = "fiji", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - - -static const struct kfd_device_info polaris10_device_info = { - .asic_family = CHIP_POLARIS10, - .asic_name = "polaris10", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris10_vf_device_info = { - .asic_family = CHIP_POLARIS10, - .asic_name = "polaris10", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris11_device_info = { - .asic_family = CHIP_POLARIS11, - .asic_name = "polaris11", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris12_device_info = { - .asic_family = CHIP_POLARIS12, - .asic_name = "polaris12", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vegam_device_info = { - .asic_family = CHIP_VEGAM, - .asic_name = "vegam", - .gfx_target_version = 80003, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega10_device_info = { - .asic_family = CHIP_VEGA10, - .asic_name = "vega10", - .gfx_target_version = 90000, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega10_vf_device_info = { - .asic_family = CHIP_VEGA10, - .asic_name = "vega10", - .gfx_target_version = 90000, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega12_device_info = { - .asic_family = CHIP_VEGA12, - .asic_name = "vega12", - .gfx_target_version = 90004, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega20_device_info = { - .asic_family = CHIP_VEGA20, - .asic_name = "vega20", - .gfx_target_version = 90006, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info arcturus_device_info = { - .asic_family = CHIP_ARCTURUS, - .asic_name = "arcturus", - .gfx_target_version = 90008, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 6, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info aldebaran_device_info = { - .asic_family = CHIP_ALDEBARAN, - .asic_name = "aldebaran", - .gfx_target_version = 90010, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 3, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info renoir_device_info = { - .asic_family = CHIP_RENOIR, - .asic_name = "renoir", - .gfx_target_version = 90012, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info navi10_device_info = { - .asic_family = CHIP_NAVI10, - .asic_name = "navi10", - .gfx_target_version = 100100, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi12_device_info = { - .asic_family = CHIP_NAVI12, - .asic_name = "navi12", - .gfx_target_version = 100101, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi14_device_info = { - .asic_family = CHIP_NAVI14, - .asic_name = "navi14", - .gfx_target_version = 100102, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 145, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info sienna_cichlid_device_info = { - .asic_family = CHIP_SIENNA_CICHLID, - .asic_name = "sienna_cichlid", - .gfx_target_version = 100300, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 4, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navy_flounder_device_info = { - .asic_family = CHIP_NAVY_FLOUNDER, - .asic_name = "navy_flounder", - .gfx_target_version = 100301, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info vangogh_device_info = { - .asic_family = CHIP_VANGOGH, - .asic_name = "vangogh", - .gfx_target_version = 100303, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info dimgrey_cavefish_device_info = { - .asic_family = CHIP_DIMGREY_CAVEFISH, - .asic_name = "dimgrey_cavefish", - .gfx_target_version = 100302, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info beige_goby_device_info = { - .asic_family = CHIP_BEIGE_GOBY, - .asic_name = "beige_goby", - .gfx_target_version = 100304, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info yellow_carp_device_info = { - .asic_family = CHIP_YELLOW_CARP, - .asic_name = "yellow_carp", - .gfx_target_version = 100305, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .no_atomic_fw_version = 92, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info cyan_skillfish_device_info = { - .asic_family = CHIP_CYAN_SKILLFISH, - .asic_name = "cyan_skillfish", - .gfx_target_version = 100103, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = true, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) +static void kfd_device_info_init(struct kfd_dev *kfd, + bool vf, uint32_t gfx_target_version) { - struct kfd_dev *kfd; - const struct kfd_device_info *device_info; - const struct kfd2kgd_calls *f2g; - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t gc_version = KFD_GC_VERSION(kfd); + uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; + uint32_t asic_type = kfd->adev->asic_type; + + kfd->device_info.max_pasid_bits = 16; + kfd->device_info.max_no_of_hqd = 24; + kfd->device_info.num_of_watch_points = 4; + kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; + kfd->device_info.gfx_target_version = gfx_target_version; + + if (KFD_IS_SOC15(kfd)) { + kfd->device_info.doorbell_size = 8; + kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); + kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; + kfd->device_info.supports_cwsr = true; + + if ((sdma_version >= IP_VERSION(4, 0, 0) && + sdma_version <= IP_VERSION(4, 2, 0)) || + sdma_version == IP_VERSION(5, 2, 1) || + sdma_version == IP_VERSION(5, 2, 3)) + kfd->device_info.num_sdma_queues_per_engine = 2; + else + kfd->device_info.num_sdma_queues_per_engine = 8; + + /* Raven */ + if (gc_version == IP_VERSION(9, 1, 0) || + gc_version == IP_VERSION(9, 2, 2)) + kfd->device_info.needs_iommu_device = true; + + if (gc_version < IP_VERSION(11, 0, 0)) { + /* Navi2x+, Navi1x+ */ + if (gc_version >= IP_VERSION(10, 3, 0)) + kfd->device_info.no_atomic_fw_version = 92; + else if (gc_version >= IP_VERSION(10, 1, 1)) + kfd->device_info.no_atomic_fw_version = 145; + + /* Navi1x+ */ + if (gc_version >= IP_VERSION(10, 1, 1)) + kfd->device_info.needs_pci_atomics = true; + } + } else { + kfd->device_info.doorbell_size = 4; + kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); + kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; + kfd->device_info.num_sdma_queues_per_engine = 2; + + if (asic_type != CHIP_KAVERI && + asic_type != CHIP_HAWAII && + asic_type != CHIP_TONGA) + kfd->device_info.supports_cwsr = true; + + if (asic_type == CHIP_KAVERI || + asic_type == CHIP_CARRIZO) + kfd->device_info.needs_iommu_device = true; + + if (asic_type != CHIP_HAWAII && !vf) + kfd->device_info.needs_pci_atomics = true; + } +} + +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) +{ + struct kfd_dev *kfd = NULL; + const struct kfd2kgd_calls *f2g = NULL; struct pci_dev *pdev = adev->pdev; + uint32_t gfx_target_version = 0; switch (adev->asic_type) { #ifdef KFD_SUPPORT_IOMMU_V2 #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: - if (vf) - device_info = NULL; - else - device_info = &kaveri_device_info; - f2g = &gfx_v7_kfd2kgd; + gfx_target_version = 70000; + if (!vf) + f2g = &gfx_v7_kfd2kgd; break; #endif case CHIP_CARRIZO: - if (vf) - device_info = NULL; - else - device_info = &carrizo_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80001; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; #endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_HAWAII: - if (vf) - device_info = NULL; - else - device_info = &hawaii_device_info; - f2g = &gfx_v7_kfd2kgd; + gfx_target_version = 70001; + if (!amdgpu_exp_hw_support) + pr_info( + "KFD support on Hawaii is experimental. See modparam exp_hw_support\n" + ); + else if (!vf) + f2g = &gfx_v7_kfd2kgd; break; #endif case CHIP_TONGA: - if (vf) - device_info = NULL; - else - device_info = &tonga_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80002; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_FIJI: - if (vf) - device_info = &fiji_vf_device_info; - else - device_info = &fiji_device_info; + gfx_target_version = 80003; f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS10: - if (vf) - device_info = &polaris10_vf_device_info; - else - device_info = &polaris10_device_info; + gfx_target_version = 80003; f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS11: - if (vf) - device_info = NULL; - else - device_info = &polaris11_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_POLARIS12: - if (vf) - device_info = NULL; - else - device_info = &polaris12_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; case CHIP_VEGAM: - if (vf) - device_info = NULL; - else - device_info = &vegam_device_info; - f2g = &gfx_v8_kfd2kgd; + gfx_target_version = 80003; + if (!vf) + f2g = &gfx_v8_kfd2kgd; break; default: switch (adev->ip_versions[GC_HWIP][0]) { + /* Vega 10 */ case IP_VERSION(9, 0, 1): - if (vf) - device_info = &vega10_vf_device_info; - else - device_info = &vega10_device_info; + gfx_target_version = 90000; f2g = &gfx_v9_kfd2kgd; break; #ifdef KFD_SUPPORT_IOMMU_V2 + /* Raven */ case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - if (vf) - device_info = NULL; - else - device_info = &raven_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90002; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; #endif + /* Vega12 */ case IP_VERSION(9, 2, 1): - if (vf) - device_info = NULL; - else - device_info = &vega12_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90004; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Renoir */ case IP_VERSION(9, 3, 0): - if (vf) - device_info = NULL; - else - device_info = &renoir_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90012; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Vega20 */ case IP_VERSION(9, 4, 0): - if (vf) - device_info = NULL; - else - device_info = &vega20_device_info; - f2g = &gfx_v9_kfd2kgd; + gfx_target_version = 90006; + if (!vf) + f2g = &gfx_v9_kfd2kgd; break; + /* Arcturus */ case IP_VERSION(9, 4, 1): - device_info = &arcturus_device_info; + gfx_target_version = 90008; f2g = &arcturus_kfd2kgd; break; + /* Aldebaran */ case IP_VERSION(9, 4, 2): - device_info = &aldebaran_device_info; + gfx_target_version = 90010; f2g = &aldebaran_kfd2kgd; break; + /* Navi10 */ case IP_VERSION(10, 1, 10): - if (vf) - device_info = NULL; - else - device_info = &navi10_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100100; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Navi12 */ case IP_VERSION(10, 1, 2): - device_info = &navi12_device_info; + gfx_target_version = 100101; f2g = &gfx_v10_kfd2kgd; break; + /* Navi14 */ case IP_VERSION(10, 1, 1): - if (vf) - device_info = NULL; - else - device_info = &navi14_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100102; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Cyan Skillfish */ case IP_VERSION(10, 1, 3): - if (vf) - device_info = NULL; - else - device_info = &cyan_skillfish_device_info; - f2g = &gfx_v10_kfd2kgd; + gfx_target_version = 100103; + if (!vf) + f2g = &gfx_v10_kfd2kgd; break; + /* Sienna Cichlid */ case IP_VERSION(10, 3, 0): - device_info = &sienna_cichlid_device_info; + gfx_target_version = 100300; f2g = &gfx_v10_3_kfd2kgd; break; + /* Navy Flounder */ case IP_VERSION(10, 3, 2): - device_info = &navy_flounder_device_info; + gfx_target_version = 100301; f2g = &gfx_v10_3_kfd2kgd; break; + /* Van Gogh */ case IP_VERSION(10, 3, 1): - if (vf) - device_info = NULL; - else - device_info = &vangogh_device_info; - f2g = &gfx_v10_3_kfd2kgd; + gfx_target_version = 100303; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; break; + /* Dimgrey Cavefish */ case IP_VERSION(10, 3, 4): - device_info = &dimgrey_cavefish_device_info; + gfx_target_version = 100302; f2g = &gfx_v10_3_kfd2kgd; break; + /* Beige Goby */ case IP_VERSION(10, 3, 5): - device_info = &beige_goby_device_info; + gfx_target_version = 100304; f2g = &gfx_v10_3_kfd2kgd; break; + /* Yellow Carp */ case IP_VERSION(10, 3, 3): - if (vf) - device_info = NULL; - else - device_info = &yellow_carp_device_info; - f2g = &gfx_v10_3_kfd2kgd; + gfx_target_version = 100305; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; break; default: - return NULL; + break; } break; } - if (!device_info || !f2g) { - dev_err(kfd_device, "%s %s not supported in kfd\n", - amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); + if (!f2g) { + if (adev->ip_versions[GC_HWIP][0]) + dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n", + adev->ip_versions[GC_HWIP][0], vf ? "VF" : ""); + else + dev_err(kfd_device, "%s %s not supported in kfd\n", + amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); return NULL; } @@ -824,8 +302,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) if (!kfd) return NULL; - kfd->kgd = kgd; - kfd->device_info = device_info; + kfd->adev = adev; + kfd_device_info_init(kfd, vf, gfx_target_version); kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; @@ -844,24 +322,24 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) static void kfd_cwsr_init(struct kfd_dev *kfd) { - if (cwsr_enable && kfd->device_info->supports_cwsr) { - if (kfd->device_info->asic_family < CHIP_VEGA10) { + if (cwsr_enable && kfd->device_info.supports_cwsr) { + if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx8_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); - } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_arcturus_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); - } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) { BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); - } else if (kfd->device_info->asic_family < CHIP_NAVI10) { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx9_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); - } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) { BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); @@ -882,18 +360,17 @@ static int kfd_gws_init(struct kfd_dev *kfd) if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) return 0; - if (hws_gws_support - || (kfd->device_info->asic_family == CHIP_VEGA10 - && kfd->mec2_fw_version >= 0x81b3) - || (kfd->device_info->asic_family >= CHIP_VEGA12 - && kfd->device_info->asic_family <= CHIP_RAVEN - && kfd->mec2_fw_version >= 0x1b3) - || (kfd->device_info->asic_family == CHIP_ARCTURUS - && kfd->mec2_fw_version >= 0x30) - || (kfd->device_info->asic_family == CHIP_ALDEBARAN - && kfd->mec2_fw_version >= 0x28)) - ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, - amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); + if (hws_gws_support || (KFD_IS_SOC15(kfd) && + ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) + && kfd->mec2_fw_version >= 0x81b3) || + (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0) + && kfd->mec2_fw_version >= 0x1b3) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) + && kfd->mec2_fw_version >= 0x30) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) + && kfd->mec2_fw_version >= 0x28)))) + ret = amdgpu_amdkfd_alloc_gws(kfd->adev, + kfd->adev->gds.gws_size, &kfd->gws); return ret; } @@ -910,11 +387,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, unsigned int size, map_process_packet_size; kfd->ddev = ddev; - kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); - kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC2); - kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; @@ -927,16 +404,16 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * 32 and 64-bit requests are possible and must be * supported. */ - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); if (!kfd->pci_atomic_requested && - kfd->device_info->needs_pci_atomics && - (!kfd->device_info->no_atomic_fw_version || - kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { + kfd->device_info.needs_pci_atomics && + (!kfd->device_info.no_atomic_fw_version || + kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { dev_info(kfd_device, "skipped device %x:%x, PCI rejects atomics %d<%d\n", kfd->pdev->vendor, kfd->pdev->device, kfd->mec_fw_version, - kfd->device_info->no_atomic_fw_version); + kfd->device_info.no_atomic_fw_version); return false; } @@ -953,16 +430,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * - kfd->device_info->mqd_size_aligned; + kfd->device_info.mqd_size_aligned; /* * calculate max size of runlist packet. * There can be only 2 packets at once */ - map_process_packet_size = - kfd->device_info->asic_family == CHIP_ALDEBARAN ? + map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ? sizeof(struct pm4_mes_map_process_aldebaran) : - sizeof(struct pm4_mes_map_process); + sizeof(struct pm4_mes_map_process); size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) + sizeof(struct pm4_mes_runlist)) * 2; @@ -974,7 +450,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, size += 512 * 1024; if (amdgpu_amdkfd_alloc_gtt_mem( - kfd->kgd, size, &kfd->gtt_mem, + kfd->adev, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); @@ -995,9 +471,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; - kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); + kfd->noretry = kfd->adev->gmc.noretry; if (kfd_interrupt_init(kfd)) { dev_err(kfd_device, "Error initializing interrupts\n"); @@ -1015,7 +491,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, */ if (kfd_gws_init(kfd)) { dev_err(kfd_device, "Could not allocate %d gws\n", - amdgpu_amdkfd_get_num_gws(kfd->kgd)); + kfd->adev->gds.gws_size); goto gws_error; } @@ -1030,7 +506,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd); - svm_migrate_init((struct amdgpu_device *)kfd->kgd); + svm_migrate_init(kfd->adev); if(kgd2kfd_resume_iommu(kfd)) goto device_iommu_error; @@ -1068,10 +544,10 @@ kfd_interrupt_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); alloc_gtt_mem_failure: if (kfd->gws) - amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); + amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -1088,9 +564,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); if (kfd->gws) - amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); + amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); } kfree(kfd); @@ -1229,7 +705,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) if (!kfd->init_complete) return; - if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { + if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) { dev_err_once(kfd_device, "Ring entry too small\n"); return; } @@ -1526,7 +1002,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) void kfd_inc_compute_active(struct kfd_dev *kfd) { if (atomic_inc_return(&kfd->compute_profile) == 1) - amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); + amdgpu_amdkfd_set_compute_idle(kfd->adev, false); } void kfd_dec_compute_active(struct kfd_dev *kfd) @@ -1534,7 +1010,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd) int count = atomic_dec_return(&kfd->compute_profile); if (count == 0) - amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + amdgpu_amdkfd_set_compute_idle(kfd->adev, true); WARN_ONCE(count < 0, "Compute profile ref. count error"); } @@ -1544,6 +1020,26 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); } +/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and + * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA. + * When the device has more than two engines, we reserve two for PCIe to enable + * full-duplex and the rest are used as XGMI. + */ +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev) +{ + /* If XGMI is not supported, all SDMA engines are PCIe */ + if (!kdev->adev->gmc.xgmi.supported) + return kdev->adev->sdma.num_instances; + + return min(kdev->adev->sdma.num_instances, 2); +} + +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev) +{ + /* After reserved for PCIe, the rest of engines are XGMI */ + return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 93e33dd84dd4..dd0b952f0173 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -99,38 +99,29 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) return dqm->dev->shared_resources.num_pipe_per_mec; } -static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) -{ - return dqm->dev->device_info->num_sdma_engines; -} - -static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) -{ - return dqm->dev->device_info->num_xgmi_sdma_engines; -} - static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) { - return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); + return kfd_get_num_sdma_engines(dqm->dev) + + kfd_get_num_xgmi_sdma_engines(dqm->dev); } unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { - return dqm->dev->device_info->num_sdma_engines - * dqm->dev->device_info->num_sdma_queues_per_engine; + return kfd_get_num_sdma_engines(dqm->dev) * + dqm->dev->device_info.num_sdma_queues_per_engine; } unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) { - return dqm->dev->device_info->num_xgmi_sdma_engines - * dqm->dev->device_info->num_sdma_queues_per_engine; + return kfd_get_num_xgmi_sdma_engines(dqm->dev) * + dqm->dev->device_info.num_sdma_queues_per_engine; } void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return dqm->dev->kfd2kgd->program_sh_mem_settings( - dqm->dev->kgd, qpd->vmid, + dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, @@ -157,7 +148,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { struct kfd_dev *dev = qpd->dqm->dev; - if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + if (!KFD_IS_SOC15(dev)) { /* On pre-SOC15 chips we need to use the queue ID to * preserve the user mode ABI. */ @@ -202,7 +193,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, unsigned int old; struct kfd_dev *dev = qpd->dqm->dev; - if (!KFD_IS_SOC15(dev->device_info->asic_family) || + if (!KFD_IS_SOC15(dev) || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; @@ -216,7 +207,7 @@ static void program_trap_handler_settings(struct device_queue_manager *dqm, { if (dqm->dev->kfd2kgd->program_trap_handler_settings) dqm->dev->kfd2kgd->program_trap_handler_settings( - dqm->dev->kgd, qpd->vmid, + dqm->dev->adev, qpd->vmid, qpd->tba_addr, qpd->tma_addr); } @@ -250,21 +241,20 @@ static int allocate_vmid(struct device_queue_manager *dqm, program_sh_mem_settings(dqm, qpd); - if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && - dqm->dev->cwsr_enabled) + if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) program_trap_handler_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() * is called, i.e. when the first queue is created. */ - dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, + dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); if (dqm->dev->kfd2kgd->set_scratch_backing_va) - dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, + dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, qpd->sh_hidden_private_base, qpd->vmid); return 0; @@ -283,7 +273,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, if (ret) return ret; - return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, qpd->ib_base, (uint32_t *)qpd->ib_kaddr, pmf->release_mem_size / sizeof(uint32_t)); } @@ -293,7 +283,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct queue *q) { /* On GFX v7, CP doesn't flush TC at dequeue */ - if (q->device->device_info->asic_family == CHIP_HAWAII) + if (q->device->adev->asic_type == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) pr_err("Failed to flush TC\n"); @@ -776,7 +766,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, if (!list_empty(&qpd->queues_list)) { dqm->dev->kfd2kgd->set_vm_context_page_table_base( - dqm->dev->kgd, + dqm->dev->adev, qpd->vmid, qpd->page_table_base); kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); @@ -954,7 +944,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid) { return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( - dqm->dev->kgd, pasid, vmid); + dqm->dev->adev, pasid, vmid); } static void init_interrupts(struct device_queue_manager *dqm) @@ -963,7 +953,7 @@ static void init_interrupts(struct device_queue_manager *dqm) for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) if (is_pipe_enabled(dqm, 0, i)) - dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); + dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); } static int initialize_nocpsch(struct device_queue_manager *dqm) @@ -1017,7 +1007,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) pr_info("SW scheduler is used"); init_interrupts(dqm); - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + if (dqm->dev->adev->asic_type == CHIP_HAWAII) return pm_init(&dqm->packet_mgr, dqm); dqm->sched_running = true; @@ -1026,7 +1016,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + if (dqm->dev->adev->asic_type == CHIP_HAWAII) pm_uninit(&dqm->packet_mgr, false); dqm->sched_running = false; @@ -1055,9 +1045,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, dqm->sdma_bitmap &= ~(1ULL << bit); q->sdma_id = bit; q->properties.sdma_engine_id = q->sdma_id % - get_num_sdma_engines(dqm); + kfd_get_num_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / - get_num_sdma_engines(dqm); + kfd_get_num_sdma_engines(dqm->dev); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { if (dqm->xgmi_sdma_bitmap == 0) { pr_err("No more XGMI SDMA queue to allocate\n"); @@ -1072,10 +1062,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, * assumes the first N engines are always * PCIe-optimized ones */ - q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + - q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_engine_id = + kfd_get_num_sdma_engines(dqm->dev) + + q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / - get_num_xgmi_sdma_engines(dqm); + kfd_get_num_xgmi_sdma_engines(dqm->dev); } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1132,7 +1123,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) res.queue_mask |= 1ull << amdgpu_queue_mask_bit_to_set_resource_bit( - (struct amdgpu_device *)dqm->dev->kgd, i); + dqm->dev->adev, i); } res.gws_mask = ~0ull; res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; @@ -1847,10 +1838,10 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * get_num_all_sdma_engines(dqm) * - dev->device_info->num_sdma_queues_per_engine + + dev->device_info.num_sdma_queues_per_engine + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), (void *)&(mem_obj->cpu_ptr), false); @@ -1867,7 +1858,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (!dqm) return NULL; - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { /* HWS is not available on Hawaii. */ case CHIP_HAWAII: /* HWS depends on CWSR for timely dequeue. CWSR is not @@ -1930,7 +1921,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) goto out_free; } - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { case CHIP_CARRIZO: device_queue_manager_init_vi(&dqm->asic_ops); break; @@ -1952,31 +1943,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - device_queue_manager_init_v9(&dqm->asic_ops); - break; - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - device_queue_manager_init_v10_navi10(&dqm->asic_ops); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - goto out_free; + if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) + device_queue_manager_init_v10_navi10(&dqm->asic_ops); + else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) + device_queue_manager_init_v9(&dqm->asic_ops); + else { + WARN(1, "Unexpected ASIC family %u", + dev->adev->asic_type); + goto out_free; + } } if (init_mqd_managers(dqm)) @@ -2000,7 +1976,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) @@ -2031,7 +2007,7 @@ static void kfd_process_hw_exception(struct work_struct *work) { struct device_queue_manager *dqm = container_of(work, struct device_queue_manager, hw_exception_work); - amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); + amdgpu_amdkfd_gpu_reset(dqm->dev->adev); } #if defined(CONFIG_DEBUG_FS) @@ -2070,7 +2046,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return 0; } - r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); if (!r) { @@ -2092,7 +2068,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) continue; r = dqm->dev->kfd2kgd->hqd_dump( - dqm->dev->kgd, pipe, queue, &dump, &n_regs); + dqm->dev->adev, pipe, queue, &dump, &n_regs); if (r) break; @@ -2106,10 +2082,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { for (queue = 0; - queue < dqm->dev->device_info->num_sdma_queues_per_engine; + queue < dqm->dev->device_info.num_sdma_queues_per_engine; queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( - dqm->dev->kgd, pipe, queue, &dump, &n_regs); + dqm->dev->adev, pipe, queue, &dump, &n_regs); if (r) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index b5c3d13643f1..f20434d9980e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) { + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) { /* Aldebaran can safely support different XNACK modes * per process */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 768d153acff4..0dbcf54657ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -48,7 +48,7 @@ /* # of doorbell bytes allocated for each process. */ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(kfd->device_info->doorbell_size * + return roundup(kfd->device_info.doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - inx *= kfd->device_info->doorbell_size / sizeof(u32); + inx *= kfd->device_info.doorbell_size / sizeof(u32); /* * Calculating the kernel doorbell offset using the first @@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) unsigned int inx; inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) - * sizeof(u32) / kfd->device_info->doorbell_size; + * sizeof(u32) / kfd->device_info.doorbell_size; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_available_index); @@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, return kfd->doorbell_base_dw_offset + pdd->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + - doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); + doorbell_id * kfd->device_info.doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 3eea4edee355..afe72dd11325 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -935,8 +935,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, /* Workaround on Raven to not kill the process when memory is freed * before IOMMU is able to finish processing all the excessive PPRs */ - if (dev->device_info->asic_family != CHIP_RAVEN && - dev->device_info->asic_family != CHIP_RENOIR) { + + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) { mutex_lock(&p->event_mutex); /* Lookup events by type and signal them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index d1388896f9c1..2e2b7ceb71db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -394,7 +394,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - switch (dev->device_info->asic_family) { + switch (dev->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: case CHIP_CARRIZO: @@ -406,29 +406,14 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - kfd_init_apertures_v9(pdd, id); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - return -EINVAL; + if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) + kfd_init_apertures_v9(pdd, id); + else { + WARN(1, "Unexpected ASIC family %u", + dev->adev->asic_type); + return -EINVAL; + } } if (!dev->use_iommu_v2) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 543e7ea75593..deb64168c9e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -135,7 +135,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, *patched_flag = true; memcpy(patched_ihre, ih_ring_entry, - dev->device_info->ih_ring_entry_size); + dev->device_info.ih_ring_entry_size); pasid = dev->dqm->vmid_pasid[vmid]; @@ -231,7 +231,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { kfd_signal_poison_consumed_event(dev, pasid); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev); return; } break; @@ -253,7 +253,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { kfd_signal_poison_consumed_event(dev, pasid); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev); return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index bc47f6a44456..81887c2013c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd) int r; r = kfifo_alloc(&kfd->ih_fifo, - KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, + KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size, GFP_KERNEL); if (r) { dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); @@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) int count; count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - if (count != kfd->device_info->ih_ring_entry_size) { + kfd->device_info.ih_ring_entry_size); + if (count != kfd->device_info.ih_ring_entry_size) { dev_err_ratelimited(kfd_chardev(), "Interrupt ring overflow, dropping interrupt %d\n", count); @@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) int count; count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); + kfd->device_info.ih_ring_entry_size); - WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); + WARN_ON(count && count != kfd->device_info.ih_ring_entry_size); - return count == kfd->device_info->ih_ring_entry_size; + return count == kfd->device_info.ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) @@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work) interrupt_work); uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { + if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { dev_err_once(kfd_chardev(), "Ring entry too small\n"); return; } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - dev->device_info->event_interrupt_class->interrupt_wq(dev, + dev->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); } @@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev, /* integer and bitwise OR so there is no boolean short-circuiting */ unsigned int wanted = 0; - wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, + wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev, ih_ring_entry, patched_ihre, flag); return wanted != 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 73f2257acc23..66ad8d0b8f7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) } pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), + (unsigned int)(1 << kfd->device_info.max_pasid_bits), iommu_info.max_pasids); if (!kfd_set_pasid_limit(pasid_limit)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 64b4ac339904..16f8bc4ca7f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -91,7 +91,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_gpu_addr = kq->pq->gpu_addr; /* For CIK family asics, kq->eop_mem is not needed */ - if (dev->device_info->asic_family > CHIP_MULLINS) { + if (dev->adev->asic_type > CHIP_MULLINS) { retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); if (retval != 0) goto err_eop_allocate_vidmem; @@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, + retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif - if (kq->dev->device_info->doorbell_size == 8) { + if (kq->dev->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, kq->pending_wptr64); @@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq) void kq_rollback_packet(struct kernel_queue *kq) { - if (kq->dev->device_info->doorbell_size == 8) { + if (kq->dev->device_info.doorbell_size == 8) { kq->pending_wptr64 = *kq->wptr64_kernel; kq->pending_wptr = *kq->wptr_kernel % (kq->queue->properties.queue_size / 4); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 9b9c2b9bf2ef..48c2f2b6e217 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -108,8 +108,8 @@ error_free: * svm_migrate_copy_memory_gart - sdma copy data between ram and vram * * @adev: amdgpu device the sdma ring running - * @src: source page address array - * @dst: destination page address array + * @sys: system DMA pointer to be copied + * @vram: vram destination DMA pointer * @npages: number of pages to copy * @direction: enum MIGRATION_COPY_DIR * @mfence: output, sdma fence to signal after sdma is done @@ -938,7 +938,7 @@ int svm_migrate_init(struct amdgpu_device *adev) void *r; /* Page migration works on Vega10 or newer */ - if (kfddev->device_info->asic_family < CHIP_VEGA10) + if (!KFD_IS_SOC15(kfddev)) return -EINVAL; pgmap = &kfddev->pgmap; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index c021519af810..e2825ad4d699 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, return NULL; offset = (q->sdma_engine_id * - dev->device_info->num_sdma_queues_per_engine + + dev->device_info.num_sdma_queues_per_engine + q->sdma_queue_id) * dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; @@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; int i, se, sh, cu; - amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); if (cu_mask_count > cu_info.cu_active_number) cu_mask_count = cu_info.cu_active_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 8128f4d312f1..e9a8e21e144e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -171,7 +171,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, wptr_mask, mms); } @@ -180,7 +180,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -276,7 +276,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -289,7 +289,7 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied(struct mqd_manager *mm, void *mqd, @@ -297,7 +297,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, + return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -306,7 +306,7 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 270160fc401b..d74d8a6ac27a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -148,7 +148,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms); return r; @@ -158,7 +158,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id, queue_id, p->doorbell_off); } @@ -239,7 +239,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -254,7 +254,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -320,7 +320,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -363,14 +363,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 4e5932f54b5a..326eb2285029 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -108,7 +108,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!mqd_mem_obj) return NULL; - retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, + retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev, ALIGN(q->ctl_stack_size, PAGE_SIZE) + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), &(mqd_mem_obj->gtt_mem), @@ -199,7 +199,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms); } @@ -208,7 +208,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id, queue_id, p->doorbell_off); } @@ -291,7 +291,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -301,7 +301,7 @@ static void free_mqd(struct mqd_manager *mm, void *mqd, struct kfd_dev *kfd = mm->dev; if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); @@ -313,7 +313,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -375,7 +375,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -418,14 +418,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index cd9220eb8a7a..d456e950ce1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -162,7 +162,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); - return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, wptr_shift, wptr_mask, mms); } @@ -265,7 +265,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, mqd, type, timeout, + (mm->dev->adev, mqd, type, timeout, pipe_id, queue_id); } @@ -280,7 +280,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_is_occupied( - mm->dev->kgd, queue_address, + mm->dev->adev, queue_address, pipe_id, queue_id); } @@ -347,7 +347,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd, (uint32_t __user *)p->write_ptr, mms); } @@ -389,14 +389,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout); } static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e547f1f8c49f..1439420925a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -223,7 +223,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { - switch (dqm->dev->device_info->asic_family) { + switch (dqm->dev->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: /* PM4 packet structures on CIK are the same as on VI */ @@ -236,31 +236,16 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_VEGAM: pm->pmf = &kfd_vi_pm_funcs; break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - pm->pmf = &kfd_v9_pm_funcs; - break; - case CHIP_ALDEBARAN: - pm->pmf = &kfd_aldebaran_pm_funcs; - break; default: - WARN(1, "Unexpected ASIC family %u", - dqm->dev->device_info->asic_family); - return -EINVAL; + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) + pm->pmf = &kfd_aldebaran_pm_funcs; + else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) + pm->pmf = &kfd_v9_pm_funcs; + else { + WARN(1, "Unexpected ASIC family %u", + dqm->dev->adev->asic_type); + return -EINVAL; + } } pm->dqm = dqm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 08442e7d9944..3c0658e32e93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -110,8 +110,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, - struct scheduling_resources *res) +static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) { struct pm4_mes_set_resources *packet; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 94e92c0812db..0c3f911e3bf4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -183,7 +183,8 @@ enum cache_policy { cache_policy_noncoherent }; -#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) +#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0]) +#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1))) struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, @@ -194,8 +195,6 @@ struct kfd_event_interrupt_class { }; struct kfd_device_info { - enum amd_asic_type asic_family; - const char *asic_name; uint32_t gfx_target_version; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; @@ -208,11 +207,12 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; uint32_t no_atomic_fw_version; - unsigned int num_sdma_engines; - unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev); +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev); + struct kfd_mem_obj { uint32_t range_start; uint32_t range_end; @@ -228,9 +228,9 @@ struct kfd_vmid_info { }; struct kfd_dev { - struct kgd_dev *kgd; + struct amdgpu_device *adev; - const struct kfd_device_info *device_info; + struct kfd_device_info device_info; struct pci_dev *pdev; struct drm_device *ddev; @@ -766,7 +766,7 @@ struct svm_range_list { struct list_head deferred_range_list; spinlock_t deferred_list_lock; atomic_t evicted_ranges; - bool drain_pagefaults; + atomic_t drain_pagefaults; struct delayed_work restore_work; DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); struct task_struct *faulting_task; @@ -891,7 +891,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); -int kfd_process_gpuid_from_kgd(struct kfd_process *p, +int kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx); static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, @@ -984,7 +984,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain( struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev); int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); int kfd_numa_node_to_apic_id(int numa_node_id); void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b993011cfa64..f1930ff2c74a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -251,14 +251,13 @@ cleanup: } /** - * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device + * kfd_get_cu_occupancy - Collect number of waves in-flight on this device * by current process. Translates acquired wave count into number of compute units * that are occupied. * - * @atr: Handle of attribute that allows reporting of wave count. The attribute + * @attr: Handle of attribute that allows reporting of wave count. The attribute * handle encapsulates GPU device it is associated with, thereby allowing collection * of waves in flight, etc - * * @buffer: Handle of user provided buffer updated with wave count * * Return: Number of bytes written to user buffer or an error value @@ -288,7 +287,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Collect wave count from device if it supports */ wave_cnt = 0; max_waves_per_cu = 0; - dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt, + dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, &max_waves_per_cu); /* Translate wave count to number of compute units */ @@ -692,12 +691,12 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_dev *dev = pdd->dev; if (kptr) { - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem); kptr = NULL; } - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv, NULL); } @@ -714,24 +713,24 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, struct kfd_dev *kdev = pdd->dev; int err; - err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, pdd->drm_priv, mem, NULL, flags); if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem, + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, pdd->drm_priv, NULL); if (err) goto err_map_mem; - err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } if (kptr) { - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev, (struct kgd_mem *)*mem, kptr, NULL); if (err) { pr_debug("Map GTT BO to kernel failed\n"); @@ -742,10 +741,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, return err; sync_memory_failed: - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv); + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv); err_map_mem: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv, NULL); err_alloc_mem: *mem = NULL; @@ -940,10 +939,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) if (!peer_pdd->drm_priv) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); + peer_pdd->dev->adev, mem, peer_pdd->drm_priv); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, pdd->drm_priv, NULL); kfd_process_device_remove_obj_handle(pdd, id); } @@ -974,7 +973,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p) if (!mem) goto out; - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem); out: mutex_unlock(&p->mutex); @@ -1003,7 +1002,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->drm_file) { amdgpu_amdkfd_gpuvm_release_process_vm( - pdd->dev->kgd, pdd->drm_priv); + pdd->dev->adev, pdd->drm_priv); fput(pdd->drm_file); } @@ -1011,7 +1010,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); - kfree(pdd->qpd.doorbell_bitmap); + bitmap_free(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); @@ -1317,14 +1316,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * support the SVM APIs and don't need to be considered * for the XNACK mode selection. */ - if (dev->device_info->asic_family < CHIP_VEGA10) + if (!KFD_IS_SOC15(dev)) continue; /* Aldebaran can always support XNACK because it can support * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && - dev->device_info->asic_family == CHIP_ALDEBARAN) + if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) continue; /* GFXv10 and later GPUs do not support shader preemption @@ -1332,7 +1330,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * management and memory-manager-related preemptions or * even deadlocks. */ - if (dev->device_info->asic_family >= CHIP_NAVI10) + if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) return false; if (dev->noretry) @@ -1431,12 +1429,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, int range_start = dev->shared_resources.non_cp_doorbells_start; int range_end = dev->shared_resources.non_cp_doorbells_end; - if (!KFD_IS_SOC15(dev->device_info->asic_family)) + if (!KFD_IS_SOC15(dev)) return 0; - qpd->doorbell_bitmap = - kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); + qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + GFP_KERNEL); if (!qpd->doorbell_bitmap) return -ENOMEM; @@ -1448,9 +1445,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { if (i >= range_start && i <= range_end) { - set_bit(i, qpd->doorbell_bitmap); - set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, - qpd->doorbell_bitmap); + __set_bit(i, qpd->doorbell_bitmap); + __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + qpd->doorbell_bitmap); } } @@ -1547,7 +1544,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, dev = pdd->dev; ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->kgd, drm_file, p->pasid, + dev->adev, drm_file, p->pasid, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); @@ -1779,14 +1776,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) } int -kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, +kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx) { - struct kgd_dev *kgd = (struct kgd_dev *)adev; int i; for (i = 0; i < p->n_pdds; i++) - if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + if (p->pdds[i] && p->pdds[i]->dev->adev == adev) { *gpuid = p->pdds[i]->dev->id; *gpuidx = i; return 0; @@ -1951,10 +1947,10 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) * only happens when the first queue is created. */ if (pdd->qpd.vmid) - amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, pdd->qpd.vmid); } else { - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, pdd->process->pasid, type); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 3627e7ac161b..5e5c84a8e1ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -118,7 +118,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, return ret; pqn->q->gws = mem; - pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, NULL); @@ -135,9 +135,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p) int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); - pqm->queue_slot_bitmap = - kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); + pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + GFP_KERNEL); if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; @@ -159,7 +158,7 @@ void pqm_uninit(struct process_queue_manager *pqm) kfree(pqn); } - kfree(pqm->queue_slot_bitmap); + bitmap_free(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } @@ -220,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * Hence we also check the type as well */ if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) - max_queues = dev->device_info->max_no_of_hqd/2; + max_queues = dev->device_info.max_no_of_hqd/2; if (pdd->qpd.queue_count >= max_queues) return -ENOSPC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ed4bc5f844ce..deae12dc777d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -207,7 +207,6 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; /* * ThermalThrottle msg = throttle_bitmask(8): * thermal_interrupt_count(16): @@ -223,14 +222,13 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, - atomic64_read(&adev->smu.throttle_int_counter)); + atomic64_read(&dev->adev->smu.throttle_int_counter)); add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len); } void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) { - struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; struct amdgpu_task_info task_info; /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n + @@ -243,7 +241,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) return; memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, pasid, &task_info); + amdgpu_vm_get_task_info(dev->adev, pasid, &task_info); /* Report VM faults from user applications, not retry from kernel */ if (!task_info.pid) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 16137c4247bb..7e92dcea4ce8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -193,7 +193,6 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; pr_debug("mapping to gpu idx 0x%x\n", gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx); @@ -201,9 +200,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_dma_map_dev(adev, prange, offset, npages, + r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, hmm_pfns, gpuidx); if (r) break; @@ -581,7 +579,7 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) return NULL; } - return (struct amdgpu_device *)pdd->dev->kgd; + return pdd->dev->adev; } struct kfd_process_device * @@ -593,7 +591,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx); if (r) { pr_debug("failed to get device id by adev %p\n", adev); return NULL; @@ -706,6 +704,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, } } +static bool +svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + int gpuidx; + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + if (prange->preferred_loc != attrs[i].value) + return false; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + /* Prefetch should always trigger a migration even + * if the value of the attribute didn't change. + */ + return false; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { + if (test_bit(gpuidx, prange->bitmap_access) || + test_bit(gpuidx, prange->bitmap_aip)) + return false; + } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { + if (!test_bit(gpuidx, prange->bitmap_access)) + return false; + } else { + if (!test_bit(gpuidx, prange->bitmap_aip)) + return false; + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + if ((prange->flags & attrs[i].value) != attrs[i].value) + return false; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + if ((prange->flags & attrs[i].value) != 0) + return false; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + if (prange->granularity != attrs[i].value) + return false; + break; + default: + WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); + } + } + + return true; +} + /** * svm_range_debug_dump - print all range information from svms * @svms: svm range list header @@ -743,14 +796,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms) } } -static bool -svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) -{ - return (old->prefetch_loc == new->prefetch_loc && - old->flags == new->flags && - old->granularity == new->granularity); -} - static int svm_range_split_array(void *ppnew, void *ppold, size_t size, uint64_t old_start, uint64_t old_n, @@ -943,7 +988,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, } static int -svm_range_split_tail(struct svm_range *prange, struct svm_range *new, +svm_range_split_tail(struct svm_range *prange, uint64_t new_last, struct list_head *insert_list) { struct svm_range *tail; @@ -955,7 +1000,7 @@ svm_range_split_tail(struct svm_range *prange, struct svm_range *new, } static int -svm_range_split_head(struct svm_range *prange, struct svm_range *new, +svm_range_split_head(struct svm_range *prange, uint64_t new_start, struct list_head *insert_list) { struct svm_range *head; @@ -1053,8 +1098,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, if (domain == SVM_RANGE_VRAM_DOMAIN) bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - switch (adev->asic_type) { - case CHIP_ARCTURUS: + switch (KFD_GC_VERSION(adev->kfd.dev)) { + case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? @@ -1070,7 +1115,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; - case CHIP_ALDEBARAN: + case IP_VERSION(9, 4, 2): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? @@ -1129,7 +1174,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct kfd_process_device *pdd; struct dma_fence *fence = NULL; - struct amdgpu_device *adev; struct kfd_process *p; uint32_t gpuidx; int r = 0; @@ -1145,9 +1189,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + r = svm_range_unmap_from_gpu(pdd->dev->adev, + drm_priv_to_vm(pdd->drm_priv), start, last, &fence); if (r) break; @@ -1159,7 +1203,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, + amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev, p->pasid, TLB_FLUSH_HEAVYWEIGHT); } @@ -1172,7 +1216,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned long npages, bool readonly, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, struct dma_fence **fence) { - struct amdgpu_bo_va bo_va; bool table_freed = false; uint64_t pte_flags; unsigned long last_start; @@ -1185,9 +1228,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); - if (prange->svm_bo && prange->ttm_res) - bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); - for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; @@ -1243,8 +1283,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct kfd_process *p; p = container_of(prange->svms, struct kfd_process, svms); - amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, - p->pasid, TLB_FLUSH_LEGACY); + amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY); } out: return r; @@ -1257,7 +1296,6 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, { struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; - struct amdgpu_device *adev; struct kfd_process *p; struct dma_fence *fence = NULL; uint32_t gpuidx; @@ -1276,19 +1314,18 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; pdd = kfd_bind_process_to_device(pdd->dev, p); if (IS_ERR(pdd)) return -EINVAL; - if (bo_adev && adev != bo_adev && - !amdgpu_xgmi_same_hive(adev, bo_adev)) { + if (bo_adev && pdd->dev->adev != bo_adev && + !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { pr_debug("cannot map to device idx %d\n", gpuidx); continue; } - r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv), prange, offset, npages, readonly, prange->dma_addr[gpuidx], bo_adev, wait ? &fence : NULL); @@ -1322,7 +1359,6 @@ struct svm_validate_context { static int svm_range_reserve_bos(struct svm_validate_context *ctx) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; struct amdgpu_vm *vm; uint32_t gpuidx; int r; @@ -1334,7 +1370,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) pr_debug("failed to find device idx %d\n", gpuidx); return -EINVAL; } - adev = (struct amdgpu_device *)pdd->dev->kgd; vm = drm_priv_to_vm(pdd->drm_priv); ctx->tv[gpuidx].bo = &vm->root.bo->tbo; @@ -1356,9 +1391,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) r = -EINVAL; goto unreserve_out; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), + r = amdgpu_vm_validate_pt_bos(pdd->dev->adev, + drm_priv_to_vm(pdd->drm_priv), svm_range_bo_validate, NULL); if (r) { pr_debug("failed %d validate pt bos\n", r); @@ -1381,12 +1416,10 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx) static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; pdd = kfd_process_device_from_gpuidx(p, gpuidx); - adev = (struct amdgpu_device *)pdd->dev->kgd; - return SVM_ADEV_PGMAP_OWNER(adev); + return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev); } /* @@ -1574,7 +1607,6 @@ retry_flush_work: static void svm_range_restore_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct amdkfd_process_info *process_info; struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; @@ -1594,12 +1626,10 @@ static void svm_range_restore_work(struct work_struct *work) * the lifetime of this thread, kfd_process and mm will be valid. */ p = container_of(svms, struct kfd_process, svms); - process_info = p->kgd_process_info; mm = p->mm; if (!mm) return; - mutex_lock(&process_info->lock); svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); @@ -1652,7 +1682,6 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - mutex_unlock(&process_info->lock); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { @@ -1664,6 +1693,10 @@ out_reschedule: /** * svm_range_evict - evict svm range + * @prange: svm range structure + * @mm: current process mm_struct + * @start: starting process queue number + * @last: last process queue number * * Stop all queues of the process to ensure GPU doesn't access the memory, then * return to let CPU evict the buffer and proceed CPU pagetable update. @@ -1768,46 +1801,49 @@ static struct svm_range *svm_range_clone(struct svm_range *old) } /** - * svm_range_handle_overlap - split overlap ranges - * @svms: svm range list header - * @new: range added with this attributes - * @start: range added start address, in pages - * @last: range last address, in pages - * @update_list: output, the ranges attributes are updated. For set_attr, this - * will do validation and map to GPUs. For unmap, this will be - * removed and unmap from GPUs - * @insert_list: output, the ranges will be inserted into svms, attributes are - * not changes. For set_attr, this will add into svms. - * @remove_list:output, the ranges will be removed from svms - * @left: the remaining range after overlap, For set_attr, this will be added - * as new range. + * svm_range_add - add svm range and handle overlap + * @p: the range add to this process svms + * @start: page size aligned + * @size: page size aligned + * @nattr: number of attributes + * @attrs: array of attributes + * @update_list: output, the ranges need validate and update GPU mapping + * @insert_list: output, the ranges need insert to svms + * @remove_list: output, the ranges are replaced and need remove from svms * - * Total have 5 overlap cases. + * Check if the virtual address range has overlap with any existing ranges, + * split partly overlapping ranges and add new ranges in the gaps. All changes + * should be applied to the range_list and interval tree transactionally. If + * any range split or allocation fails, the entire update fails. Therefore any + * existing overlapping svm_ranges are cloned and the original svm_ranges left + * unchanged. * - * This function handles overlap of an address interval with existing - * struct svm_ranges for applying new attributes. This may require - * splitting existing struct svm_ranges. All changes should be applied to - * the range_list and interval tree transactionally. If any split operation - * fails, the entire update fails. Therefore the existing overlapping - * svm_ranges are cloned and the original svm_ranges left unchanged. If the - * transaction succeeds, the modified clones are added and the originals - * freed. Otherwise the clones are removed and the old svm_ranges remain. + * If the transaction succeeds, the caller can update and insert clones and + * new ranges, then free the originals. * - * Context: The caller must hold svms->lock + * Otherwise the caller can free the clones and new ranges, while the old + * svm_ranges remain unchanged. + * + * Context: Process context, caller must hold svms->lock + * + * Return: + * 0 - OK, otherwise error code */ static int -svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, - unsigned long start, unsigned long last, - struct list_head *update_list, - struct list_head *insert_list, - struct list_head *remove_list, - unsigned long *left) +svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, + struct list_head *update_list, struct list_head *insert_list, + struct list_head *remove_list) { + unsigned long last = start + size - 1UL; + struct svm_range_list *svms = &p->svms; struct interval_tree_node *node; struct svm_range *prange; struct svm_range *tmp; int r = 0; + pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last); + INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); INIT_LIST_HEAD(remove_list); @@ -1815,18 +1851,24 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, node = interval_tree_iter_first(&svms->objects, start, last); while (node) { struct interval_tree_node *next; - struct svm_range *old; unsigned long next_start; pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start, node->last); - old = container_of(node, struct svm_range, it_node); + prange = container_of(node, struct svm_range, it_node); next = interval_tree_iter_next(node, start, last); next_start = min(node->last, last) + 1; - if (node->start < start || node->last > last) { - /* node intersects the updated range, clone+split it */ + if (svm_range_is_same_attrs(p, prange, nattr, attrs)) { + /* nothing to do */ + } else if (node->start < start || node->last > last) { + /* node intersects the update range and its attributes + * will change. Clone and split it, apply updates only + * to the overlapping part + */ + struct svm_range *old = prange; + prange = svm_range_clone(old); if (!prange) { r = -ENOMEM; @@ -1835,17 +1877,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, list_add(&old->remove_list, remove_list); list_add(&prange->insert_list, insert_list); + list_add(&prange->update_list, update_list); if (node->start < start) { pr_debug("change old range start\n"); - r = svm_range_split_head(prange, new, start, + r = svm_range_split_head(prange, start, insert_list); if (r) goto out; } if (node->last > last) { pr_debug("change old range last\n"); - r = svm_range_split_tail(prange, new, last, + r = svm_range_split_tail(prange, last, insert_list); if (r) goto out; @@ -1854,16 +1897,12 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, /* The node is contained within start..last, * just update it */ - prange = old; - } - - if (!svm_range_is_same_attrs(prange, new)) list_add(&prange->update_list, update_list); + } /* insert a new node if needed */ if (node->start > start) { - prange = svm_range_new(prange->svms, start, - node->start - 1); + prange = svm_range_new(svms, start, node->start - 1); if (!prange) { r = -ENOMEM; goto out; @@ -1877,8 +1916,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, start = next_start; } - if (left && start <= last) - *left = last - start + 1; + /* add a final range at the end if needed */ + if (start <= last) { + prange = svm_range_new(svms, start, last); + if (!prange) { + r = -ENOMEM; + goto out; + } + list_add(&prange->insert_list, insert_list); + list_add(&prange->update_list, update_list); + } out: if (r) @@ -1966,23 +2013,30 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; - struct amdgpu_device *adev; struct kfd_process *p; + int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); +restart: + drain = atomic_read(&svms->drain_pagefaults); + if (!drain) + return; + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) continue; pr_debug("drain retry fault gpu %d svms %p\n", i, svms); - adev = (struct amdgpu_device *)pdd->dev->kgd; - amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, + &pdd->dev->adev->irq.ih1); pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } + if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) + goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -1990,43 +2044,41 @@ static void svm_range_deferred_list_work(struct work_struct *work) struct svm_range_list *svms; struct svm_range *prange; struct mm_struct *mm; + struct kfd_process *p; svms = container_of(work, struct svm_range_list, deferred_list_work); pr_debug("enter svms 0x%p\n", svms); + p = container_of(svms, struct kfd_process, svms); + /* Avoid mm is gone when inserting mmu notifier */ + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("svms 0x%p process mm gone\n", svms); + return; + } +retry: + mmap_write_lock(mm); + + /* Checking for the need to drain retry faults must be inside + * mmap write lock to serialize with munmap notifiers. + */ + if (unlikely(atomic_read(&svms->drain_pagefaults))) { + mmap_write_unlock(mm); + svm_range_drain_retry_fault(svms); + goto retry; + } + spin_lock(&svms->deferred_list_lock); while (!list_empty(&svms->deferred_range_list)) { prange = list_first_entry(&svms->deferred_range_list, struct svm_range, deferred_list); + list_del_init(&prange->deferred_list); spin_unlock(&svms->deferred_list_lock); + pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, prange->start, prange->last, prange->work_item.op); - mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); mutex_lock(&svms->lock); - - /* Checking for the need to drain retry faults must be in - * mmap write lock to serialize with munmap notifiers. - * - * Remove from deferred_list must be inside mmap write lock, - * otherwise, svm_range_list_lock_and_flush_work may hold mmap - * write lock, and continue because deferred_list is empty, then - * deferred_list handle is blocked by mmap write lock. - */ - spin_lock(&svms->deferred_list_lock); - if (unlikely(svms->drain_pagefaults)) { - svms->drain_pagefaults = false; - spin_unlock(&svms->deferred_list_lock); - mutex_unlock(&svms->lock); - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } - list_del_init(&prange->deferred_list); - spin_unlock(&svms->deferred_list_lock); - mutex_lock(&prange->migrate_mutex); while (!list_empty(&prange->child_list)) { struct svm_range *pchild; @@ -2042,12 +2094,13 @@ retry: svm_range_handle_list_op(svms, prange); mutex_unlock(&svms->lock); - mmap_write_unlock(mm); spin_lock(&svms->deferred_list_lock); } spin_unlock(&svms->deferred_list_lock); + mmap_write_unlock(mm); + mmput(mm); pr_debug("exit svms 0x%p\n", svms); } @@ -2056,12 +2109,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op) { spin_lock(&svms->deferred_list_lock); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". - */ - if (op == SVM_OP_UNMAP_RANGE) - svms->drain_pagefaults = true; /* if prange is on the deferred list */ if (!list_empty(&prange->deferred_list)) { pr_debug("update exist prange 0x%p work op %d\n", prange, op); @@ -2140,6 +2187,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); + /* Make sure pending page faults are drained in the deferred worker + * before the range is freed to avoid straggler interrupts on + * unmapped memory causing "phantom faults". + */ + atomic_inc(&svms->drain_pagefaults); + unmap_parent = start <= prange->start && last >= prange->last; list_for_each_entry(pchild, &prange->child_list, child_list) { @@ -2169,6 +2222,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, /** * svm_range_cpu_invalidate_pagetables - interval notifier callback + * @mni: mmu_interval_notifier struct + * @range: mmu_notifier_range struct + * @cur_seq: value to pass to mmu_interval_set_seq() * * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it * is from migration, or CPU page invalidation callback. @@ -2301,7 +2357,7 @@ svm_range_best_restore_location(struct svm_range *prange, p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx); if (r < 0) { pr_debug("failed to get gpuid from kgd\n"); return -1; @@ -2478,7 +2534,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, pr_debug("Failed to create prange in address [0x%llx]\n", addr); return NULL; } - if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { + if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { pr_debug("failed to get gpuid from kgd\n"); svm_range_free(prange); return NULL; @@ -2545,7 +2601,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, uint32_t gpuid; int r; - r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx); + r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx); if (r < 0) return; } @@ -2559,20 +2615,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, } static bool -svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault) +svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) { unsigned long requested = VM_READ; - struct vm_area_struct *vma; if (write_fault) requested |= VM_WRITE; - vma = find_vma(mm, addr << PAGE_SHIFT); - if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { - pr_debug("address 0x%llx VMA is removed\n", addr); - return true; - } - pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested, vma->vm_flags); return (vma->vm_flags & requested) == requested; @@ -2590,6 +2639,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, int32_t best_loc; int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; + struct vm_area_struct *vma; int r = 0; if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { @@ -2600,7 +2650,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, p = kfd_lookup_process_by_pasid(pasid); if (!p) { pr_debug("kfd process not founded pasid 0x%x\n", pasid); - return -ESRCH; + return 0; } if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); @@ -2611,10 +2661,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); + if (atomic_read(&svms->drain_pagefaults)) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; + goto out; + } + + /* p->lead_thread is available as kfd_process_wq_release flush the work + * before releasing task ref. + */ mm = get_task_mm(p->lead_thread); if (!mm) { pr_debug("svms 0x%p failed to get mm\n", svms); - r = -ESRCH; + r = 0; goto out; } @@ -2652,6 +2711,7 @@ retry_write_locked: if (svm_range_skip_recover(prange)) { amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + r = 0; goto out_unlock_range; } @@ -2660,10 +2720,21 @@ retry_write_locked: if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { pr_debug("svms 0x%p [0x%lx %lx] already restored\n", svms, prange->start, prange->last); + r = 0; + goto out_unlock_range; + } + + /* __do_munmap removed VMA, return success as we are handling stale + * retry fault. + */ + vma = find_vma(mm, addr << PAGE_SHIFT); + if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + pr_debug("address 0x%llx VMA is removed\n", addr); + r = 0; goto out_unlock_range; } - if (!svm_fault_allowed(mm, addr, write_fault)) { + if (!svm_fault_allowed(vma, write_fault)) { pr_debug("fault addr 0x%llx no %s permission\n", addr, write_fault ? "write" : "read"); r = -EPERM; @@ -2741,6 +2812,14 @@ void svm_range_list_fini(struct kfd_process *p) /* Ensure list work is finished before process is destroyed */ flush_work(&p->svms.deferred_list_work); + /* + * Ensure no retry fault comes in afterwards, as page fault handler will + * not find kfd process and take mm lock to recover fault. + */ + atomic_inc(&p->svms.drain_pagefaults); + svm_range_drain_retry_fault(&p->svms); + + list_for_each_entry_safe(prange, next, &p->svms.list, list) { svm_range_unlink(prange); svm_range_remove_notifier(prange); @@ -2761,6 +2840,7 @@ int svm_range_list_init(struct kfd_process *p) mutex_init(&svms->lock); INIT_LIST_HEAD(&svms->list); atomic_set(&svms->evicted_ranges, 0); + atomic_set(&svms->drain_pagefaults, 0); INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work); INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); INIT_LIST_HEAD(&svms->deferred_range_list); @@ -2868,59 +2948,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size) } /** - * svm_range_add - add svm range and handle overlap - * @p: the range add to this process svms - * @start: page size aligned - * @size: page size aligned - * @nattr: number of attributes - * @attrs: array of attributes - * @update_list: output, the ranges need validate and update GPU mapping - * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms - * - * Check if the virtual address range has overlap with the registered ranges, - * split the overlapped range, copy and adjust pages address and vram nodes in - * old and new ranges. - * - * Context: Process context, caller must hold svms->lock - * - * Return: - * 0 - OK, otherwise error code - */ -static int -svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, - uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, - struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list) -{ - uint64_t last = start + size - 1UL; - struct svm_range_list *svms; - struct svm_range new = {0}; - struct svm_range *prange; - unsigned long left = 0; - int r = 0; - - pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); - - svm_range_apply_attrs(p, &new, nattr, attrs); - - svms = &p->svms; - - r = svm_range_handle_overlap(svms, &new, start, last, update_list, - insert_list, remove_list, &left); - if (r) - return r; - - if (left) { - prange = svm_range_new(svms, last - left + 1, last); - list_add(&prange->insert_list, insert_list); - list_add(&prange->update_list, update_list); - } - - return 0; -} - -/** * svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure * @@ -2953,7 +2980,6 @@ svm_range_best_prefetch_location(struct svm_range *prange) uint32_t best_loc = prange->prefetch_loc; struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; - struct amdgpu_device *adev; struct kfd_process *p; uint32_t gpuidx; @@ -2981,12 +3007,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) pr_debug("failed to get device by idx 0x%x\n", gpuidx); continue; } - adev = (struct amdgpu_device *)pdd->dev->kgd; - if (adev == bo_adev) + if (pdd->dev->adev == bo_adev) continue; - if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { + if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { best_loc = 0; break; } @@ -3150,7 +3175,6 @@ static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) { - struct amdkfd_process_info *process_info = p->kgd_process_info; struct mm_struct *mm = current->mm; struct list_head update_list; struct list_head insert_list; @@ -3169,8 +3193,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svms = &p->svms; - mutex_lock(&process_info->lock); - svm_range_list_lock_and_flush_work(svms, mm); r = svm_range_is_valid(p, start, size); @@ -3246,8 +3268,6 @@ out_unlock_range: mutex_unlock(&svms->lock); mmap_read_unlock(mm); out: - mutex_unlock(&process_info->lock); - pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dd593ad0614a..948fbb39336e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -113,7 +113,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) { struct kfd_topology_device *top_dev; struct kfd_dev *device = NULL; @@ -121,7 +121,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->kgd == kgd) { + if (top_dev->gpu && top_dev->gpu->adev == adev) { device = top_dev->gpu; break; } @@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (dev->gpu) { log_max_watch_addr = - __ilog2_u32(dev->gpu->device_info->num_of_watch_points); + __ilog2_u32(dev->gpu->device_info.num_of_watch_points); if (log_max_watch_addr) { dev->node_props.capability |= @@ -515,7 +515,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } - if (dev->gpu->device_info->asic_family == CHIP_TONGA) + if (dev->gpu->adev->asic_type == CHIP_TONGA) dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; @@ -531,7 +531,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", - amdgpu_amdkfd_get_unique_id(dev->gpu->kgd)); + dev->gpu->adev->unique_id); } @@ -1106,7 +1106,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) if (!gpu) return 0; - amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info); local_mem_size = local_mem_info.local_mem_size_private + local_mem_info.local_mem_size_public; @@ -1189,7 +1189,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1217,8 +1217,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, /* set gpu (dev) flags. */ } else { if (!dev->gpu->pci_atomic_requested || - dev->gpu->device_info->asic_family == - CHIP_HAWAII) + dev->gpu->adev->asic_type == CHIP_HAWAII) link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; } @@ -1239,7 +1238,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, */ if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) { + KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; } @@ -1286,7 +1285,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) void *crat_image = NULL; size_t image_size = 0; int proximity_domain; - struct amdgpu_device *adev; + int i; + const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; INIT_LIST_HEAD(&temp_topology_device_list); @@ -1296,10 +1296,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); - adev = (struct amdgpu_device *)(gpu->kgd); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ - if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) { + if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { struct kfd_topology_device *top_dev; down_read(&topology_lock); @@ -1372,45 +1370,48 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * needed for the topology */ - amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info); - strncpy(dev->node_props.name, gpu->device_info->asic_name, - KFD_TOPOLOGY_PUBLIC_NAME_SIZE); + for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { + dev->node_props.name[i] = __tolower(asic_name[i]); + if (asic_name[i] == '\0') + break; + } + dev->node_props.name[i] = '\0'; dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; - dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version; + dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; dev->node_props.capability |= - ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << - HSA_CAP_ASIC_REVISION_SHIFT) & + ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->pdev); dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); dev->node_props.max_engine_clk_fcompute = - amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); + amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; dev->node_props.drm_render_minor = gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; - dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); dev->node_props.num_sdma_xgmi_engines = - gpu->device_info->num_xgmi_sdma_engines; + kfd_get_num_xgmi_sdma_engines(gpu); dev->node_props.num_sdma_queues_per_engine = - gpu->device_info->num_sdma_queues_per_engine; + gpu->device_info.num_sdma_queues_per_engine; dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? - amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; + dev->gpu->adev->gds.gws_size : 0; dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); - switch (dev->gpu->device_info->asic_family) { + switch (dev->gpu->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: case CHIP_TONGA: @@ -1429,30 +1430,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << - HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & - HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->gpu->device_info->asic_family); + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + else + WARN(1, "Unexpected ASIC family %u", + dev->gpu->adev->asic_type); } /* @@ -1469,7 +1454,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * because it doesn't consider masked out CUs * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd */ - if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { + if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { dev->node_props.simd_count = cu_info.simd_per_cu * cu_info.cu_active_number; dev->node_props.max_waves_per_simd = 10; @@ -1477,16 +1462,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu) /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= - ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? HSA_CAP_SRAM_EDCSUPPORTED : 0; - dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + dev->node_props.capability |= + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? HSA_CAP_MEM_EDCSUPPORTED : 0; - if (adev->asic_type != CHIP_VEGA10) - dev->node_props.capability |= (adev->ras_enabled != 0) ? + if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) + dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; - if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) + if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; kfd_debug_print_topology(); @@ -1592,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu) gpu->use_iommu_v2 = false; - if (!gpu->device_info->needs_iommu_device) + if (!gpu->device_info.needs_iommu_device) return; down_read(&topology_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index a8db017c9b8e..f0cc59d2fd5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,38 +25,11 @@ #include <linux/types.h> #include <linux/list.h> +#include <linux/kfd_sysfs.h> #include "kfd_crat.h" #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 -#define HSA_CAP_HOT_PLUGGABLE 0x00000001 -#define HSA_CAP_ATS_PRESENT 0x00000002 -#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 -#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 -#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 -#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 -#define HSA_CAP_VA_LIMIT 0x00000040 -#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 - -#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 -#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 -#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 -#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 - -#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */ -#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 -#define HSA_CAP_RASEVENTNOTIFY 0x00200000 -#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 -#define HSA_CAP_ASIC_REVISION_SHIFT 22 -#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 -#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 -#define HSA_CAP_RESERVED 0xe00f8000 - struct kfd_node_properties { uint64_t hive_id; uint32_t cpu_cores_count; @@ -93,17 +66,6 @@ struct kfd_node_properties { char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; -#define HSA_MEM_HEAP_TYPE_SYSTEM 0 -#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 -#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 -#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 -#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 -#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 - -#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 -#define HSA_MEM_FLAGS_RESERVED 0xfffffffc - struct kfd_mem_properties { struct list_head list; uint32_t heap_type; @@ -116,12 +78,6 @@ struct kfd_mem_properties { struct attribute attr; }; -#define HSA_CACHE_TYPE_DATA 0x00000001 -#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 -#define HSA_CACHE_TYPE_CPU 0x00000004 -#define HSA_CACHE_TYPE_HSACU 0x00000008 -#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 - struct kfd_cache_properties { struct list_head list; uint32_t processor_id_low; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4595c59f2bf0..2f0b14f8f833 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -51,6 +51,7 @@ #include <drm/drm_hdcp.h> #endif #include "amdgpu_pm.h" +#include "amdgpu_atombios.h" #include "amd_shared.h" #include "amdgpu_dm_irq.h" @@ -623,7 +624,7 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) #endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ /** - * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command. + * dmub_aux_setconfig_callback - Callback for AUX or SET_CONFIG command. * @adev: amdgpu_device pointer * @notify: dmub notification structure * @@ -631,7 +632,8 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) * Copies dmub notification to DM which is to be read by AUX command. * issuing thread and also signals the event to wake up the thread. */ -void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +static void dmub_aux_setconfig_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) { if (adev->dm.dmub_notify) memcpy(adev->dm.dmub_notify, notify, sizeof(struct dmub_notification)); @@ -647,7 +649,8 @@ void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notific * Dmub Hpd interrupt processing callback. Gets displayindex through the * ink index and calls helper to do the processing. */ -void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *notify) +static void dmub_hpd_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) { struct amdgpu_dm_connector *aconnector; struct amdgpu_dm_connector *hpd_aconnector = NULL; @@ -704,8 +707,10 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *not * to dmub interrupt handling thread * Return: true if successfully registered, false if there is existing registration */ -bool register_dmub_notify_callback(struct amdgpu_device *adev, enum dmub_notification_type type, -dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload) +static bool register_dmub_notify_callback(struct amdgpu_device *adev, + enum dmub_notification_type type, + dmub_notify_interrupt_callback_t callback, + bool dmub_int_thread_offload) { if (callback != NULL && type < ARRAY_SIZE(adev->dm.dmub_thread_offload)) { adev->dm.dmub_callback[type] = callback; @@ -789,8 +794,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) plink = adev->dm.dc->links[notify.link_index]; if (plink) { plink->hpd_status = - notify.hpd_status == - DP_HPD_PLUG ? true : false; + notify.hpd_status == DP_HPD_PLUG; } } queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work); @@ -1050,6 +1054,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + @@ -1453,8 +1462,21 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING) init_data.flags.edp_no_power_sequencing = true; +#ifdef CONFIG_DRM_AMD_DC_DCN + if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP1_4A) + init_data.flags.allow_lttpr_non_transparent_mode.bits.DP1_4A = true; + if (amdgpu_dc_feature_mask & DC_DISABLE_LTTPR_DP2_0) + init_data.flags.allow_lttpr_non_transparent_mode.bits.DP2_0 = true; +#endif + init_data.flags.power_down_display_on_boot = true; + if (check_seamless_boot_capability(adev)) { + init_data.flags.power_down_display_on_boot = false; + init_data.flags.allow_seamless_boot_optimization = true; + DRM_INFO("Seamless boot condition check passed\n"); + } + INIT_LIST_HEAD(&adev->dm.da_list); /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -1479,8 +1501,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; - if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) + if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) { adev->dm.dc->debug.disable_dsc = true; + adev->dm.dc->debug.disable_dsc_edp = true; + } if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING) adev->dm.dc->debug.disable_clock_gate = true; @@ -2303,14 +2327,6 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) goto fail; } - - res = dc_validate_global_state(dc, context, false); - - if (res != DC_OK) { - DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res); - goto fail; - } - res = dc_commit_state(dc, context); fail: @@ -2561,6 +2577,23 @@ static int dm_resume(void *handle) if (amdgpu_in_reset(adev)) { dc_state = dm->cached_dc_state; + /* + * The dc->current_state is backed up into dm->cached_dc_state + * before we commit 0 streams. + * + * DC will clear link encoder assignments on the real state + * but the changes won't propagate over to the copy we made + * before the 0 streams commit. + * + * DC expects that link encoder assignments are *not* valid + * when committing a state, so as a workaround it needs to be + * cleared here. + */ + link_enc_cfg_init(dm->dc, dc_state); + + if (dc_enable_dmub_notifications(adev->dm.dc)) + amdgpu_dm_outbox_init(adev); + r = dm_dmub_hw_init(adev); if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); @@ -2572,20 +2605,11 @@ static int dm_resume(void *handle) for (i = 0; i < dc_state->stream_count; i++) { dc_state->streams[i]->mode_changed = true; - for (j = 0; j < dc_state->stream_status->plane_count; j++) { - dc_state->stream_status->plane_states[j]->update_flags.raw + for (j = 0; j < dc_state->stream_status[i].plane_count; j++) { + dc_state->stream_status[i].plane_states[j]->update_flags.raw = 0xffffffff; } } -#if defined(CONFIG_DRM_AMD_DC_DCN) - /* - * Resource allocation happens for link encoders for newer ASIC in - * dc_validate_global_state, so we need to revalidate it. - * - * This shouldn't fail (it passed once before), so warn if it does. - */ - WARN_ON(dc_validate_global_state(dm->dc, dc_state, false) != DC_OK); -#endif WARN_ON(!dc_commit_state(dm->dc, dc_state)); @@ -2608,6 +2632,10 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); + /* Re-enable outbox interrupts for DPIA. */ + if (dc_enable_dmub_notifications(adev->dm.dc)) + amdgpu_dm_outbox_init(adev); + /* Before powering on DC we need to re-initialize DMUB. */ r = dm_dmub_hw_init(adev); if (r) @@ -2938,13 +2966,12 @@ void amdgpu_dm_update_connector_after_detect( aconnector->edid = (struct edid *)sink->dc_edid.raw_edid; - drm_connector_update_edid_property(connector, - aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, aconnector->edid); } + drm_connector_update_edid_property(connector, aconnector->edid); amdgpu_dm_update_freesync_caps(connector, aconnector->edid); update_connector_ext_caps(aconnector); } else { @@ -3909,6 +3936,9 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, caps = dm->backlight_caps[bl_idx]; dm->brightness[bl_idx] = user_brightness; + /* update scratch register */ + if (bl_idx == 0) + amdgpu_atombios_scratch_regs_set_backlight_level(dm->adev, dm->brightness[bl_idx]); brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]); link = (struct dc_link *)dm->backlight_link[bl_idx]; @@ -4251,6 +4281,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } + /* + * Disable vblank IRQs aggressively for power-saving. + * + * TODO: Fix vblank control helpers to delay PSR entry to allow this when PSR + * is also supported. + */ + adev_to_drm(adev)->vblank_disable_immediate = !psr_feature_enabled; + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -6036,11 +6074,72 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, { stream->timing.flags.DSC = 0; - if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { - dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, - aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw, - aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw, - dsc_caps); + if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || + sink->sink_signal == SIGNAL_TYPE_EDP)) { + if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE || + sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) + dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw, + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw, + dsc_caps); + } +} + +static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps, + uint32_t max_dsc_target_bpp_limit_override) +{ + const struct dc_link_settings *verified_link_cap = NULL; + uint32_t link_bw_in_kbps; + uint32_t edp_min_bpp_x16, edp_max_bpp_x16; + struct dc *dc = sink->ctx->dc; + struct dc_dsc_bw_range bw_range = {0}; + struct dc_dsc_config dsc_cfg = {0}; + + verified_link_cap = dc_link_get_link_cap(stream->link); + link_bw_in_kbps = dc_link_bandwidth_kbps(stream->link, verified_link_cap); + edp_min_bpp_x16 = 8 * 16; + edp_max_bpp_x16 = 8 * 16; + + if (edp_max_bpp_x16 > dsc_caps->edp_max_bits_per_pixel) + edp_max_bpp_x16 = dsc_caps->edp_max_bits_per_pixel; + + if (edp_max_bpp_x16 < edp_min_bpp_x16) + edp_min_bpp_x16 = edp_max_bpp_x16; + + if (dc_dsc_compute_bandwidth_range(dc->res_pool->dscs[0], + dc->debug.dsc_min_slice_height_override, + edp_min_bpp_x16, edp_max_bpp_x16, + dsc_caps, + &stream->timing, + &bw_range)) { + + if (bw_range.max_kbps < link_bw_in_kbps) { + if (dc_dsc_compute_config(dc->res_pool->dscs[0], + dsc_caps, + dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + 0, + &stream->timing, + &dsc_cfg)) { + stream->timing.dsc_cfg = dsc_cfg; + stream->timing.flags.DSC = 1; + stream->timing.dsc_cfg.bits_per_pixel = edp_max_bpp_x16; + } + return; + } + } + + if (dc_dsc_compute_config(dc->res_pool->dscs[0], + dsc_caps, + dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + link_bw_in_kbps, + &stream->timing, + &dsc_cfg)) { + stream->timing.dsc_cfg = dsc_cfg; + stream->timing.flags.DSC = 1; } } @@ -6051,6 +6150,9 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; uint32_t max_dsc_target_bpp_limit_override = 0; + struct dc *dc = sink->ctx->dc; + uint32_t max_supported_bw_in_kbps, timing_bw_in_kbps; + uint32_t dsc_max_supported_bw_in_kbps; link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); @@ -6063,17 +6165,43 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_dsc_policy_set_enable_dsc_when_not_needed( aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE); - if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { + if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_EDP && !dc->debug.disable_dsc_edp && + dc->caps.edp_dsc_support && aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE) { - if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], + apply_dsc_policy_for_edp(aconnector, sink, stream, dsc_caps, max_dsc_target_bpp_limit_override); + + } else if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { + if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], dsc_caps, aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override, max_dsc_target_bpp_limit_override, link_bandwidth_kbps, &stream->timing, &stream->timing.dsc_cfg)) { - stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", + __func__, drm_connector->name); + } + } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { + timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); + max_supported_bw_in_kbps = link_bandwidth_kbps; + dsc_max_supported_bw_in_kbps = link_bandwidth_kbps; + + if (timing_bw_in_kbps > max_supported_bw_in_kbps && + max_supported_bw_in_kbps > 0 && + dsc_max_supported_bw_in_kbps > 0) + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], + dsc_caps, + aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override, + max_dsc_target_bpp_limit_override, + dsc_max_supported_bw_in_kbps, + &stream->timing, + &stream->timing.dsc_cfg)) { + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from DP-HDMI PCON\n", + __func__, drm_connector->name); + } } } @@ -8217,15 +8345,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, break; case DRM_MODE_CONNECTOR_DisplayPort: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; - if (link->is_dig_mapping_flexible && - link->dc->res_pool->funcs->link_encs_assign) { - link->link_enc = - link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, link); - if (!link->link_enc) - link->link_enc = - link_enc_cfg_get_next_avail_link_enc(link->ctx->dc); - } - + link->link_enc = dp_get_link_enc(link); + ASSERT(link->link_enc); if (link->link_enc) aconnector->base.ycbcr_420_allowed = link->link_enc->features.dp_ycbcr420_supported ? true : false; @@ -10628,6 +10749,24 @@ static int dm_update_plane_state(struct dc *dc, return ret; } +static void dm_get_oriented_plane_size(struct drm_plane_state *plane_state, + int *src_w, int *src_h) +{ + switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_90: + case DRM_MODE_ROTATE_270: + *src_w = plane_state->src_h >> 16; + *src_h = plane_state->src_w >> 16; + break; + case DRM_MODE_ROTATE_0: + case DRM_MODE_ROTATE_180: + default: + *src_w = plane_state->src_w >> 16; + *src_h = plane_state->src_h >> 16; + break; + } +} + static int dm_check_crtc_cursor(struct drm_atomic_state *state, struct drm_crtc *crtc, struct drm_crtc_state *new_crtc_state) @@ -10636,6 +10775,8 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, struct drm_plane_state *new_cursor_state, *new_underlying_state; int i; int cursor_scale_w, cursor_scale_h, underlying_scale_w, underlying_scale_h; + int cursor_src_w, cursor_src_h; + int underlying_src_w, underlying_src_h; /* On DCE and DCN there is no dedicated hardware cursor plane. We get a * cursor per pipe but it's going to inherit the scaling and @@ -10647,10 +10788,9 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, return 0; } - cursor_scale_w = new_cursor_state->crtc_w * 1000 / - (new_cursor_state->src_w >> 16); - cursor_scale_h = new_cursor_state->crtc_h * 1000 / - (new_cursor_state->src_h >> 16); + dm_get_oriented_plane_size(new_cursor_state, &cursor_src_w, &cursor_src_h); + cursor_scale_w = new_cursor_state->crtc_w * 1000 / cursor_src_w; + cursor_scale_h = new_cursor_state->crtc_h * 1000 / cursor_src_h; for_each_new_plane_in_state_reverse(state, underlying, new_underlying_state, i) { /* Narrow down to non-cursor planes on the same CRTC as the cursor */ @@ -10661,10 +10801,10 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, if (!new_underlying_state->fb) continue; - underlying_scale_w = new_underlying_state->crtc_w * 1000 / - (new_underlying_state->src_w >> 16); - underlying_scale_h = new_underlying_state->crtc_h * 1000 / - (new_underlying_state->src_h >> 16); + dm_get_oriented_plane_size(new_underlying_state, + &underlying_src_w, &underlying_src_h); + underlying_scale_w = new_underlying_state->crtc_w * 1000 / underlying_src_w; + underlying_scale_h = new_underlying_state->crtc_h * 1000 / underlying_src_h; if (cursor_scale_w != underlying_scale_w || cursor_scale_h != underlying_scale_h) { @@ -10759,8 +10899,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, trace_amdgpu_dm_atomic_check_begin(state); ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_helper_check_modeset() failed\n"); goto fail; + } /* Check connector changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { @@ -10776,6 +10918,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc); if (IS_ERR(new_crtc_state)) { + DRM_DEBUG_DRIVER("drm_atomic_get_crtc_state() failed\n"); ret = PTR_ERR(new_crtc_state); goto fail; } @@ -10790,8 +10933,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = add_affected_mst_dsc_crtcs(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("add_affected_mst_dsc_crtcs() failed\n"); goto fail; + } } } } @@ -10806,19 +10951,25 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, continue; ret = amdgpu_dm_verify_lut_sizes(new_crtc_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("amdgpu_dm_verify_lut_sizes() failed\n"); goto fail; + } if (!new_crtc_state->enable) continue; ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_add_affected_connectors() failed\n"); goto fail; + } ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_add_affected_planes() failed\n"); goto fail; + } if (dm_old_crtc_state->dsc_force_changed) new_crtc_state->mode_changed = true; @@ -10855,6 +11006,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (IS_ERR(new_plane_state)) { ret = PTR_ERR(new_plane_state); + DRM_DEBUG_DRIVER("new_plane_state is BAD\n"); goto fail; } } @@ -10867,8 +11019,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_plane_state, false, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n"); goto fail; + } } /* Disable all crtcs which require disable */ @@ -10878,8 +11032,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state, false, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("DISABLE: dm_update_crtc_state() failed\n"); goto fail; + } } /* Enable all crtcs which require enable */ @@ -10889,8 +11045,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state, true, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("ENABLE: dm_update_crtc_state() failed\n"); goto fail; + } } /* Add new/modified planes */ @@ -10900,20 +11058,26 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_plane_state, true, &lock_and_validation_needed); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n"); goto fail; + } } /* Run this here since we want to validate the streams we created */ ret = drm_atomic_helper_check_planes(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_atomic_helper_check_planes() failed\n"); goto fail; + } /* Check cursor planes scaling */ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { ret = dm_check_crtc_cursor(state, crtc, new_crtc_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_check_crtc_cursor() failed\n"); goto fail; + } } if (state->legacy_cursor_update) { @@ -11000,20 +11164,28 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, */ if (lock_and_validation_needed) { ret = dm_atomic_get_state(state, &dm_state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_atomic_get_state() failed\n"); goto fail; + } ret = do_aquire_global_lock(dev, state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("do_aquire_global_lock() failed\n"); goto fail; + } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) + if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); goto fail; + } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("dm_update_mst_vcpi_slots_for_dsc() failed\n"); goto fail; + } #endif /* @@ -11023,12 +11195,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * to get stuck in an infinite loop and hang eventually. */ ret = drm_dp_mst_atomic_check(state); - if (ret) + if (ret) { + DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; - status = dc_validate_global_state(dc, dm_state->context, false); + } + status = dc_validate_global_state(dc, dm_state->context, true); if (status != DC_OK) { - drm_dbg_atomic(dev, - "DC global validation failure: %s (%d)", + DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); ret = -EINVAL; goto fail; @@ -11150,7 +11323,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, sizeof(cmd.edid_cea) - sizeof(cmd.edid_cea.header); input->offset = offset; input->length = length; - input->total_length = total_length; + input->cea_total_length = total_length; memcpy(input->payload, data, length); res = dc_dmub_srv_cmd_with_reply_data(dm->dc->ctx->dmub_srv, &cmd); @@ -11457,8 +11630,10 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return value; } -int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct dc_context *ctx, - uint8_t status_type, uint32_t *operation_result) +static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, + struct dc_context *ctx, + uint8_t status_type, + uint32_t *operation_result) { struct amdgpu_device *adev = ctx->driver_context; int return_status = -1; @@ -11529,3 +11704,24 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, (uint32_t *)operation_result); } + +/* + * Check whether seamless boot is supported. + * + * So far we only support seamless boot on CHIP_VANGOGH. + * If everything goes well, we may consider expanding + * seamless boot to other ASICs. + */ +bool check_seamless_boot_capability(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VANGOGH: + if (!adev->mman.keep_stolen_vga_memory) + return true; + break; + default: + break; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 37e61a88d49e..c98e402eab0c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,9 +50,9 @@ #define AMDGPU_DMUB_NOTIFICATION_MAX 5 -/** +/* * DMUB Async to Sync Mechanism Status - **/ + */ #define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 #define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 #define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 @@ -731,4 +731,7 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, unsigned int link_index, void *payload, void *operation_result); + +bool check_seamless_boot_capability(struct amdgpu_device *adev); + #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a022e5bb30a5..a71177305bcd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -285,8 +285,12 @@ static int __set_input_tf(struct dc_transfer_func *func, } /** + * amdgpu_dm_verify_lut_sizes + * @crtc_state: the DRM CRTC state + * * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of * the expected size. + * * Returns 0 on success. */ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index cce062adc439..8a441a22c46e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -314,6 +314,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) ret = -EINVAL; goto cleanup; } + + if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) && + (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) { + DRM_DEBUG_DRIVER("No DP connector available for CRC source\n"); + ret = -EINVAL; + goto cleanup; + } + } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 0277685864c5..26719efa5396 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -824,6 +824,48 @@ static int dmub_fw_state_show(struct seq_file *m, void *data) return seq_write(m, state_base, state_size); } +/* psr_capability_show() - show eDP panel PSR capability + * + * The read function: sink_psr_capability_show + * Shows if sink has PSR capability or not. + * If yes - the PSR version is appended + * + * cat /sys/kernel/debug/dri/0/eDP-X/psr_capability + * + * Expected output: + * "Sink support: no\n" - if panel doesn't support PSR + * "Sink support: yes [0x01]\n" - if panel supports PSR1 + * "Driver support: no\n" - if driver doesn't support PSR + * "Driver support: yes [0x01]\n" - if driver supports PSR1 + */ +static int psr_capability_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct dc_link *link = aconnector->dc_link; + + if (!link) + return -ENODEV; + + if (link->type == dc_connection_none) + return -ENODEV; + + if (!(link->connector_signal & SIGNAL_TYPE_EDP)) + return -ENODEV; + + seq_printf(m, "Sink support: %s", yesno(link->dpcd_caps.psr_caps.psr_version != 0)); + if (link->dpcd_caps.psr_caps.psr_version) + seq_printf(m, " [0x%02x]", link->dpcd_caps.psr_caps.psr_version); + seq_puts(m, "\n"); + + seq_printf(m, "Driver support: %s", yesno(link->psr_settings.psr_feature_enabled)); + if (link->psr_settings.psr_version) + seq_printf(m, " [0x%02x]", link->psr_settings.psr_version); + seq_puts(m, "\n"); + + return 0; +} + /* * Returns the current and maximum output bpc for the connector. * Example usage: cat /sys/kernel/debug/dri/0/DP-1/output_bpc @@ -2467,6 +2509,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status); DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); +DEFINE_SHOW_ATTRIBUTE(psr_capability); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2712,6 +2755,138 @@ static const struct { {"internal_display", &internal_display_fops} }; +/* + * Returns supported customized link rates by this eDP panel. + * Example usage: cat /sys/kernel/debug/dri/0/eDP-x/ilr_setting + */ +static int edp_ilr_show(struct seq_file *m, void *unused) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private); + struct dc_link *link = aconnector->dc_link; + uint8_t supported_link_rates[16]; + uint32_t link_rate_in_khz; + uint32_t entry = 0; + uint8_t dpcd_rev; + + memset(supported_link_rates, 0, sizeof(supported_link_rates)); + dm_helpers_dp_read_dpcd(link->ctx, link, DP_SUPPORTED_LINK_RATES, + supported_link_rates, sizeof(supported_link_rates)); + + dpcd_rev = link->dpcd_caps.dpcd_rev.raw; + + if (dpcd_rev >= DP_DPCD_REV_13 && + (supported_link_rates[entry+1] != 0 || supported_link_rates[entry] != 0)) { + + for (entry = 0; entry < 16; entry += 2) { + link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 + + supported_link_rates[entry]) * 200; + seq_printf(m, "[%d] %d kHz\n", entry/2, link_rate_in_khz); + } + } else { + seq_printf(m, "ILR is not supported by this eDP panel.\n"); + } + + return 0; +} + +/* + * Set supported customized link rate to eDP panel. + * + * echo <lane_count> <link_rate option> > ilr_setting + * + * for example, supported ILR : [0] 1620000 kHz [1] 2160000 kHz [2] 2430000 kHz ... + * echo 4 1 > /sys/kernel/debug/dri/0/eDP-x/ilr_setting + * to set 4 lanes and 2.16 GHz + */ +static ssize_t edp_ilr_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + struct dc_link *link = connector->dc_link; + struct amdgpu_device *adev = drm_to_adev(connector->base.dev); + struct dc *dc = (struct dc *)link->dc; + struct dc_link_settings prefer_link_settings; + char *wr_buf = NULL; + const uint32_t wr_buf_size = 40; + /* 0: lane_count; 1: link_rate */ + int max_param_num = 2; + uint8_t param_nums = 0; + long param[2]; + bool valid_input = true; + + if (size == 0) + return -EINVAL; + + wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); + if (!wr_buf) + return -ENOMEM; + + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + ¶m_nums)) { + kfree(wr_buf); + return -EINVAL; + } + + if (param_nums <= 0) { + kfree(wr_buf); + return -EINVAL; + } + + switch (param[0]) { + case LANE_COUNT_ONE: + case LANE_COUNT_TWO: + case LANE_COUNT_FOUR: + break; + default: + valid_input = false; + break; + } + + if (param[1] >= link->dpcd_caps.edp_supported_link_rates_count) + valid_input = false; + + if (!valid_input) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("Invalid Input value. No HW will be programmed\n"); + prefer_link_settings.use_link_rate_set = false; + dc_link_set_preferred_training_settings(dc, NULL, NULL, link, true); + return size; + } + + /* save user force lane_count, link_rate to preferred settings + * spread spectrum will not be changed + */ + prefer_link_settings.link_spread = link->cur_link_settings.link_spread; + prefer_link_settings.lane_count = param[0]; + prefer_link_settings.use_link_rate_set = true; + prefer_link_settings.link_rate_set = param[1]; + prefer_link_settings.link_rate = link->dpcd_caps.edp_supported_link_rates[param[1]]; + + mutex_lock(&adev->dm.dc_lock); + dc_link_set_preferred_training_settings(dc, &prefer_link_settings, + NULL, link, false); + mutex_unlock(&adev->dm.dc_lock); + + kfree(wr_buf); + return size; +} + +static int edp_ilr_open(struct inode *inode, struct file *file) +{ + return single_open(file, edp_ilr_show, inode->i_private); +} + +static const struct file_operations edp_ilr_debugfs_fops = { + .owner = THIS_MODULE, + .open = edp_ilr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = edp_ilr_write +}; + void connector_debugfs_init(struct amdgpu_dm_connector *connector) { int i; @@ -2726,11 +2901,14 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) } } if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { + debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops); debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops); debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector, ¤t_backlight_fops); debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector, &target_backlight_fops); + debugfs_create_file("ilr_setting", 0644, dir, connector, + &edp_ilr_debugfs_fops); } for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) { @@ -2909,10 +3087,13 @@ static int crc_win_update_set(void *data, u64 val) struct amdgpu_device *adev = drm_to_adev(new_crtc->dev); struct crc_rd_work *crc_rd_wrk = adev->dm.crc_rd_wrk; + if (!crc_rd_wrk) + return 0; + if (val) { spin_lock_irq(&adev_to_drm(adev)->event_lock); spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock); - if (crc_rd_wrk && crc_rd_wrk->crtc) { + if (crc_rd_wrk->crtc) { old_crtc = crc_rd_wrk->crtc; old_acrtc = to_amdgpu_crtc(old_crtc); } @@ -3190,6 +3371,32 @@ static int disable_hpd_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get, disable_hpd_set, "%llu\n"); +#if defined(CONFIG_DRM_AMD_DC_DCN) +/* + * Temporary w/a to force sst sequence in M42D DP2 mst receiver + * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dp_set_mst_en_for_sst + */ +static int dp_force_sst_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + adev->dm.dc->debug.set_mst_en_for_sst = val; + + return 0; +} + +static int dp_force_sst_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.dc->debug.set_mst_en_for_sst; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(dp_set_mst_en_for_sst_ops, dp_force_sst_get, + dp_force_sst_set, "%llu\n"); +#endif + /* * Sets the DC visual confirm debug option from the given string. * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_visual_confirm @@ -3299,6 +3506,10 @@ void dtn_debugfs_init(struct amdgpu_device *adev) adev, &mst_topo_fops); debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev, &dtn_log_fops); +#if defined(CONFIG_DRM_AMD_DC_DCN) + debugfs_create_file("amdgpu_dm_dp_set_mst_en_for_sst", 0644, root, adev, + &dp_set_mst_en_for_sst_ops); +#endif debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev, &visual_confirm_fops); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 8cbeeb7c986d..29f07c26d080 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -83,16 +83,17 @@ static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps) * void * */ enum dc_edid_status dm_helpers_parse_edid_caps( - struct dc_context *ctx, + struct dc_link *link, const struct dc_edid *edid, struct dc_edid_caps *edid_caps) { + struct amdgpu_dm_connector *aconnector = link->priv; + struct drm_connector *connector = &aconnector->base; struct edid *edid_buf = (struct edid *) edid->raw_edid; struct cea_sad *sads; int sad_count = -1; int sadb_count = -1; int i = 0; - int j = 0; uint8_t *sadb = NULL; enum dc_edid_status result = EDID_OK; @@ -111,23 +112,11 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->manufacture_week = edid_buf->mfg_week; edid_caps->manufacture_year = edid_buf->mfg_year; - /* One of the four detailed_timings stores the monitor name. It's - * stored in an array of length 13. */ - for (i = 0; i < 4; i++) { - if (edid_buf->detailed_timings[i].data.other_data.type == 0xfc) { - while (j < 13 && edid_buf->detailed_timings[i].data.other_data.data.str.str[j]) { - if (edid_buf->detailed_timings[i].data.other_data.data.str.str[j] == '\n') - break; - - edid_caps->display_name[j] = - edid_buf->detailed_timings[i].data.other_data.data.str.str[j]; - j++; - } - } - } + drm_edid_get_monitor_name(edid_buf, + edid_caps->display_name, + AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS); - edid_caps->edid_hdmi = drm_detect_hdmi_monitor( - (struct edid *) edid->raw_edid); + edid_caps->edid_hdmi = connector->display_info.is_hdmi; sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) @@ -584,9 +573,18 @@ bool dm_helpers_dp_write_dsc_enable( ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); } - if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) { - ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); - DC_LOG_DC("Send DSC %s to sst display\n", enable_dsc ? "enable" : "disable"); + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { +#endif + ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to SST RX\n", enable_dsc ? "enable" : "disable"); +#if defined(CONFIG_DRM_AMD_DC_DCN) + } else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { + ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to DP-HDMI PCON\n", enable_dsc ? "enable" : "disable"); + } +#endif } return (ret > 0); @@ -650,14 +648,8 @@ enum dc_edid_status dm_helpers_read_local_edid( /* We don't need the original edid anymore */ kfree(edid); - /* connector->display_info will be parsed from EDID and saved - * into drm_connector->display_info from edid by call stack - * below: - * drm_parse_ycbcr420_deep_color_info - * drm_parse_hdmi_forum_vsdb - * drm_parse_cea_ext - * drm_add_display_info - * drm_connector_update_edid_property + /* connector->display_info is parsed from EDID and saved + * into drm_connector->display_info * * drm_connector->display_info will be used by amdgpu_dm funcs, * like fill_stream_properties_from_drm_display_mode @@ -665,7 +657,7 @@ enum dc_edid_status dm_helpers_read_local_edid( amdgpu_dm_update_connector_after_detect(aconnector); edid_status = dm_helpers_parse_edid_caps( - ctx, + link, &sink->dc_edid, &sink->edid_caps); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 32a5ce09a62a..cc34a35d0bcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,8 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "ddc_service_types.h" +#include "dpcd_defs.h" #include "i2caux_interface.h" #include "dmub_cmd.h" @@ -157,6 +159,16 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { }; #if defined(CONFIG_DRM_AMD_DC_DCN) +static bool needs_dsc_aux_workaround(struct dc_link *link) +{ + if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) && + link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2) + return true; + + return false; +} + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -166,7 +178,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 *dsc_branch_dec_caps = NULL; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); -#if defined(CONFIG_HP_HOOK_WORKAROUND) + /* * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs * because it only check the dsc/fec caps of the "port variable" and not the dock @@ -176,10 +188,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux * */ - - if (!aconnector->dsc_aux && !port->parent->port_parent) + if (!aconnector->dsc_aux && !port->parent->port_parent && + needs_dsc_aux_workaround(aconnector->dc_link)) aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; -#endif + if (!aconnector->dsc_aux) return false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index c022e56f9459..c510638b4f99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -26,6 +26,73 @@ #include "amdgpu_dm_psr.h" #include "dc.h" #include "dm_helpers.h" +#include "amdgpu_dm.h" + +static bool link_get_psr_caps(struct dc_link *link) +{ + uint8_t psr_dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; + uint8_t edp_rev_dpcd_data; + + + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, + psr_dpcd_data, sizeof(psr_dpcd_data))) + return false; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_EDP_DPCD_REV, + &edp_rev_dpcd_data, sizeof(edp_rev_dpcd_data))) + return false; + + link->dpcd_caps.psr_caps.psr_version = psr_dpcd_data[0]; + link->dpcd_caps.psr_caps.edp_revision = edp_rev_dpcd_data; + +#ifdef CONFIG_DRM_AMD_DC_DCN + if (link->dpcd_caps.psr_caps.psr_version > 0x1) { + uint8_t alpm_dpcd_data; + uint8_t su_granularity_dpcd_data; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_RECEIVER_ALPM_CAP, + &alpm_dpcd_data, sizeof(alpm_dpcd_data))) + return false; + + if (!dm_helpers_dp_read_dpcd(NULL, link, DP_PSR2_SU_Y_GRANULARITY, + &su_granularity_dpcd_data, sizeof(su_granularity_dpcd_data))) + return false; + + link->dpcd_caps.psr_caps.y_coordinate_required = psr_dpcd_data[1] & DP_PSR2_SU_Y_COORDINATE_REQUIRED; + link->dpcd_caps.psr_caps.su_granularity_required = psr_dpcd_data[1] & DP_PSR2_SU_GRANULARITY_REQUIRED; + + link->dpcd_caps.psr_caps.alpm_cap = alpm_dpcd_data & DP_ALPM_CAP; + link->dpcd_caps.psr_caps.standby_support = alpm_dpcd_data & (1 << 1); + + link->dpcd_caps.psr_caps.su_y_granularity = su_granularity_dpcd_data; + } +#endif + return true; +} + +#ifdef CONFIG_DRM_AMD_DC_DCN +static bool link_supports_psrsu(struct dc_link *link) +{ + struct dc *dc = link->ctx->dc; + + if (!dc->caps.dmcub_support) + return false; + + if (dc->ctx->dce_version < DCN_VERSION_3_1) + return false; + + if (!link->dpcd_caps.psr_caps.alpm_cap || + !link->dpcd_caps.psr_caps.y_coordinate_required) + return false; + + if (link->dpcd_caps.psr_caps.su_granularity_required && + !link->dpcd_caps.psr_caps.su_y_granularity) + return false; + + return true; +} +#endif /* * amdgpu_dm_set_psr_caps() - set link psr capabilities @@ -34,26 +101,34 @@ */ void amdgpu_dm_set_psr_caps(struct dc_link *link) { - uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; - if (!(link->connector_signal & SIGNAL_TYPE_EDP)) return; + if (link->type == dc_connection_none) return; - if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, - dpcd_data, sizeof(dpcd_data))) { - link->dpcd_caps.psr_caps.psr_version = dpcd_data[0]; - - if (dpcd_data[0] == 0) { - link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; - link->psr_settings.psr_feature_enabled = false; - } else { + + if (!link_get_psr_caps(link)) { + DRM_ERROR("amdgpu: Failed to read PSR Caps!\n"); + return; + } + + if (link->dpcd_caps.psr_caps.psr_version == 0) { + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; + link->psr_settings.psr_feature_enabled = false; + + } else { +#ifdef CONFIG_DRM_AMD_DC_DCN + if (link_supports_psrsu(link)) + link->psr_settings.psr_version = DC_PSR_VERSION_SU_1; + else +#endif link->psr_settings.psr_version = DC_PSR_VERSION_1; - link->psr_settings.psr_feature_enabled = true; - } - DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + link->psr_settings.psr_feature_enabled = true; } + + DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + } /* diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a4bef4364afd..1e385d55e7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2995,7 +2995,7 @@ static bool bios_parser2_construct( &bp->object_info_tbl.revision); if (bp->object_info_tbl.revision.major == 1 - && bp->object_info_tbl.revision.minor >= 4) { + && bp->object_info_tbl.revision.minor == 4) { struct display_object_info_table_v1_4 *tbl_v1_4; tbl_v1_4 = GET_IMAGE(struct display_object_info_table_v1_4, @@ -3004,8 +3004,10 @@ static bool bios_parser2_construct( return false; bp->object_info_tbl.v1_4 = tbl_v1_4; - } else + } else { + ASSERT(0); return false; + } dal_firmware_parser_init_cmd_tbl(bp); dal_bios_parser_init_cmd_tbl_helper2(&bp->cmd_helper, dce_version); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 6b248cd2a461..ec19678a0702 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -739,7 +739,9 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz); } -unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, uint32_t hw_internal_rev, uint32_t pci_revision_id) +static unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, + uint32_t hw_internal_rev, + uint32_t pci_revision_id) { /* for low power RV2 variants, the highest voltage level we want is 0 */ if ((chip_family == FAMILY_RV) && @@ -763,7 +765,7 @@ unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, uint32_t hw return 4; } -bool dcn_validate_bandwidth( +bool dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 26f96ee32472..9200c8ce02ba 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -308,8 +308,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base) case FAMILY_NV: if (ASICREV_IS_SIENNA_CICHLID_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { dcn3_clk_mgr_destroy(clk_mgr); - } - if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { + } else if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { dcn3_clk_mgr_destroy(clk_mgr); } if (ASICREV_IS_BEIGE_GOBY_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index 76ec8ec92efd..60761ff3cbf1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -34,7 +34,7 @@ #include "rv1_clk_mgr_vbios_smu.h" #include "rv1_clk_mgr_clk.h" -void rv1_init_clocks(struct clk_mgr *clk_mgr) +static void rv1_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index fe18bb9e19aa..06bab24d8e27 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ -28,6 +28,8 @@ #include "reg_helper.h" #include <linux/delay.h> +#include "rv1_clk_mgr_vbios_smu.h" + #define MAX_INSTANCE 5 #define MAX_SEGMENT 5 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 2108bff49d4e..9f35f2e8f971 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -409,7 +409,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.prev_p_state_change_support = true; } -void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) +static void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct pp_smu_funcs_nv *pp_smu = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index db9950244c7b..fbdd0a92d146 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -74,42 +74,6 @@ static const struct clk_mgr_mask clk_mgr_mask = { CLK_COMMON_MASK_SH_LIST_DCN201_BASE(_MASK) }; -void dcn201_update_clocks_vbios(struct clk_mgr *clk_mgr, - struct dc_state *context, - bool safe_to_lower) -{ - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; - - bool update_dppclk = false; - bool update_dispclk = false; - - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->clks.dppclk_khz)) { - clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; - update_dppclk = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz)) { - clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; - update_dispclk = true; - } - - if (update_dppclk || update_dispclk) { - struct bp_set_dce_clock_parameters dce_clk_params; - struct dc_bios *bp = clk_mgr->ctx->dc_bios; - - if (update_dispclk) { - memset(&dce_clk_params, 0, sizeof(dce_clk_params)); - dce_clk_params.target_clock_frequency = new_clocks->dispclk_khz; - dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; - dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; - bp->funcs->set_dce_clock(bp, &dce_clk_params); - } - /* currently there is no DCECLOCK_TYPE_DPPCLK type defined in VBIOS interface. - * vbios program DPPCLK to the same DispCLK limitation - */ - } -} - static void dcn201_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); @@ -126,10 +90,8 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; bool update_dppclk = false; bool update_dispclk = false; - bool enter_display_off = false; bool dpp_clock_lowered = false; bool force_reset = false; bool p_state_change_support; @@ -145,10 +107,7 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, dcn2_read_clocks_from_hw_dentist(clk_mgr_base); } - display_count = clk_mgr_helper_get_active_display_cnt(dc, context); - - if (display_count == 0) - enter_display_off = true; + clk_mgr_helper_get_active_display_cnt(dc, context); if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ac2d4c4f04e4..fbda42313bfe 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -56,9 +56,7 @@ /* TODO: evaluate how to lower or disable all dcn clocks in screen off case */ -int rn_get_active_display_cnt_wa( - struct dc *dc, - struct dc_state *context) +static int rn_get_active_display_cnt_wa(struct dc *dc, struct dc_state *context) { int i, display_count; bool tmds_present = false; @@ -77,7 +75,8 @@ int rn_get_active_display_cnt_wa( const struct dc_link *link = dc->links[i]; /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->link_enc->funcs->is_dig_enabled(link->link_enc)) + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } @@ -88,7 +87,7 @@ int rn_get_active_display_cnt_wa( return display_count; } -void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) +static void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -122,7 +121,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, } -void rn_update_clocks(struct clk_mgr *clk_mgr_base, +static void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) { @@ -437,25 +436,14 @@ static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an } } -/* This function produce translated logical clk state values*/ -void rn_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s) -{ - struct clk_state_registers_and_bypass sb = { 0 }; - struct clk_log_info log_info = { 0 }; - - rn_dump_clk_registers(&sb, clk_mgr_base, &log_info); - - s->dprefclk_khz = sb.dprefclk * 1000; -} - -void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) +static void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); rn_vbios_smu_enable_pme_wa(clk_mgr); } -void rn_init_clocks(struct clk_mgr *clk_mgr) +static void rn_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 9f7eed6688c4..8161a6ae410d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -33,6 +33,8 @@ #include "mp/mp_12_0_0_offset.h" #include "mp/mp_12_0_0_sh_mask.h" +#include "rn_clk_mgr_vbios_smu.h" + #define REG(reg_name) \ (MP0_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) @@ -86,7 +88,9 @@ static uint32_t rn_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsig } -int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param) +static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 1861a147a7fa..f977f29907df 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -252,6 +252,7 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, bool update_dispclk = false; bool enter_display_off = false; bool dpp_clock_lowered = false; + bool update_pstate_unsupported_clk = false; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; bool force_reset = false; bool update_uclk = false; @@ -299,13 +300,28 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); - if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + + // invalidate the current P-State forced min in certain dc_mode_softmax situations + if (dc->clk_mgr->dc_mode_softmax_enabled && safe_to_lower && !p_state_change_support) { + if ((new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) != + (clk_mgr_base->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)) + update_pstate_unsupported_clk = true; + } + + if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support) || + update_pstate_unsupported_clk) { clk_mgr_base->clks.p_state_change_support = p_state_change_support; /* to disable P-State switching, set UCLK min = max */ - if (!clk_mgr_base->clks.p_state_change_support) - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + if (!clk_mgr_base->clks.p_state_change_support) { + if (dc->clk_mgr->dc_mode_softmax_enabled && + new_clocks->dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + else + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz); + } } /* Always update saved value, even if new value not set due to P-State switching unsupported */ @@ -421,6 +437,24 @@ static void dcn3_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz); } +static void dcn3_set_max_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} +static void dcn3_set_min_memclk(struct clk_mgr *clk_mgr_base, unsigned int memclk_mhz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, memclk_mhz); +} + /* Get current memclk states, update bounding box */ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) { @@ -436,6 +470,8 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &num_levels); clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; + clk_mgr_base->bw_params->dc_mode_softmax_memclk = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + /* Refresh bounding box */ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); @@ -505,6 +541,8 @@ static struct clk_mgr_funcs dcn3_funcs = { .notify_wm_ranges = dcn3_notify_wm_ranges, .set_hard_min_memclk = dcn3_set_hard_min_memclk, .set_hard_max_memclk = dcn3_set_hard_max_memclk, + .set_max_memclk = dcn3_set_max_memclk, + .set_min_memclk = dcn3_set_min_memclk, .get_memclk_states_from_smu = dcn3_get_memclk_states_from_smu, .are_clock_states_equal = dcn3_are_clock_states_equal, .enable_pme_wa = dcn3_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c index 6ea642615854..d9920d91838d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c @@ -88,9 +88,9 @@ static uint32_t dcn301_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, u return res_val; } -int dcn301_smu_send_msg_with_param( - struct clk_mgr_internal *clk_mgr, - unsigned int msg_id, unsigned int param) +static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 3eee32faa208..8f78e62b28b7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -89,9 +89,9 @@ static int vg_get_active_display_cnt_wa( return display_count; } -void vg_update_clocks(struct clk_mgr *clk_mgr_base, - struct dc_state *context, - bool safe_to_lower) +static void vg_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; @@ -367,18 +367,6 @@ static void vg_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an } } -/* This function produce translated logical clk state values*/ -void vg_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s) -{ - - struct clk_state_registers_and_bypass sb = { 0 }; - struct clk_log_info log_info = { 0 }; - - vg_dump_clk_registers(&sb, clk_mgr_base, &log_info); - - s->dprefclk_khz = sb.dprefclk * 1000; -} - static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); @@ -386,7 +374,7 @@ static void vg_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn301_smu_enable_pme_wa(clk_mgr); } -void vg_init_clocks(struct clk_mgr *clk_mgr) +static void vg_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index f4c9a458ace8..412cc6a716f7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -66,7 +66,7 @@ #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) -int dcn31_get_active_display_cnt_wa( +static int dcn31_get_active_display_cnt_wa( struct dc *dc, struct dc_state *context) { @@ -118,7 +118,7 @@ static void dcn31_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) } } -static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, +void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) { @@ -284,7 +284,7 @@ static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn31_smu_enable_pme_wa(clk_mgr); } -static void dcn31_init_clocks(struct clk_mgr *clk_mgr) +void dcn31_init_clocks(struct clk_mgr *clk_mgr) { memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -294,7 +294,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } -static bool dcn31_are_clock_states_equal(struct dc_clocks *a, +bool dcn31_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b) { if (a->dispclk_khz != b->dispclk_khz) @@ -540,10 +540,9 @@ static unsigned int find_clk_for_voltage( return clock; } -void dcn31_clk_mgr_helper_populate_bw_params( - struct clk_mgr_internal *clk_mgr, - struct integrated_info *bios_info, - const DpmClocks_t *clock_table) +static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, + struct integrated_info *bios_info, + const DpmClocks_t *clock_table) { int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index f8f100535526..961b10a49486 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -39,6 +39,13 @@ struct clk_mgr_dcn31 { struct dcn31_smu_watermark_set smu_wm_set; }; +bool dcn31_are_clock_states_equal(struct dc_clocks *a, + struct dc_clocks *b); +void dcn31_init_clocks(struct clk_mgr *clk_mgr); +void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower); + void dcn31_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_dcn31 *clk_mgr, struct pp_smu_funcs *pp_smu, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 8c2b77eb9459..b7ace235a2d5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -95,9 +95,9 @@ static uint32_t dcn31_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un return res_val; } -int dcn31_smu_send_msg_with_param( - struct clk_mgr_internal *clk_mgr, - unsigned int msg_id, unsigned int param) +static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + unsigned int msg_id, + unsigned int param) { uint32_t result; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 0ded4decee05..c250f6de5136 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -221,9 +221,9 @@ static bool create_links( link = link_create(&link_init_params); if (link) { - dc->links[dc->link_count] = link; - link->dc = dc; - ++dc->link_count; + dc->links[dc->link_count] = link; + link->dc = dc; + ++dc->link_count; } } @@ -808,6 +808,10 @@ void dc_stream_set_static_screen_params(struct dc *dc, static void dc_destruct(struct dc *dc) { + // reset link encoder assignment table on destruct + if (dc->res_pool && dc->res_pool->funcs->link_encs_assign) + link_enc_cfg_init(dc, dc->current_state); + if (dc->current_state) { dc_release_state(dc->current_state); dc->current_state = NULL; @@ -1016,8 +1020,6 @@ static bool dc_construct(struct dc *dc, goto fail; } - dc_resource_state_construct(dc, dc->current_state); - if (!create_links(dc, init_params->num_virtual_links)) goto fail; @@ -1027,8 +1029,7 @@ static bool dc_construct(struct dc *dc, if (!create_link_encoders(dc)) goto fail; - /* Initialise DIG link encoder resource tracking variables. */ - link_enc_cfg_init(dc, dc->current_state); + dc_resource_state_construct(dc, dc->current_state); return true; @@ -1421,22 +1422,29 @@ static void program_timing_sync( status->timing_sync_info.master = false; } - /* remove any other unblanked pipes as they have already been synced */ - for (j = j + 1; j < group_size; j++) { - bool is_blanked; - if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) - is_blanked = - pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); - else - is_blanked = - pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); - if (!is_blanked) { - group_size--; - pipe_set[j] = pipe_set[group_size]; - j--; + /* remove any other pipes that are already been synced */ + if (dc->config.use_pipe_ctx_sync_logic) { + /* check pipe's syncd to decide which pipe to be removed */ + for (j = 1; j < group_size; j++) { + if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } else + /* link slave pipe's syncd with master pipe */ + pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; } - } + } else { + /* remove any other pipes by checking valid plane */ + for (j = j + 1; j < group_size; j++) { + if (pipe_set[j]->plane_state) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } + } + } if (group_size > 1) { if (sync_type == TIMING_SYNCHRONIZABLE) { @@ -1830,6 +1838,19 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context) dc_stream_log(dc, stream); } + /* + * Previous validation was perfomred with fast_validation = true and + * the full DML state required for hardware programming was skipped. + * + * Re-validate here to calculate these parameters / watermarks. + */ + result = dc_validate_global_state(dc, context, false); + if (result != DC_OK) { + DC_LOG_ERROR("DC commit global validation failure: %s (%d)", + dc_status_to_str(result), result); + return result; + } + result = dc_commit_state_no_check(dc, context); return (result == DC_OK); @@ -2870,7 +2891,8 @@ static void commit_planes_for_stream(struct dc *dc, #endif if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -2979,12 +3001,12 @@ static void commit_planes_for_stream(struct dc *dc, #ifdef CONFIG_DRM_AMD_DC_DCN if (dc->debug.validate_dml_output) { for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx cur_pipe = context->res_ctx.pipe_ctx[i]; - if (cur_pipe.stream == NULL) + struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i]; + if (cur_pipe->stream == NULL) continue; - cur_pipe.plane_res.hubp->funcs->validate_dml_output( - cur_pipe.plane_res.hubp, dc->ctx, + cur_pipe->plane_res.hubp->funcs->validate_dml_output( + cur_pipe->plane_res.hubp, dc->ctx, &context->res_ctx.pipe_ctx[i].rq_regs, &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs); @@ -3426,7 +3448,7 @@ struct dc_sink *dc_link_add_remote_sink( goto fail_add_sink; edid_status = dm_helpers_parse_edid_caps( - link->ctx, + link, &dc_sink->dc_edid, &dc_sink->edid_caps); @@ -3583,6 +3605,98 @@ void dc_lock_memory_clock_frequency(struct dc *dc) core_link_enable_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); } +static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memclk_mhz) +{ + struct dc_state *context = dc->current_state; + struct hubp *hubp; + struct pipe_ctx *pipe; + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream != NULL) { + dc->hwss.disable_pixel_data(dc, pipe, true); + + // wait for double buffer + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE); + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK); + pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VACTIVE); + + hubp = pipe->plane_res.hubp; + hubp->funcs->set_blank_regs(hubp, true); + } + } + + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream != NULL) { + dc->hwss.disable_pixel_data(dc, pipe, false); + + hubp = pipe->plane_res.hubp; + hubp->funcs->set_blank_regs(hubp, false); + } + } +} + + +/** + * dc_enable_dcmode_clk_limit() - lower clocks in dc (battery) mode + * @dc: pointer to dc of the dm calling this + * @enable: True = transition to DC mode, false = transition back to AC mode + * + * Some SoCs define additional clock limits when in DC mode, DM should + * invoke this function when the platform undergoes a power source transition + * so DC can apply/unapply the limit. This interface may be disruptive to + * the onscreen content. + * + * Context: Triggered by OS through DM interface, or manually by escape calls. + * Need to hold a dclock when doing so. + * + * Return: none (void function) + * + */ +void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + unsigned int softMax, maxDPM, funcMin; + bool p_state_change_support; + + if (!ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev)) + return; + + softMax = dc->clk_mgr->bw_params->dc_mode_softmax_memclk; + maxDPM = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz; + funcMin = (dc->clk_mgr->clks.dramclk_khz + 999) / 1000; + p_state_change_support = dc->clk_mgr->clks.p_state_change_support; + + if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { + if (p_state_change_support) { + if (funcMin <= softMax) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); + // else: No-Op + } else { + if (funcMin <= softMax) + blank_and_force_memclk(dc, true, softMax); + // else: No-Op + } + } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { + if (p_state_change_support) { + if (funcMin <= softMax) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); + // else: No-Op + } else { + if (funcMin <= softMax) + blank_and_force_memclk(dc, true, maxDPM); + // else: No-Op + } + } + dc->clk_mgr->dc_mode_softmax_enabled = enable; +} bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane, struct dc_cursor_attributes *cursor_attr) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 60544788e911..3d75f56a939c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -270,10 +270,10 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) /* Link may not have physical HPD pin. */ if (link->ep_type != DISPLAY_ENDPOINT_PHY) { - if (link->hpd_status) - *type = dc_connection_single; - else + if (link->is_hpd_pending || !link->hpd_status) *type = dc_connection_none; + else + *type = dc_connection_single; return true; } @@ -758,6 +758,18 @@ static bool detect_dp(struct dc_link *link, dal_ddc_service_set_transaction_type(link->ddc, sink_caps->transaction_type); +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* Apply work around for tunneled MST on certain USB4 docks. Always use DSC if dock + * reports DSC support. + */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->type == dc_connection_mst_branch && + link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && + !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) + link->wa_flags.dpia_mst_dsc_always_on = true; +#endif + #if defined(CONFIG_DRM_AMD_DC_HDCP) /* In case of fallback to SST when topology discovery below fails * HDCP caps will be querried again later by the upper layer (caller @@ -1203,6 +1215,10 @@ static bool dc_link_detect_helper(struct dc_link *link, LINK_INFO("link=%d, mst branch is now Disconnected\n", link->link_index); + /* Disable work around which keeps DSC on for tunneled MST on certain USB4 docks. */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + link->wa_flags.dpia_mst_dsc_always_on = false; + dm_helpers_dp_mst_stop_top_mgr(link->ctx, link); link->mst_stream_alloc_table.stream_count = 0; @@ -1999,6 +2015,57 @@ static enum dc_status enable_link_dp_mst( return enable_link_dp(state, pipe_ctx); } +void dc_link_blank_all_dp_displays(struct dc *dc) +{ + unsigned int i; + uint8_t dpcd_power_state = '\0'; + enum dc_status status = DC_ERROR_UNEXPECTED; + + for (i = 0; i < dc->link_count; i++) { + if ((dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) || + (dc->links[i]->priv == NULL) || (dc->links[i]->local_sink == NULL)) + continue; + + /* DP 2.0 spec requires that we read LTTPR caps first */ + dp_retrieve_lttpr_cap(dc->links[i]); + /* if any of the displays are lit up turn them off */ + status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); + + if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) + dc_link_blank_dp_stream(dc->links[i], true); + } + +} + +void dc_link_blank_dp_stream(struct dc_link *link, bool hw_init) +{ + unsigned int j; + struct dc *dc = link->ctx->dc; + enum signal_type signal = link->connector_signal; + + if ((signal == SIGNAL_TYPE_EDP) || + (signal == SIGNAL_TYPE_DISPLAY_PORT)) { + if (link->ep_type == DISPLAY_ENDPOINT_PHY && + link->link_enc->funcs->get_dig_frontend && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { + unsigned int fe = link->link_enc->funcs->get_dig_frontend(link->link_enc); + + if (fe != ENGINE_ID_UNKNOWN) + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (fe == dc->res_pool->stream_enc[j]->id) { + dc->res_pool->stream_enc[j]->funcs->dp_blank(link, + dc->res_pool->stream_enc[j]); + break; + } + } + } + + if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init) + dp_receiver_power_ctrl(link, false); + } +} + static bool get_ext_hdmi_settings(struct pipe_ctx *pipe_ctx, enum engine_id eng_id, struct ext_hdmi_settings *settings) @@ -2699,8 +2766,23 @@ static bool dp_active_dongle_validate_timing( return false; } +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps > 0) { // DP to HDMI FRL converter + struct dc_crtc_timing outputTiming = *timing; + + if (timing->flags.DSC && !timing->dsc_cfg.is_frl) + /* DP input has DSC, HDMI FRL output doesn't have DSC, remove DSC from output timing */ + outputTiming.flags.DSC = 0; + if (dc_bandwidth_in_kbps_from_timing(&outputTiming) > dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps) + return false; + } else { // DP to HDMI TMDS converter + if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10)) + return false; + } +#else if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10)) return false; +#endif #if defined(CONFIG_DRM_AMD_DC_DCN) } @@ -2946,7 +3028,7 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active link->psr_settings.psr_power_opt = *power_opts; if (psr != NULL && link->psr_settings.psr_feature_enabled && psr->funcs->psr_set_power_opt) - psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt); + psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt, panel_inst); } /* Enable or Disable PSR */ @@ -3334,7 +3416,8 @@ static void dc_log_vcp_x_y(const struct dc_link *link, struct fixed31_32 avg_tim /* * Payload allocation/deallocation for SST introduced in DP2.0 */ -enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, bool allocate) +static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, + bool allocate) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; @@ -3913,9 +3996,6 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp; #if defined(CONFIG_DRM_AMD_DC_DCN) struct link_encoder *link_enc = NULL; - struct dc_state *state = pipe_ctx->stream->ctx->dc->current_state; - struct link_enc_assignment link_enc_assign; - int i; #endif if (cp_psp && cp_psp->funcs.update_stream_config) { @@ -3943,18 +4023,15 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) pipe_ctx->stream->ctx->dc, pipe_ctx->stream); } + ASSERT(link_enc); + // Initialize PHY ID with ABCDE - 01234 mapping except when it is B0 config.phy_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; - //look up the link_enc_assignment for the current pipe_ctx - for (i = 0; i < state->stream_count; i++) { - if (pipe_ctx->stream == state->streams[i]) { - link_enc_assign = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; - } - } // Add flag to guard new A0 DIG mapping - if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true) { - config.dig_be = link_enc_assign.eng_id; + if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true && + pipe_ctx->stream->link->dc->ctx->dce_version == DCN_VERSION_3_1) { + config.dig_be = link_enc->preferred_engine; config.dio_output_type = pipe_ctx->stream->link->ep_type; config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; } else { @@ -3966,10 +4043,8 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true && link_enc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = link_enc_assign.stream->link_enc; - // enum ID 1-4 maps to DPIA PHY ID 0-3 - config.phy_idx = link_enc_assign.ep_id.link_id.enum_id - ENUM_ID_1; + config.phy_idx = pipe_ctx->stream->link->link_id.enum_id - ENUM_ID_1; } else { // for non DPIA mode over B0, ABCDE maps to 01564 switch (link_enc->transmitter) { @@ -4242,7 +4317,8 @@ void core_link_enable_stream( /* eDP lit up by bios already, no need to enable again. */ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && apply_edp_fast_boot_optimization && - !pipe_ctx->stream->timing.flags.DSC) { + !pipe_ctx->stream->timing.flags.DSC && + !pipe_ctx->next_odm_pipe) { pipe_ctx->stream->dpms_off = false; #if defined(CONFIG_DRM_AMD_DC_HDCP) update_psp_stream_config(pipe_ctx, false); @@ -4749,6 +4825,8 @@ bool dc_link_should_enable_fec(const struct dc_link *link) link->local_sink && link->local_sink->edid_caps.panel_patch.disable_fec) || (link->connector_signal == SIGNAL_TYPE_EDP + // enable FEC for EDP if DSC is supported + && link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT == false )) is_fec_disable = true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 60539b1f2a80..24dc662ec3e4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -626,7 +626,7 @@ bool dal_ddc_submit_aux_command(struct ddc_service *ddc, do { struct aux_payload current_payload; bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) >= - payload->length ? true : false; + payload->length; uint32_t payload_length = is_end_of_payload ? payload->length - retrieved : DEFAULT_AUX_MAX_DATA_SIZE; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cb7bf9148904..8a35370da867 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -100,6 +100,7 @@ static const struct dp_lt_fallback_entry dp_lt_fallbacks[] = { #endif static bool decide_fallback_link_setting( + struct dc_link *link, struct dc_link_settings initial_link_settings, struct dc_link_settings *current_link_setting, enum link_training_result training_result); @@ -398,6 +399,223 @@ static uint8_t get_dpcd_link_rate(const struct dc_link_settings *link_settings) } #endif +static void vendor_specific_lttpr_wa_one_start(struct dc_link *link) +{ + const uint8_t vendor_lttpr_write_data[4] = {0x1, 0x50, 0x63, 0xff}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A for certain LTTPR to reset their lane settings, part one of two */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data[0], + sizeof(vendor_lttpr_write_data)); +} + +static void vendor_specific_lttpr_wa_one_end( + struct dc_link *link, + uint8_t retry_count) +{ + const uint8_t vendor_lttpr_write_data[4] = {0x1, 0x50, 0x63, 0x0}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + + if (!retry_count) { + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A for certain LTTPR to reset their lane settings, part two of two */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data[0], + sizeof(vendor_lttpr_write_data)); + } +} + +static void vendor_specific_lttpr_wa_one_two( + struct dc_link *link, + const uint8_t rate) +{ + if (link->apply_vendor_specific_lttpr_link_rate_wa) { + uint8_t toggle_rate = 0x0; + + if (rate == 0x6) + toggle_rate = 0xA; + else + toggle_rate = 0x6; + + if (link->vendor_specific_lttpr_link_rate_wa == rate) { + /* W/A for certain LTTPR to reset internal state for link training */ + core_link_write_dpcd( + link, + DP_LINK_BW_SET, + &toggle_rate, + 1); + } + + /* Store the last attempted link rate for this link */ + link->vendor_specific_lttpr_link_rate_wa = rate; + } +} + +static void vendor_specific_lttpr_wa_three( + struct dc_link *link, + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX]) +{ + const uint8_t vendor_lttpr_write_data_vs[3] = {0x0, 0x53, 0x63}; + const uint8_t vendor_lttpr_write_data_pe[3] = {0x0, 0x54, 0x63}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; + uint32_t vendor_lttpr_read_address = 0xF0053; + uint8_t dprx_vs = 0; + uint8_t dprx_pe = 0; + uint8_t lane; + + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + vendor_lttpr_read_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + } + + /* W/A to read lane settings requested by DPRX */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_read_dpcd( + link, + vendor_lttpr_read_address, + &dprx_vs, + 1); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + core_link_read_dpcd( + link, + vendor_lttpr_read_address, + &dprx_pe, + 1); + + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_LANE = (dprx_vs >> (2 * lane)) & 0x3; + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_LANE = (dprx_pe >> (2 * lane)) & 0x3; + } +} + +static void vendor_specific_lttpr_wa_three_dpcd( + struct dc_link *link, + union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX]) +{ + union lane_adjust lane_adjust[LANE_COUNT_DP_MAX]; + uint8_t lane = 0; + + vendor_specific_lttpr_wa_three(link, lane_adjust); + + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET = lane_adjust[lane].bits.VOLTAGE_SWING_LANE; + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_SET = lane_adjust[lane].bits.PRE_EMPHASIS_LANE; + } +} + +static void vendor_specific_lttpr_wa_four( + struct dc_link *link, + bool apply_wa) +{ + const uint8_t vendor_lttpr_write_data_one[4] = {0x1, 0x55, 0x63, 0x8}; + const uint8_t vendor_lttpr_write_data_two[4] = {0x1, 0x55, 0x63, 0x0}; + const uint8_t offset = dp_convert_to_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t vendor_lttpr_write_address = 0xF004F; +#if defined(CONFIG_DRM_AMD_DC_DP2_0) + uint8_t sink_status = 0; + uint8_t i; +#endif + + if (offset != 0xFF) + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* W/A to pass through DPCD write of TPS=0 to DPRX */ + if (apply_wa) { + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_one[0], + sizeof(vendor_lttpr_write_data_one)); + } + + /* clear training pattern set */ + dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + + if (apply_wa) { + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_two[0], + sizeof(vendor_lttpr_write_data_two)); + } + +#if defined(CONFIG_DRM_AMD_DC_DP2_0) + /* poll for intra-hop disable */ + for (i = 0; i < 10; i++) { + if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && + (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) + break; + udelay(1000); + } +#endif +} + +static void vendor_specific_lttpr_wa_five( + struct dc_link *link, + const union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX], + uint8_t lane_count) +{ + const uint32_t vendor_lttpr_write_address = 0xF004F; + const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; + uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; + uint8_t lane = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + dpcd_lane_adjust[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Force LTTPR to output desired VS and PE */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_reset[0], + sizeof(vendor_lttpr_write_data_reset)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); +} + enum dc_status dpcd_set_link_settings( struct dc_link *link, const struct link_training_settings *lt_settings) @@ -430,7 +648,7 @@ enum dc_status dpcd_set_link_settings( status = core_link_write_dpcd(link, DP_LANE_COUNT_SET, &lane_count_set.raw, 1); - if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 && lt_settings->link_settings.use_link_rate_set == true) { rate = 0; /* WA for some MUX chips that will power down with eDP and lose supported @@ -452,6 +670,15 @@ enum dc_status dpcd_set_link_settings( #else rate = (uint8_t) (lt_settings->link_settings.link_rate); #endif + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) + vendor_specific_lttpr_wa_one_start(link); + + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)) + vendor_specific_lttpr_wa_one_two(link, rate); + status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); } @@ -1211,6 +1438,12 @@ static enum link_training_result perform_channel_equalization_sequence( dp_translate_training_aux_read_interval( link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + wait_time_microsec = 16000; + } + dp_wait_for_training_aux_rd_interval( link, wait_time_microsec); @@ -1314,6 +1547,11 @@ static enum link_training_result perform_clock_recovery_sequence( if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) wait_time_microsec = TRAINING_AUX_RD_INTERVAL; + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)) { + wait_time_microsec = 16000; + } + dp_wait_for_training_aux_rd_interval( link, wait_time_microsec); @@ -1329,6 +1567,13 @@ static enum link_training_result perform_clock_recovery_sequence( dpcd_lane_adjust, offset); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + vendor_specific_lttpr_wa_one_end(link, retry_count); + vendor_specific_lttpr_wa_three(link, dpcd_lane_adjust); + } + /* 5. check CR done*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) return LINK_TRAINING_SUCCESS; @@ -2138,7 +2383,7 @@ static enum link_training_result dp_perform_8b_10b_link_training( } for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++) - lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET = VOLTAGE_SWING_LEVEL0; + lt_settings->dpcd_lane_settings[lane].raw = 0; } if (status == LINK_TRAINING_SUCCESS) { @@ -2203,7 +2448,14 @@ enum link_training_result dc_link_dp_perform_link_training( <_settings); /* reset previous training states */ - dpcd_exit_training_mode(link); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + link->apply_vendor_specific_lttpr_link_rate_wa = true; + vendor_specific_lttpr_wa_four(link, true); + } else { + dpcd_exit_training_mode(link); + } /* configure link prior to entering training mode */ dpcd_configure_lttpr_mode(link, <_settings); @@ -2223,8 +2475,17 @@ enum link_training_result dc_link_dp_perform_link_training( else ASSERT(0); - /* exit training mode and switch to video idle */ - dpcd_exit_training_mode(link); + /* exit training mode */ + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + link->apply_vendor_specific_lttpr_link_rate_wa = false; + vendor_specific_lttpr_wa_four(link, (status != LINK_TRAINING_SUCCESS)); + } else { + dpcd_exit_training_mode(link); + } + + /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) status = dp_transition_to_video_idle(link, <_settings, @@ -2349,7 +2610,7 @@ bool perform_link_training_with_retries( uint32_t req_bw; uint32_t link_bw; - decide_fallback_link_setting(*link_setting, ¤t_setting, status); + decide_fallback_link_setting(link, *link_setting, ¤t_setting, status); /* Fail link training if reduced link bandwidth no longer meets * stream requirements. */ @@ -2864,7 +3125,7 @@ bool dp_verify_link_cap( * based on the actual mode we're driving */ dp_disable_link_phy(link, link->connector_signal); - } while (!success && decide_fallback_link_setting( + } while (!success && decide_fallback_link_setting(link, initial_link_settings, cur, status)); /* Link Training failed for all Link Settings @@ -3116,6 +3377,7 @@ static bool decide_fallback_link_setting_max_bw_policy( * and no further fallback could be done */ static bool decide_fallback_link_setting( + struct dc_link *link, struct dc_link_settings initial_link_settings, struct dc_link_settings *current_link_setting, enum link_training_result training_result) @@ -3123,7 +3385,8 @@ static bool decide_fallback_link_setting( if (!current_link_setting) return false; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING) + if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING || + link->dc->debug.force_dp2_lt_fallback_method) return decide_fallback_link_setting_max_bw_policy(&initial_link_settings, current_link_setting); #endif @@ -3346,6 +3609,148 @@ bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *lin return false; } +static bool decide_edp_link_settings_with_dsc(struct dc_link *link, + struct dc_link_settings *link_setting, + uint32_t req_bw, + enum dc_link_rate max_link_rate) +{ + struct dc_link_settings initial_link_setting; + struct dc_link_settings current_link_setting; + uint32_t link_bw; + + unsigned int policy = 0; + + policy = link->ctx->dc->debug.force_dsc_edp_policy; + if (max_link_rate == LINK_RATE_UNKNOWN) + max_link_rate = link->verified_link_cap.link_rate; + /* + * edp_supported_link_rates_count is only valid for eDP v1.4 or higher. + * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h" + */ + if ((link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 || + link->dpcd_caps.edp_supported_link_rates_count == 0)) { + /* for DSC enabled case, we search for minimum lane count */ + memset(&initial_link_setting, 0, sizeof(initial_link_setting)); + initial_link_setting.lane_count = LANE_COUNT_ONE; + initial_link_setting.link_rate = LINK_RATE_LOW; + initial_link_setting.link_spread = LINK_SPREAD_DISABLED; + initial_link_setting.use_link_rate_set = false; + initial_link_setting.link_rate_set = 0; + current_link_setting = initial_link_setting; + if (req_bw > dc_link_bandwidth_kbps(link, &link->verified_link_cap)) + return false; + + /* search for the minimum link setting that: + * 1. is supported according to the link training result + * 2. could support the b/w requested by the timing + */ + while (current_link_setting.link_rate <= + max_link_rate) { + link_bw = dc_link_bandwidth_kbps( + link, + ¤t_link_setting); + if (req_bw <= link_bw) { + *link_setting = current_link_setting; + return true; + } + if (policy) { + /* minimize lane */ + if (current_link_setting.link_rate < max_link_rate) { + current_link_setting.link_rate = + increase_link_rate( + current_link_setting.link_rate); + } else { + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + current_link_setting.link_rate = initial_link_setting.link_rate; + } else + break; + } + } else { + /* minimize link rate */ + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + } else { + current_link_setting.link_rate = + increase_link_rate( + current_link_setting.link_rate); + current_link_setting.lane_count = + initial_link_setting.lane_count; + } + } + } + return false; + } + + /* if optimize edp link is supported */ + memset(&initial_link_setting, 0, sizeof(initial_link_setting)); + initial_link_setting.lane_count = LANE_COUNT_ONE; + initial_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[0]; + initial_link_setting.link_spread = LINK_SPREAD_DISABLED; + initial_link_setting.use_link_rate_set = true; + initial_link_setting.link_rate_set = 0; + current_link_setting = initial_link_setting; + + /* search for the minimum link setting that: + * 1. is supported according to the link training result + * 2. could support the b/w requested by the timing + */ + while (current_link_setting.link_rate <= + max_link_rate) { + link_bw = dc_link_bandwidth_kbps( + link, + ¤t_link_setting); + if (req_bw <= link_bw) { + *link_setting = current_link_setting; + return true; + } + if (policy) { + /* minimize lane */ + if (current_link_setting.link_rate_set < + link->dpcd_caps.edp_supported_link_rates_count + && current_link_setting.link_rate < max_link_rate) { + current_link_setting.link_rate_set++; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + } else { + if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + } else + break; + } + } else { + /* minimize link rate */ + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + } else { + if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting.link_rate_set++; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + current_link_setting.lane_count = + initial_link_setting.lane_count; + } else + break; + } + } + } + return false; +} + static bool decide_mst_link_settings(const struct dc_link *link, struct dc_link_settings *link_setting) { *link_setting = link->verified_link_cap; @@ -3380,7 +3785,25 @@ void decide_link_settings(struct dc_stream_state *stream, if (decide_mst_link_settings(link, link_setting)) return; } else if (link->connector_signal == SIGNAL_TYPE_EDP) { - if (decide_edp_link_settings(link, link_setting, req_bw)) + /* enable edp link optimization for DSC eDP case */ + if (stream->timing.flags.DSC) { + enum dc_link_rate max_link_rate = LINK_RATE_UNKNOWN; + + if (link->ctx->dc->debug.force_dsc_edp_policy) { + /* calculate link max link rate cap*/ + struct dc_link_settings tmp_link_setting; + struct dc_crtc_timing tmp_timing = stream->timing; + uint32_t orig_req_bw; + + tmp_link_setting.link_rate = LINK_RATE_UNKNOWN; + tmp_timing.flags.DSC = 0; + orig_req_bw = dc_bandwidth_in_kbps_from_timing(&tmp_timing); + decide_edp_link_settings(link, &tmp_link_setting, orig_req_bw); + max_link_rate = tmp_link_setting.link_rate; + } + if (decide_edp_link_settings_with_dsc(link, link_setting, req_bw, max_link_rate)) + return; + } else if (decide_edp_link_settings(link, link_setting, req_bw)) return; } else if (decide_dp_link_settings(link, link_setting, req_bw)) return; @@ -3421,7 +3844,6 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) &psr_configuration.raw, sizeof(psr_configuration.raw)); - if (psr_configuration.bits.ENABLE) { unsigned char dpcdbuf[3] = {0}; union psr_error_status psr_error_status; @@ -3453,10 +3875,12 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) sizeof(psr_error_status.raw)); /* PSR error, disable and re-enable PSR */ - allow_active = false; - dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); - allow_active = true; - dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + if (link->psr_settings.psr_allow_active) { + allow_active = false; + dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + allow_active = true; + dc_link_set_psr_allow_active(link, &allow_active, true, false, NULL); + } return true; } else if (psr_sink_psr_status.bits.SINK_SELF_REFRESH_STATUS == @@ -3534,6 +3958,13 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link) &dpcd_lane_adjustment[0].raw, sizeof(dpcd_lane_adjustment)); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) + vendor_specific_lttpr_wa_three_dpcd( + link, + link_training_settings.dpcd_lane_settings); + /*get post cursor 2 parameters * For DP 1.1a or eariler, this DPCD register's value is 0 * For DP 1.2 or later: @@ -4153,6 +4584,56 @@ static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) return -1; } +#if defined(CONFIG_DRM_AMD_DC_DCN) +uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw) +{ + switch (bw) { + case 0b001: + return 9000000; + case 0b010: + return 18000000; + case 0b011: + return 24000000; + case 0b100: + return 32000000; + case 0b101: + return 40000000; + case 0b110: + return 48000000; + } + + return 0; +} + +/** + * Return PCON's post FRL link training supported BW if its non-zero, otherwise return max_supported_frl_bw. + */ +static uint32_t intersect_frl_link_bw_support( + const uint32_t max_supported_frl_bw_in_kbps, + const union hdmi_encoded_link_bw hdmi_encoded_link_bw) +{ + uint32_t supported_bw_in_kbps = max_supported_frl_bw_in_kbps; + + // HDMI_ENCODED_LINK_BW bits are only valid if HDMI Link Configuration bit is 1 (FRL mode) + if (hdmi_encoded_link_bw.bits.FRL_MODE) { + if (hdmi_encoded_link_bw.bits.BW_48Gbps) + supported_bw_in_kbps = 48000000; + else if (hdmi_encoded_link_bw.bits.BW_40Gbps) + supported_bw_in_kbps = 40000000; + else if (hdmi_encoded_link_bw.bits.BW_32Gbps) + supported_bw_in_kbps = 32000000; + else if (hdmi_encoded_link_bw.bits.BW_24Gbps) + supported_bw_in_kbps = 24000000; + else if (hdmi_encoded_link_bw.bits.BW_18Gbps) + supported_bw_in_kbps = 18000000; + else if (hdmi_encoded_link_bw.bits.BW_9Gbps) + supported_bw_in_kbps = 9000000; + } + + return supported_bw_in_kbps; +} +#endif + static void read_dp_device_vendor_id(struct dc_link *link) { struct dp_device_vendor_id dp_id; @@ -4264,6 +4745,27 @@ static void get_active_converter_info( translate_dpcd_max_bpc( hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (link->dc->caps.hdmi_frl_pcon_support) { + union hdmi_encoded_link_bw hdmi_encoded_link_bw; + + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = + dc_link_bw_kbps_from_raw_frl_link_rate_data( + hdmi_color_caps.bits.MAX_ENCODED_LINK_BW_SUPPORT); + + // Intersect reported max link bw support with the supported link rate post FRL link training + if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS, + &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) { + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, + hdmi_encoded_link_bw); + } + + if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) + link->dpcd_caps.dongle_caps.extendedCapValid = true; + } +#endif + if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0) link->dpcd_caps.dongle_caps.extendedCapValid = true; } @@ -4454,7 +4956,7 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data, sizeof(lttpr_dpcd_data)); if (status != DC_OK) { - dm_error("%s: Read LTTPR caps data failed.\n", __func__); + DC_LOG_DP2("%s: Read LTTPR caps data failed.\n", __func__); return false; } @@ -5240,8 +5742,18 @@ bool dc_link_dp_set_test_pattern( if (is_dp_phy_pattern(test_pattern)) { /* Set DPCD Lane Settings before running test pattern */ if (p_link_settings != NULL) { - dp_set_hw_lane_settings(link, p_link_settings, DPRX); - dpcd_set_lane_settings(link, p_link_settings, DPRX); + if (link->dc->debug.apply_vendor_specific_lttpr_wa && + (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { + dpcd_set_lane_settings(link, p_link_settings, DPRX); + vendor_specific_lttpr_wa_five( + link, + p_link_settings->dpcd_lane_settings, + p_link_settings->link_settings.lane_count); + } else { + dp_set_hw_lane_settings(link, p_link_settings, DPRX); + dpcd_set_lane_settings(link, p_link_settings, DPRX); + } } /* Blank stream if running test pattern */ @@ -5665,6 +6177,23 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) } } +struct link_encoder *dp_get_link_enc(struct dc_link *link) +{ + struct link_encoder *link_enc; + + link_enc = link->link_enc; + if (link->is_dig_mapping_flexible && + link->dc->res_pool->funcs->link_encs_assign) { + link_enc = link_enc_cfg_get_link_enc_used_by_link(link->ctx->dc, + link); + if (!link->link_enc) + link_enc = link_enc_cfg_get_next_avail_link_enc( + link->ctx->dc); + } + + return link_enc; +} + void dpcd_set_source_specific_data(struct dc_link *link) { if (!link->dc->vendor_signature.is_valid) { @@ -5885,7 +6414,10 @@ bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timin req_bw = dc_bandwidth_in_kbps_from_timing(crtc_timing); - decide_edp_link_settings(link, &link_setting, req_bw); + if (!crtc_timing->flags.DSC) + decide_edp_link_settings(link, &link_setting, req_bw); + else + decide_edp_link_settings_with_dsc(link, &link_setting, req_bw, LINK_RATE_UNKNOWN); if (link->dpcd_caps.edp_supported_link_rates[link_rate_set] != link_setting.link_rate || lane_count_set.bits.LANE_COUNT_SET != link_setting.lane_count) { @@ -6126,3 +6658,14 @@ bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) dc_is_dp_signal(pipe_ctx->stream->signal)); } #endif + +void edp_panel_backlight_power_on(struct dc_link *link) +{ + if (link->connector_signal != SIGNAL_TYPE_EDP) + return; + + link->dc->hwss.edp_power_control(link, true); + link->dc->hwss.edp_wait_for_hpd_ready(link, true); + if (link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control(link, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c index b1c9f77d6bf4..d72122593959 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c @@ -94,17 +94,17 @@ static enum link_training_result dpia_configure_link(struct dc_link *link, lt_settings); status = dpcd_configure_channel_coding(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; /* Configure lttpr mode */ status = dpcd_configure_lttpr_mode(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; /* Set link rate, lane count and spread. */ status = dpcd_set_link_settings(link, lt_settings); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; if (link->preferred_training_settings.fec_enable) @@ -112,7 +112,7 @@ static enum link_training_result dpia_configure_link(struct dc_link *link, else fec_enable = true; status = dp_set_fec_ready(link, fec_enable); - if (status != DC_OK && !link->hpd_status) + if (status != DC_OK && link->is_hpd_pending) return LINK_TRAINING_ABORT; return LINK_TRAINING_SUCCESS; @@ -388,7 +388,7 @@ static enum link_training_result dpia_training_cr_non_transparent(struct dc_link } /* Abort link training if clock recovery failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n" @@ -490,7 +490,7 @@ static enum link_training_result dpia_training_cr_transparent(struct dc_link *li } /* Abort link training if clock recovery failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n" @@ -675,7 +675,7 @@ static enum link_training_result dpia_training_eq_non_transparent(struct dc_link } /* Abort link training if equalization failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n" @@ -758,7 +758,7 @@ static enum link_training_result dpia_training_eq_transparent(struct dc_link *li } /* Abort link training if equalization failed due to HPD unplug. */ - if (!link->hpd_status) + if (link->is_hpd_pending) result = LINK_TRAINING_ABORT; DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n" @@ -892,10 +892,10 @@ static void dpia_training_abort(struct dc_link *link, uint32_t hop) __func__, link->link_id.enum_id - ENUM_ID_1, link->lttpr_mode, - link->hpd_status); + link->is_hpd_pending); /* Abandon clean-up if sink unplugged. */ - if (!link->hpd_status) + if (link->is_hpd_pending) return; if (hop != DPRX) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 25e48a8cbb78..a55944da8d53 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -118,7 +118,10 @@ static void remove_link_enc_assignment( */ if (get_stream_using_link_enc(state, eng_id) == NULL) state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = eng_id; + stream->link_enc = NULL; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; break; } } @@ -148,6 +151,7 @@ static void add_link_enc_assignment( .ep_type = stream->link->ep_type}, .eng_id = eng_id, .stream = stream}; + dc_stream_retain(stream); state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_idx] = ENGINE_ID_UNKNOWN; stream->link_enc = stream->ctx->dc->res_pool->link_encoders[eng_idx]; break; @@ -227,7 +231,7 @@ static struct link_encoder *get_link_enc_used_by_link( .link_id = link->link_id, .ep_type = link->ep_type}; - for (i = 0; i < state->stream_count; i++) { + for (i = 0; i < MAX_PIPES; i++) { struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) @@ -237,28 +241,18 @@ static struct link_encoder *get_link_enc_used_by_link( return link_enc; } /* Clear all link encoder assignments. */ -static void clear_enc_assignments(struct dc_state *state) +static void clear_enc_assignments(const struct dc *dc, struct dc_state *state) { int i; - enum engine_id eng_id; - struct dc_stream_state *stream; for (i = 0; i < MAX_PIPES; i++) { state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid = false; - eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id; - stream = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream; - if (eng_id != ENGINE_ID_UNKNOWN) - state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_id - ENGINE_ID_DIGA] = eng_id; - if (stream) - stream->link_enc = NULL; + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; + if (state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream != NULL) { + dc_stream_release(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream); + state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; + } } -} - -void link_enc_cfg_init( - struct dc *dc, - struct dc_state *state) -{ - int i; for (i = 0; i < dc->res_pool->res_cap->num_dig_link_enc; i++) { if (dc->res_pool->link_encoders[i]) @@ -266,8 +260,13 @@ void link_enc_cfg_init( else state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i] = ENGINE_ID_UNKNOWN; } +} - clear_enc_assignments(state); +void link_enc_cfg_init( + const struct dc *dc, + struct dc_state *state) +{ + clear_enc_assignments(dc, state); state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY; } @@ -284,12 +283,9 @@ void link_enc_cfg_link_encs_assign( ASSERT(state->stream_count == stream_count); - if (stream_count == 0) - clear_enc_assignments(state); - /* Release DIG link encoder resources before running assignment algorithm. */ - for (i = 0; i < stream_count; i++) - dc->res_pool->funcs->link_enc_unassign(state, streams[i]); + for (i = 0; i < dc->current_state->stream_count; i++) + dc->res_pool->funcs->link_enc_unassign(state, dc->current_state->streams[i]); for (i = 0; i < MAX_PIPES; i++) ASSERT(state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid == false); @@ -544,6 +540,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) uint8_t dig_stream_count = 0; int matching_stream_ptrs = 0; int eng_ids_per_ep_id[MAX_PIPES] = {0}; + int valid_bitmap = 0; /* (1) No. valid entries same as stream count. */ for (i = 0; i < MAX_PIPES; i++) { @@ -625,5 +622,15 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) is_valid = valid_entries && valid_stream_ptrs && valid_uniqueness && valid_avail && valid_streams; ASSERT(is_valid); + if (is_valid == false) { + valid_bitmap = + (valid_entries & 0x1) | + ((valid_stream_ptrs & 0x1) << 1) | + ((valid_uniqueness & 0x1) << 2) | + ((valid_avail & 0x1) << 3) | + ((valid_streams & 0x1) << 4); + dm_error("Invalid link encoder assignments: 0x%x\n", valid_bitmap); + } + return is_valid; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index c32fdccd4d92..8b6b035bfa9c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -734,6 +734,10 @@ static void calculate_split_count_and_index(struct pipe_ctx *pipe_ctx, int *spli (*split_idx)++; split_pipe = split_pipe->top_pipe; } + + /* MPO window on right side of ODM split */ + if (split_pipe && split_pipe->prev_odm_pipe && !pipe_ctx->prev_odm_pipe) + (*split_idx)++; } else { /*Get odm split index*/ struct pipe_ctx *split_pipe = pipe_ctx->prev_odm_pipe; @@ -780,7 +784,11 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx) /* * Only the leftmost ODM pipe should be offset by a nonzero distance */ - if (!pipe_ctx->prev_odm_pipe || split_idx == split_count) { + if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->prev_odm_pipe && !pipe_ctx->prev_odm_pipe) { + /* MPO window on right side of ODM split */ + data->recout.x = stream->dst.x + (surf_clip.x - stream->dst.width/2) * + stream->dst.width / stream->src.width; + } else if (!pipe_ctx->prev_odm_pipe || split_idx == split_count) { data->recout.x = stream->dst.x; if (stream->src.x < surf_clip.x) data->recout.x += (surf_clip.x - stream->src.x) * stream->dst.width @@ -978,6 +986,8 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx) * stream->dst.height / stream->src.height; if (pipe_ctx->prev_odm_pipe && split_idx) ro_lb = data->h_active * split_idx - recout_full_x; + else if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->prev_odm_pipe) + ro_lb = data->h_active * split_idx - recout_full_x + data->recout.x; else ro_lb = data->recout.x - recout_full_x; ro_tb = data->recout.y - recout_full_y; @@ -1076,6 +1086,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) timing->v_border_top + timing->v_border_bottom; if (pipe_ctx->next_odm_pipe || pipe_ctx->prev_odm_pipe) pipe_ctx->plane_res.scl_data.h_active /= get_num_odm_splits(pipe_ctx) + 1; + /* ODM + windows MPO, where window is on either right or left ODM half */ + else if (pipe_ctx->top_pipe && (pipe_ctx->top_pipe->next_odm_pipe || pipe_ctx->top_pipe->prev_odm_pipe)) + pipe_ctx->plane_res.scl_data.h_active /= get_num_odm_splits(pipe_ctx->top_pipe) + 1; /* depends on h_active */ calculate_recout(pipe_ctx); @@ -1084,11 +1097,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) /* depends on scaling ratios and recout, does not calculate offset yet */ calculate_viewport_size(pipe_ctx); - /* Stopgap for validation of ODM + MPO on one side of screen case */ - if (pipe_ctx->plane_res.scl_data.viewport.height < 1 || - pipe_ctx->plane_res.scl_data.viewport.width < 1) - return false; - /* * LB calculations depend on vp size, h/v_active and scaling ratios * Setting line buffer pixel depth to 24bpp yields banding @@ -1437,23 +1445,54 @@ bool dc_add_plane_to_context( if (head_pipe != free_pipe) { tail_pipe = resource_get_tail_pipe(&context->res_ctx, head_pipe); ASSERT(tail_pipe); - free_pipe->stream_res.tg = tail_pipe->stream_res.tg; - free_pipe->stream_res.abm = tail_pipe->stream_res.abm; - free_pipe->stream_res.opp = tail_pipe->stream_res.opp; - free_pipe->stream_res.stream_enc = tail_pipe->stream_res.stream_enc; - free_pipe->stream_res.audio = tail_pipe->stream_res.audio; - free_pipe->clock_source = tail_pipe->clock_source; - free_pipe->top_pipe = tail_pipe; - tail_pipe->bottom_pipe = free_pipe; - if (!free_pipe->next_odm_pipe && tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { - free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; - tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; - } - if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { - free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; - tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + + /* ODM + window MPO, where MPO window is on right half only */ + if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.width/2) && + tail_pipe->next_odm_pipe) { + free_pipe->stream_res.tg = tail_pipe->next_odm_pipe->stream_res.tg; + free_pipe->stream_res.abm = tail_pipe->next_odm_pipe->stream_res.abm; + free_pipe->stream_res.opp = tail_pipe->next_odm_pipe->stream_res.opp; + free_pipe->stream_res.stream_enc = tail_pipe->next_odm_pipe->stream_res.stream_enc; + free_pipe->stream_res.audio = tail_pipe->next_odm_pipe->stream_res.audio; + free_pipe->clock_source = tail_pipe->next_odm_pipe->clock_source; + + free_pipe->top_pipe = tail_pipe->next_odm_pipe; + tail_pipe->next_odm_pipe->bottom_pipe = free_pipe; + } else { + free_pipe->stream_res.tg = tail_pipe->stream_res.tg; + free_pipe->stream_res.abm = tail_pipe->stream_res.abm; + free_pipe->stream_res.opp = tail_pipe->stream_res.opp; + free_pipe->stream_res.stream_enc = tail_pipe->stream_res.stream_enc; + free_pipe->stream_res.audio = tail_pipe->stream_res.audio; + free_pipe->clock_source = tail_pipe->clock_source; + + free_pipe->top_pipe = tail_pipe; + tail_pipe->bottom_pipe = free_pipe; + + if (!free_pipe->next_odm_pipe && tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; + tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; + } + if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { + free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; + tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + } } } + + /* ODM + window MPO, where MPO window is on left half only */ + if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + break; + } + /* ODM + window MPO, where MPO window is on right half only */ + if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.width/2)) { + break; + } + head_pipe = head_pipe->next_odm_pipe; } /* assign new surfaces*/ @@ -1664,6 +1703,10 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; + // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks + if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + return false; + return true; } @@ -2078,7 +2121,6 @@ static void mark_seamless_boot_stream( { struct dc_bios *dcb = dc->ctx->dc_bios; - /* TODO: Check Linux */ if (dc->config.allow_seamless_boot_optimization && !dcb->funcs->is_accelerated_mode(dcb)) { if (dc_validate_seamless_boot_timing(dc, stream->sink, &stream->timing)) @@ -2224,6 +2266,9 @@ void dc_resource_state_construct( struct dc_state *dst_ctx) { dst_ctx->clk_mgr = dc->clk_mgr; + + /* Initialise DIG link encoder resource tracking variables. */ + link_enc_cfg_init(dc, dst_ctx); } @@ -2252,16 +2297,6 @@ enum dc_status dc_validate_global_state( if (!new_ctx) return DC_ERROR_UNEXPECTED; -#if defined(CONFIG_DRM_AMD_DC_DCN) - - /* - * Update link encoder to stream assignment. - * TODO: Split out reason allocation from validation. - */ - if (dc->res_pool->funcs->link_encs_assign && fast_validate == false) - dc->res_pool->funcs->link_encs_assign( - dc, new_ctx, new_ctx->streams, new_ctx->stream_count); -#endif if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); @@ -2313,6 +2348,16 @@ enum dc_status dc_validate_global_state( if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) result = DC_FAIL_BANDWIDTH_VALIDATE; +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* + * Only update link encoder to stream assignment after bandwidth validation passed. + * TODO: Split out assignment and validation. + */ + if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false) + dc->res_pool->funcs->link_encs_assign( + dc, new_ctx, new_ctx->streams, new_ctx->stream_count); +#endif + return result; } @@ -2506,17 +2551,7 @@ static void set_avi_info_frame( /* TODO : We should handle YCC quantization */ /* but we do not have matrix calculation */ - if (stream->qy_bit == 1) { - if (color_space == COLOR_SPACE_SRGB || - color_space == COLOR_SPACE_2020_RGB_FULLRANGE) - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - else if (color_space == COLOR_SPACE_SRGB_LIMITED || - color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - else - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; - } else - hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; ///VIC format = stream->timing.timing_3d_format; @@ -3126,3 +3161,57 @@ struct hpo_dp_link_encoder *resource_get_unused_hpo_dp_link_encoder( return enc; } #endif + +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context) +{ + int i, j; + struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd; + + /* If pipe backend is reset, need to reset pipe syncd status */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i]; + pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + + /* Reset all the syncd pipes from the disabled pipe */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j); + } + } + } +} + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx) +{ + int i; + struct pipe_ctx *pipe_ctx, *pipe_ctx_check; + + pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx); + + /* for the pipe disabled, check if any slave pipe exists and assert */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_check = &context->res_ctx.pipe_ctx[i]; + + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) && + IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx)) + DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n", + i, disabled_master_pipe_idx); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c index a249a0e5edd0..4b5e4d8e7735 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c @@ -33,14 +33,6 @@ * Private functions ******************************************************************************/ -static void dc_sink_destruct(struct dc_sink *sink) -{ - if (sink->dc_container_id) { - kfree(sink->dc_container_id); - sink->dc_container_id = NULL; - } -} - static bool dc_sink_construct(struct dc_sink *sink, const struct dc_sink_init_data *init_params) { @@ -75,7 +67,7 @@ void dc_sink_retain(struct dc_sink *sink) static void dc_sink_free(struct kref *kref) { struct dc_sink *sink = container_of(kref, struct dc_sink, refcount); - dc_sink_destruct(sink); + kfree(sink->dc_container_id); kfree(sink); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3aac3f4a2852..18e59d635ca2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.160" +#define DC_VER "3.2.166" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -75,6 +75,16 @@ enum dc_plane_type { DC_PLANE_TYPE_DCN_UNIVERSAL, }; +// Sizes defined as multiples of 64KB +enum det_size { + DET_SIZE_DEFAULT = 0, + DET_SIZE_192KB = 3, + DET_SIZE_256KB = 4, + DET_SIZE_320KB = 5, + DET_SIZE_384KB = 6 +}; + + struct dc_plane_cap { enum dc_plane_type type; uint32_t blends_with_above : 1; @@ -187,7 +197,9 @@ struct dc_caps { struct dc_color_caps color; #if defined(CONFIG_DRM_AMD_DC_DCN) bool dp_hpo; + bool hdmi_frl_pcon_support; #endif + bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; }; @@ -332,6 +344,7 @@ struct dc_config { uint8_t vblank_alignment_max_frame_time_diff; bool is_asymmetric_memory; bool is_single_rank_dimm; + bool use_pipe_ctx_sync_logic; }; enum visual_confirm { @@ -508,7 +521,9 @@ union dpia_debug_options { uint32_t disable_dpia:1; uint32_t force_non_lttpr:1; uint32_t extend_aux_rd_interval:1; - uint32_t reserved:29; + uint32_t disable_mst_dsc_work_around:1; + uint32_t hpd_delay_in_ms:12; + uint32_t reserved:16; } bits; uint32_t raw; }; @@ -573,6 +588,8 @@ struct dc_debug_options { bool native422_support; bool disable_dsc; enum visual_confirm visual_confirm; + int visual_confirm_rect_height; + bool sanity_checks; bool max_disp_clk; bool surface_trace; @@ -667,11 +684,14 @@ struct dc_debug_options { bool validate_dml_output; bool enable_dmcub_surface_flip; bool usbc_combo_phy_reset_wa; + bool disable_dsc_edp; + unsigned int force_dsc_edp_policy; bool enable_dram_clock_change_one_display_vactive; #if defined(CONFIG_DRM_AMD_DC_DCN) /* TODO - remove once tested */ bool legacy_dp2_lt; bool set_mst_en_for_sst; + bool force_dp2_lt_fallback_method; #endif union mem_low_power_enable_options enable_mem_low_power; union root_clock_optimization_options root_clock_optimization; @@ -684,11 +704,14 @@ struct dc_debug_options { /* FEC/PSR1 sequence enable delay in 100us */ uint8_t fec_enable_delay_in100us; bool enable_driver_sequence_debug; + enum det_size crb_alloc_policy; + int crb_alloc_policy_min_disp_count; #if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif + bool apply_vendor_specific_lttpr_wa; }; struct gpu_info_soc_bounding_box_v1_0; @@ -1289,6 +1312,11 @@ struct dc_sink_dsc_caps { // 'true' if these are virtual DPCD's DSC caps (immediately upstream of sink in MST topology), // 'false' if they are sink's DSC caps bool is_virtual_dpcd_dsc; +#if defined(CONFIG_DRM_AMD_DC_DCN) + // 'true' if MST topology supports DSC passthrough for sink + // 'false' if MST topology does not support DSC passthrough + bool is_dsc_passthrough_supported; +#endif struct dsc_dec_dpcd_caps dsc_dec_caps; }; @@ -1404,6 +1432,9 @@ void dc_unlock_memory_clock_frequency(struct dc *dc); */ void dc_lock_memory_clock_frequency(struct dc *dc); +/* set soft max for memclk, to be used for AC/DC switching clock limitations */ +void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable); + /* cleanup on driver unload */ void dc_hardware_release(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 360f3199ea6f..541376fabbef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -115,13 +115,44 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) } } +void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv) +{ + struct dmub_srv *dmub = dmub_srv->dmub; + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_clear_inbox0_ack(dmub); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error clearing INBOX0 ack: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } +} + +void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv) +{ + struct dmub_srv *dmub = dmub_srv->dmub; + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_wait_for_inbox0_ack(dmub, 100000); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error waiting for INBOX0 HW Lock Ack\n"); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } +} + void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data) { struct dmub_srv *dmub = dmub_srv->dmub; - if (dmub->hw_funcs.send_inbox0_cmd) - dmub->hw_funcs.send_inbox0_cmd(dmub, data); - // TODO: Add wait command -- poll register for ACK + struct dc_context *dc_ctx = dmub_srv->ctx; + enum dmub_status status = DMUB_STATUS_OK; + + status = dmub_srv_send_inbox0_cmd(dmub, data); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error sending INBOX0 cmd\n"); + dc_dmub_srv_log_diagnostic_data(dmub_srv); + } } bool dc_dmub_srv_cmd_with_reply_data(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 3e35eee7188c..7e4e2ec5915d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -68,6 +68,8 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu void dc_dmub_trace_event_control(struct dc *dc, bool enable); +void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv); +void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv); void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data); bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index e68e9a86a4d9..353dac420f34 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -378,7 +378,14 @@ enum dpcd_downstream_port_detailed_type { union dwnstream_port_caps_byte2 { struct { uint8_t MAX_BITS_PER_COLOR_COMPONENT:2; +#if defined(CONFIG_DRM_AMD_DC_DCN) + uint8_t MAX_ENCODED_LINK_BW_SUPPORT:3; + uint8_t SOURCE_CONTROL_MODE_SUPPORT:1; + uint8_t CONCURRENT_LINK_BRING_UP_SEQ_SUPPORT:1; + uint8_t RESERVED:1; +#else uint8_t RESERVED:6; +#endif } bits; uint8_t raw; }; @@ -416,6 +423,30 @@ union dwnstream_port_caps_byte3_hdmi { uint8_t raw; }; +#if defined(CONFIG_DRM_AMD_DC_DCN) +union hdmi_sink_encoded_link_bw_support { + struct { + uint8_t HDMI_SINK_ENCODED_LINK_BW_SUPPORT:3; + uint8_t RESERVED:5; + } bits; + uint8_t raw; +}; + +union hdmi_encoded_link_bw { + struct { + uint8_t FRL_MODE:1; // Bit 0 + uint8_t BW_9Gbps:1; + uint8_t BW_18Gbps:1; + uint8_t BW_24Gbps:1; + uint8_t BW_32Gbps:1; + uint8_t BW_40Gbps:1; + uint8_t BW_48Gbps:1; + uint8_t RESERVED:1; // Bit 7 + } bits; + uint8_t raw; +}; +#endif + /*4-byte structure for detailed capabilities of a down-stream port (DP-to-TMDS converter).*/ union dwnstream_portxcaps { @@ -852,6 +883,15 @@ struct psr_caps { unsigned char psr_version; unsigned int psr_rfb_setup_time; bool psr_exit_link_training_required; + unsigned char edp_revision; + unsigned char support_ver; + bool su_granularity_required; + bool y_coordinate_required; + uint8_t su_y_granularity; + bool alpm_cap; + bool standby_support; + uint8_t rate_control_caps; + unsigned int psr_power_opt_flag; }; /* Length of router topology ID read from DPCD in bytes. */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 52355fe6994c..eac34f591a3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -741,6 +741,9 @@ struct dc_dsc_config { uint32_t version_minor; /* DSC minor version. Full version is formed as 1.version_minor. */ bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */ int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */ +#if defined(CONFIG_DRM_AMD_DC_DCN) + bool is_frl; /* indicate if DSC is applied based on HDMI FRL sink's capability */ +#endif bool is_dp; /* indicate if DSC is applied based on DP's capability */ }; struct dc_crtc_timing { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 180ecd860296..4ebba641538b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -113,6 +113,7 @@ struct dc_link { * DIG encoder. */ bool is_dig_mapping_flexible; bool hpd_status; /* HPD status of link without physical HPD pin. */ + bool is_hpd_pending; /* Indicates a new received hpd */ bool edp_sink_present; @@ -185,12 +186,18 @@ struct dc_link { /* Drive settings read from integrated info table */ struct dc_lane_settings bios_forced_drive_settings; + /* Vendor specific LTTPR workaround variables */ + uint8_t vendor_specific_lttpr_link_rate_wa; + bool apply_vendor_specific_lttpr_link_rate_wa; + /* MST record stream using this link */ struct link_flags { bool dp_keep_receiver_powered; bool dp_skip_DID2; bool dp_skip_reset_segment; bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ + bool dpia_mst_dsc_always_on; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; @@ -224,6 +231,8 @@ static inline void get_edp_links(const struct dc *dc, *edp_num = 0; for (i = 0; i < dc->link_count; i++) { // report any eDP links, even unconnected DDI's + if (!dc->links[i]) + continue; if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) { edp_links[*edp_num] = dc->links[i]; if (++(*edp_num) == MAX_NUM_EDP) @@ -287,6 +296,10 @@ bool dc_link_setup_psr(struct dc_link *dc_link, void dc_link_get_psr_residency(const struct dc_link *link, uint32_t *residency); +void dc_link_blank_all_dp_displays(struct dc *dc); + +void dc_link_blank_dp_stream(struct dc_link *link, bool hw_init); + /* Request DC to detect if there is a Panel connected. * boot - If this call is during initial boot. * Return false for any type of detection failure or MST detection @@ -298,7 +311,7 @@ enum dc_detect_reason { DETECT_REASON_HPD, DETECT_REASON_HPDRX, DETECT_REASON_FALLBACK, - DETECT_REASON_RETRAIN + DETECT_REASON_RETRAIN, }; bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); @@ -438,6 +451,7 @@ bool dc_link_is_fec_supported(const struct dc_link *link); bool dc_link_should_enable_fec(const struct dc_link *link); #if defined(CONFIG_DRM_AMD_DC_DCN) +uint32_t dc_link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw); enum dp_link_encoding dc_link_dp_mst_decide_link_encoding_format(const struct dc_link *link); #endif #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 388457ffc0a8..0285a4b38d05 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -430,6 +430,7 @@ struct dc_dongle_caps { uint32_t dp_hdmi_max_bpc; uint32_t dp_hdmi_max_pixel_clk_in_khz; #if defined(CONFIG_DRM_AMD_DC_DCN) + uint32_t dp_hdmi_frl_max_link_bw_in_kbps; struct dc_dongle_dfp_cap_ext dfp_cap_ext; #endif }; @@ -950,6 +951,7 @@ enum dc_gpu_mem_alloc_type { enum dc_psr_version { DC_PSR_VERSION_1 = 0, + DC_PSR_VERSION_SU_1 = 1, DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 27218ede150a..70eaac017624 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -67,9 +67,6 @@ static void write_indirect_azalia_reg(struct audio *audio, /* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */ REG_SET(AZALIA_F0_CODEC_ENDPOINT_DATA, 0, AZALIA_ENDPOINT_REG_DATA, reg_data); - - DC_LOG_HW_AUDIO("AUDIO:write_indirect_azalia_reg: index: %u data: %u\n", - reg_index, reg_data); } static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index) @@ -85,9 +82,6 @@ static uint32_t read_indirect_azalia_reg(struct audio *audio, uint32_t reg_index /* AZALIA_F0_CODEC_ENDPOINT_DATA endpoint data */ value = REG_READ(AZALIA_F0_CODEC_ENDPOINT_DATA); - DC_LOG_HW_AUDIO("AUDIO:read_indirect_azalia_reg: index: %u data: %u\n", - reg_index, value); - return value; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 5622d5e32d81..dbd2cfed0603 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -113,6 +113,7 @@ struct dce_audio_shift { uint8_t DCCG_AUDIO_DTO2_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO; + uint32_t CLOCK_GATING_DISABLE; }; struct dce_audio_mask { @@ -132,6 +133,7 @@ struct dce_audio_mask { uint32_t DCCG_AUDIO_DTO2_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO0_USE_512FBR_DTO; uint32_t DCCG_AUDIO_DTO1_USE_512FBR_DTO; + uint32_t CLOCK_GATING_DISABLE; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 1e77ffee71b3..f1c61d5aee6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -788,8 +788,9 @@ static bool dce110_link_encoder_validate_hdmi_output( crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; - if (!enc110->base.features.flags.bits.HDMI_6GB_EN && - adjusted_pix_clk_khz >= 300000) + if ((!enc110->base.features.flags.bits.HDMI_6GB_EN || + enc110->base.ctx->dc->debug.hdmi20_disable) && + adjusted_pix_clk_khz >= 300000) return false; if (enc110->base.ctx->dc->debug.hdmi20_disable && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 9baf8ca0a920..b1b2e3c6f379 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -56,8 +56,11 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_cmd_lock_hw hw_lock_cmd) { union dmub_inbox0_data_register data = { 0 }; + data.inbox0_cmd_lock_hw = hw_lock_cmd; + dc_dmub_srv_clear_inbox0_ack(dmub_srv); dc_dmub_srv_send_inbox0_cmd(dmub_srv, data); + dc_dmub_srv_wait_for_inbox0_ack(dmub_srv); } bool should_use_dmub_lock(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 90eb8eedacf2..87ed48d5530d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -230,7 +230,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_ /** * Set PSR power optimization flags. */ -static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt) +static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; @@ -239,7 +239,9 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt cmd.psr_set_power_opt.header.type = DMUB_CMD__PSR; cmd.psr_set_power_opt.header.sub_type = DMUB_CMD__SET_PSR_POWER_OPT; cmd.psr_set_power_opt.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_power_opt_data); + cmd.psr_set_power_opt.psr_set_power_opt_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt; + cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); @@ -327,6 +329,16 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->fec_enable_delay_in100us = link->dc->debug.fec_enable_delay_in100us; copy_settings_data->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; copy_settings_data->panel_inst = panel_inst; + copy_settings_data->dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); + + if (link->fec_state == dc_link_fec_enabled && + (!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1, + sizeof(link->dpcd_caps.sink_dev_id_str)) || + !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_2, + sizeof(link->dpcd_caps.sink_dev_id_str)))) + copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 1; + else + copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h index 5dbd479660f1..01acc01cc191 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -46,7 +46,7 @@ struct dmub_psr_funcs { void (*psr_force_static)(struct dmub_psr *dmub, uint8_t panel_inst); void (*psr_get_residency)(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst); - void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt); + void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst); }; struct dmub_psr *dmub_psr_create(struct dc_context *ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 24e47df526f6..f1593186e964 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -69,6 +69,8 @@ #include "dcn10/dcn10_hw_sequencer.h" +#include "dce110_hw_sequencer.h" + #define GAMMA_HW_POINTS_NUM 256 /* @@ -1564,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( &pipe_ctx->stream->audio_info); } + /* make sure no pipes syncd to the pipe being enabled */ + if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic) + check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx); + #if defined(CONFIG_DRM_AMD_DC_DCN) /* DCN3.1 FPGA Workaround * Need to enable HPO DP Stream Encoder before setting OTG master enable. @@ -1602,6 +1608,11 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); + if (dc_is_dp_signal(pipe_ctx->stream->signal) && + pipe_ctx->stream_res.stream_enc->funcs->reset_fifo) + pipe_ctx->stream_res.stream_enc->funcs->reset_fifo( + pipe_ctx->stream_res.stream_enc); + if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); @@ -1655,30 +1666,12 @@ static enum dc_status apply_single_controller_ctx_to_hw( static void power_down_encoders(struct dc *dc) { - int i, j; + int i; for (i = 0; i < dc->link_count; i++) { enum signal_type signal = dc->links[i]->connector_signal; - if ((signal == SIGNAL_TYPE_EDP) || - (signal == SIGNAL_TYPE_DISPLAY_PORT)) { - if (dc->links[i]->link_enc->funcs->get_dig_frontend && - dc->links[i]->link_enc->funcs->is_dig_enabled(dc->links[i]->link_enc)) { - unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - - if (!dc->links[i]->wa_flags.dp_keep_receiver_powered) - dp_receiver_power_ctrl(dc->links[i], false); - } + dc_link_blank_dp_stream(dc->links[i], false); if (signal != SIGNAL_TYPE_EDP) signal = SIGNAL_TYPE_NONE; @@ -1805,7 +1798,6 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) struct dc_stream_state *edp_streams[MAX_NUM_EDP]; struct dc_link *edp_link_with_sink = NULL; struct dc_link *edp_link = NULL; - struct dc_stream_state *edp_stream = NULL; struct dce_hwseq *hws = dc->hwseq; int edp_with_sink_num; int edp_num; @@ -1826,27 +1818,29 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) get_edp_streams(context, edp_streams, &edp_stream_num); // Check fastboot support, disable on DCE8 because of blank screens - if (edp_num && dc->ctx->dce_version != DCE_VERSION_8_0 && + if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 && dc->ctx->dce_version != DCE_VERSION_8_1 && dc->ctx->dce_version != DCE_VERSION_8_3) { for (i = 0; i < edp_num; i++) { edp_link = edp_links[i]; + if (edp_link != edp_streams[0]->link) + continue; // enable fastboot if backend is enabled on eDP - if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { - /* Set optimization flag on eDP stream*/ - if (edp_stream_num && edp_link->link_status.link_active) { - edp_stream = edp_streams[0]; - can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing); - edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; - if (can_apply_edp_fast_boot) - DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n"); - - break; - } + if (edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + edp_link->link_status.link_active) { + struct dc_stream_state *edp_stream = edp_streams[0]; + + can_apply_edp_fast_boot = !is_edp_ilr_optimization_required(edp_stream->link, &edp_stream->timing); + edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; + if (can_apply_edp_fast_boot) + DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n"); + + break; } } // We are trying to enable eDP, don't power down VDD - if (edp_stream_num) + if (can_apply_edp_fast_boot) keep_edp_vdd_on = true; } @@ -2307,6 +2301,10 @@ enum dc_status dce110_apply_ctx_to_hw( enum dc_status status; int i; + /* reset syncd pipes from disabled pipes */ + if (dc->config.use_pipe_ctx_sync_logic) + reset_syncd_pipes_from_disabled_pipes(dc, context); + /* Reset old context */ /* look up the targets that have been removed since last commit */ hws->funcs.reset_hw_ctx_wrap(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 91fdfcd8a14e..db7ca4b0cdb9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -119,14 +119,6 @@ void dpp_read_state(struct dpp *dpp_base, } } -/* Program gamut remap in bypass mode */ -void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) -{ - REG_SET(CM_GAMUT_REMAP_CONTROL, 0, - CM_GAMUT_REMAP_MODE, 0); - /* Gamut remap in bypass */ -} - #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) bool dpp1_get_optimal_number_of_taps( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 44293d66b46b..f607a0e28f14 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -39,6 +39,10 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 +#define VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT 3 +#define VISUAL_CONFIRM_RECT_HEIGHT_MIN 1 +#define VISUAL_CONFIRM_RECT_HEIGHT_MAX 10 + #define REG(reg)\ dpp->tf_regs->reg @@ -85,51 +89,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -static void dpp1_dscl_set_overscan( - struct dcn10_dpp *dpp, - const struct scaler_data *data) -{ - uint32_t left = data->recout.x; - uint32_t top = data->recout.y; - - int right = data->h_active - data->recout.x - data->recout.width; - int bottom = data->v_active - data->recout.y - data->recout.height; - - if (right < 0) { - BREAK_TO_DEBUGGER(); - right = 0; - } - if (bottom < 0) { - BREAK_TO_DEBUGGER(); - bottom = 0; - } - - REG_SET_2(DSCL_EXT_OVERSCAN_LEFT_RIGHT, 0, - EXT_OVERSCAN_LEFT, left, - EXT_OVERSCAN_RIGHT, right); - - REG_SET_2(DSCL_EXT_OVERSCAN_TOP_BOTTOM, 0, - EXT_OVERSCAN_BOTTOM, bottom, - EXT_OVERSCAN_TOP, top); -} - -static void dpp1_dscl_set_otg_blank( - struct dcn10_dpp *dpp, const struct scaler_data *data) -{ - uint32_t h_blank_start = data->h_active; - uint32_t h_blank_end = 0; - uint32_t v_blank_start = data->v_active; - uint32_t v_blank_end = 0; - - REG_SET_2(OTG_H_BLANK, 0, - OTG_H_BLANK_START, h_blank_start, - OTG_H_BLANK_END, h_blank_end); - - REG_SET_2(OTG_V_BLANK, 0, - OTG_V_BLANK_START, v_blank_start, - OTG_V_BLANK_END, v_blank_end); -} - static int dpp1_dscl_get_pixel_depth_val(enum lb_pixel_depth depth) { if (depth == LB_PIXEL_DEPTH_30BPP) @@ -551,58 +510,6 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d return LB_MEMORY_CONFIG_0; } -void dpp1_dscl_set_scaler_auto_scale( - struct dpp *dpp_base, - const struct scaler_data *scl_data) -{ - enum lb_memory_config lb_config; - struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode( - dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); - bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN - && scl_data->format <= PIXEL_FORMAT_VIDEO_END; - - dpp1_dscl_set_overscan(dpp, scl_data); - - dpp1_dscl_set_otg_blank(dpp, scl_data); - - REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode); - - if (dscl_mode == DSCL_MODE_DSCL_BYPASS) - return; - - lb_config = dpp1_dscl_find_lb_memory_config(dpp, scl_data); - dpp1_dscl_set_lb(dpp, &scl_data->lb_params, lb_config); - - if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) - return; - - /* TODO: v_min */ - REG_SET_3(DSCL_AUTOCAL, 0, - AUTOCAL_MODE, AUTOCAL_MODE_AUTOSCALE, - AUTOCAL_NUM_PIPE, 0, - AUTOCAL_PIPE_ID, 0); - - /* Black offsets */ - if (ycbcr) - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR); - else - - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y); - - REG_SET_4(SCL_TAP_CONTROL, 0, - SCL_V_NUM_TAPS, scl_data->taps.v_taps - 1, - SCL_H_NUM_TAPS, scl_data->taps.h_taps - 1, - SCL_V_NUM_TAPS_C, scl_data->taps.v_taps_c - 1, - SCL_H_NUM_TAPS_C, scl_data->taps.h_taps_c - 1); - - dpp1_dscl_set_scl_filter(dpp, scl_data, ycbcr); -} - static void dpp1_dscl_set_manual_ratio_init( struct dcn10_dpp *dpp, const struct scaler_data *data) @@ -685,9 +592,17 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp, const struct rect *recout) { int visual_confirm_on = 0; + unsigned short visual_confirm_rect_height = VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT; + if (dpp->base.ctx->dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) visual_confirm_on = 1; + /* Check bounds to ensure the VC bar height was set to a sane value */ + if ((dpp->base.ctx->dc->debug.visual_confirm_rect_height >= VISUAL_CONFIRM_RECT_HEIGHT_MIN) && + (dpp->base.ctx->dc->debug.visual_confirm_rect_height <= VISUAL_CONFIRM_RECT_HEIGHT_MAX)) { + visual_confirm_rect_height = dpp->base.ctx->dc->debug.visual_confirm_rect_height; + } + REG_SET_2(RECOUT_START, 0, /* First pixel of RECOUT in the active OTG area */ RECOUT_START_X, recout->x, @@ -699,7 +614,7 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp, RECOUT_WIDTH, recout->width, /* Number of RECOUT vertical lines */ RECOUT_HEIGHT, recout->height - - visual_confirm_on * 2 * (dpp->base.inst + 1)); + - visual_confirm_on * 2 * (dpp->base.inst + visual_confirm_rect_height)); } /** diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 0b788d794fb3..2d470f524367 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -77,9 +77,9 @@ #define PGFSM_POWER_ON 0 #define PGFSM_POWER_OFF 2 -void print_microsec(struct dc_context *dc_ctx, - struct dc_log_buffer_ctx *log_ctx, - uint32_t ref_cycle) +static void print_microsec(struct dc_context *dc_ctx, + struct dc_log_buffer_ctx *log_ctx, + uint32_t ref_cycle) { const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; static const unsigned int frac = 1000; @@ -132,7 +132,8 @@ static void log_mpc_crc(struct dc *dc, REG_READ(DPP_TOP0_DPP_CRC_VAL_B_A), REG_READ(DPP_TOP0_DPP_CRC_VAL_R_G)); } -void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx) +static void dcn10_log_hubbub_state(struct dc *dc, + struct dc_log_buffer_ctx *log_ctx) { struct dc_context *dc_ctx = dc->ctx; struct dcn_hubbub_wm wm; @@ -1362,11 +1363,48 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) tg->funcs->tg_init(tg); } + + /* Power gate DSCs */ + if (hws->funcs.dsc_pg_control != NULL) { + uint32_t num_opps = 0; + uint32_t opp_id_src0 = OPP_ID_INVALID; + uint32_t opp_id_src1 = OPP_ID_INVALID; + + // Step 1: To find out which OPTC is running & OPTC DSC is ON + for (i = 0; i < dc->res_pool->res_cap->num_timing_generator; i++) { + uint32_t optc_dsc_state = 0; + struct timing_generator *tg = dc->res_pool->timing_generators[i]; + + if (tg->funcs->is_tg_enabled(tg)) { + if (tg->funcs->get_dsc_status) + tg->funcs->get_dsc_status(tg, &optc_dsc_state); + // Only one OPTC with DSC is ON, so if we got one result, we would exit this block. + // non-zero value is DSC enabled + if (optc_dsc_state != 0) { + tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); + break; + } + } + } + + // Step 2: To power down DSC but skip DSC of running OPTC + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { + struct dcn_dsc_state s = {0}; + + dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s); + + if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) && + s.dsc_clock_en && s.dsc_fw_en) + continue; + + hws->funcs.dsc_pg_control(hws, dc->res_pool->dscs[i]->inst, false); + } + } } void dcn10_init_hw(struct dc *dc) { - int i, j; + int i; struct abm *abm = dc->res_pool->abm; struct dmcu *dmcu = dc->res_pool->dmcu; struct dce_hwseq *hws = dc->hwseq; @@ -1468,43 +1506,8 @@ void dcn10_init_hw(struct dc *dc) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - - /* DP 2.0 requires that LTTPR Caps be read first */ - dp_retrieve_lttpr_cap(dc->links[i]); - - /* - * If any of the displays are lit up turn them off. - * The reason is that some MST hubs cannot be turned off - * completely until we tell them to do so. - * If not turned off, then displays connected to MST hub - * won't light up. - */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc); - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -1637,7 +1640,7 @@ void dcn10_reset_hw_ctx_wrap( dcn10_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); if (hws->funcs.enable_stream_gating) - hws->funcs.enable_stream_gating(dc, pipe_ctx); + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); if (old_clk) old_clk->funcs->cs_power_down(old_clk); } @@ -1970,10 +1973,9 @@ static bool wait_for_reset_trigger_to_occur( return rc; } -uint64_t reduceSizeAndFraction( - uint64_t *numerator, - uint64_t *denominator, - bool checkUint32Bounary) +static uint64_t reduceSizeAndFraction(uint64_t *numerator, + uint64_t *denominator, + bool checkUint32Bounary) { int i; bool ret = checkUint32Bounary == false; @@ -2021,7 +2023,7 @@ uint64_t reduceSizeAndFraction( return ret; } -bool is_low_refresh_rate(struct pipe_ctx *pipe) +static bool is_low_refresh_rate(struct pipe_ctx *pipe) { uint32_t master_pipe_refresh_rate = pipe->stream->timing.pix_clk_100hz * 100 / @@ -2030,7 +2032,8 @@ bool is_low_refresh_rate(struct pipe_ctx *pipe) return master_pipe_refresh_rate <= 30; } -uint8_t get_clock_divider(struct pipe_ctx *pipe, bool account_low_refresh_rate) +static uint8_t get_clock_divider(struct pipe_ctx *pipe, + bool account_low_refresh_rate) { uint32_t clock_divider = 1; uint32_t numpipes = 1; @@ -2050,10 +2053,8 @@ uint8_t get_clock_divider(struct pipe_ctx *pipe, bool account_low_refresh_rate) return clock_divider; } -int dcn10_align_pixel_clocks( - struct dc *dc, - int group_size, - struct pipe_ctx *grouped_pipes[]) +static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, + struct pipe_ctx *grouped_pipes[]) { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; @@ -2342,7 +2343,7 @@ static void mmhub_read_vm_context0_settings(struct dcn10_hubp *hubp1, } -void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) +static void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); struct vm_system_aperture_param apt = {0}; @@ -2624,7 +2625,7 @@ static void dcn10_update_dchubp_dpp( /* new calculated dispclk, dppclk are stored in * context->bw_ctx.bw.dcn.clk.dispclk_khz / dppclk_khz. current * dispclk, dppclk are from dc->clk_mgr->clks.dispclk_khz. - * dcn_validate_bandwidth compute new dispclk, dppclk. + * dcn10_validate_bandwidth compute new dispclk, dppclk. * dispclk will put in use after optimize_bandwidth when * ramp_up_dispclk_with_dpp is called. * there are two places for dppclk be put in use. One location @@ -2638,7 +2639,7 @@ static void dcn10_update_dchubp_dpp( * for example, eDP + external dp, change resolution of DP from * 1920x1080x144hz to 1280x960x60hz. * before change: dispclk = 337889 dppclk = 337889 - * change mode, dcn_validate_bandwidth calculate + * change mode, dcn10_validate_bandwidth calculate * dispclk = 143122 dppclk = 143122 * update_dchubp_dpp be executed before dispclk be updated, * dispclk = 337889, but dppclk use new value dispclk /2 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 2dc4b4e4ba02..f4b34c110eae 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -646,8 +646,9 @@ static bool dcn10_link_encoder_validate_hdmi_output( crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; - if (!enc10->base.features.flags.bits.HDMI_6GB_EN && - adjusted_pix_clk_100hz >= 3000000) + if ((!enc10->base.features.flags.bits.HDMI_6GB_EN || + enc10->base.ctx->dc->debug.hdmi20_disable) && + adjusted_pix_clk_100hz >= 3000000) return false; if (enc10->base.ctx->dc->debug.hdmi20_disable && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c index d54d731415d7..2c409356f512 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c @@ -348,36 +348,6 @@ void opp1_program_stereo( */ } -void opp1_program_oppbuf( - struct output_pixel_processor *opp, - struct oppbuf_params *oppbuf) -{ - struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); - - /* Program the oppbuf active width to be the frame width from mpc */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, oppbuf->active_width); - - /* Specifies the number of segments in multi-segment mode (DP-MSO operation) - * description "In 1/2/4 segment mode, specifies the horizontal active width in pixels of the display panel. - * In 4 segment split left/right mode, specifies the horizontal 1/2 active width in pixels of the display panel. - * Used to determine segment boundaries in multi-segment mode. Used to determine the width of the vertical active space in 3D frame packed modes. - * OPPBUF_ACTIVE_WIDTH must be integer divisible by the total number of segments." - */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, oppbuf->mso_segmentation); - - /* description "Specifies the number of overlap pixels (1-8 overlapping pixels supported), used in multi-segment mode (DP-MSO operation)" */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, oppbuf->mso_overlap_pixel_num); - - /* description "Specifies the number of times a pixel is replicated (0-15 pixel replications supported). - * A value of 0 disables replication. The total number of times a pixel is output is OPPBUF_PIXEL_REPETITION + 1." - */ - REG_UPDATE(OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, oppbuf->pixel_repetition); - - /* Controls the number of padded pixels at the end of a segment */ - if (REG(OPPBUF_CONTROL1)) - REG_UPDATE(OPPBUF_CONTROL1, OPPBUF_NUM_SEGMENT_PADDED_PIXELS, oppbuf->num_segment_padded_pixels); -} - void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable) { struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 3d2a2848857a..b1671b00ce40 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -132,22 +132,6 @@ void optc1_setup_vertical_interrupt2( } /** - * Vupdate keepout can be set to a window to block the update lock for that pipe from changing. - * Start offset begins with vstartup and goes for x number of clocks, - * end offset starts from end of vupdate to x number of clocks. - */ -void optc1_set_vupdate_keepout(struct timing_generator *optc, - struct vupdate_keepout_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, params->start_offset, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, params->end_offset, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, params->enable); -} - -/** * program_timing_generator used by mode timing set * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. * Including SYNC. Call BIOS command table to program Timings. @@ -876,7 +860,7 @@ void optc1_set_static_screen_control( OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); } -void optc1_setup_manual_trigger(struct timing_generator *optc) +static void optc1_setup_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -894,7 +878,7 @@ void optc1_setup_manual_trigger(struct timing_generator *optc) OTG_TRIGA_CLEAR, 1); } -void optc1_program_manual_trigger(struct timing_generator *optc) +static void optc1_program_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index f37551e00023..858b72149897 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -686,9 +686,8 @@ static struct output_pixel_processor *dcn10_opp_create( return &opp->base; } -struct dce_aux *dcn10_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn10_aux_engine_create(struct dc_context *ctx, + uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); @@ -724,9 +723,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) }; -struct dce_i2c_hw *dcn10_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn10_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -805,7 +803,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn10_link_encoder_create( +static struct link_encoder *dcn10_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn10_link_encoder *enc10 = @@ -847,7 +845,7 @@ static struct panel_cntl *dcn10_panel_cntl_create(const struct panel_cntl_init_d return &panel_cntl->base; } -struct clock_source *dcn10_clock_source_create( +static struct clock_source *dcn10_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -945,7 +943,7 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn10_hwseq_create, }; -void dcn10_clock_source_destroy(struct clock_source **clk_src) +static void dcn10_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; @@ -978,10 +976,8 @@ static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) pool->base.mpc = NULL; } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.opps[i] != NULL) @@ -1011,14 +1007,10 @@ static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) for (i = 0; i < pool->base.res_cap->num_ddc; i++) { if (pool->base.engines[i] != NULL) dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; } for (i = 0; i < pool->base.audio_count; i++) { @@ -1128,7 +1120,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -enum dc_status dcn10_add_stream_to_ctx( +static enum dc_status dcn10_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) @@ -1320,7 +1312,7 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .destroy = dcn10_destroy_resource_pool, .link_enc_create = dcn10_link_encoder_create, .panel_cntl_create = dcn10_panel_cntl_create, - .validate_bandwidth = dcn_validate_bandwidth, + .validate_bandwidth = dcn10_validate_bandwidth, .acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer, .validate_plane = dcn10_validate_plane, .validate_global = dcn10_validate_global, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index b0c08ee6bc2c..bf4436d7aaab 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -902,6 +902,19 @@ void enc1_stream_encoder_stop_dp_info_packets( } +void enc1_stream_encoder_reset_fifo( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* set DIG_START to 0x1 to reset FIFO */ + REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); + udelay(100); + + /* write 0 to take the FIFO out of reset */ + REG_UPDATE(DIG_FE_CNTL, DIG_START, 0); +} + void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc) @@ -1587,6 +1600,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index 687d7e4bf7ca..a146a41f68e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -626,6 +626,9 @@ void enc1_stream_encoder_send_immediate_sdp_message( void enc1_stream_encoder_stop_dp_info_packets( struct stream_encoder *enc); +void enc1_stream_encoder_reset_fifo( + struct stream_encoder *enc); + void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index a9e420c7d75a..970b65efeac1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -251,20 +251,6 @@ static void dpp2_cnv_setup ( } -void dpp2_cnv_set_bias_scale( - struct dpp *dpp_base, - struct dc_bias_and_scale *bias_and_scale) -{ - struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); - - REG_UPDATE(FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, bias_and_scale->bias_red); - REG_UPDATE(FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, bias_and_scale->bias_green); - REG_UPDATE(FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, bias_and_scale->bias_blue); - REG_UPDATE(FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, bias_and_scale->scale_red); - REG_UPDATE(FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, bias_and_scale->scale_green); - REG_UPDATE(FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, bias_and_scale->scale_blue); -} - /*compute the maximum number of lines that we can fit in the line buffer*/ void dscl2_calc_lb_num_partitions( const struct scaler_data *scl_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 79b640e202eb..ef5c4c0f4d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -162,6 +162,8 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_ds REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height); REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en, + DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c index 880954ac0b02..994fb732a7cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c @@ -527,7 +527,7 @@ static const uint16_t filter_12tap_16p_183[108] = { 0, 84, 16328, 16032, 416, 1944, 1944, 416, 16032, 16328, 84, 0, }; -const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) +static const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) { if (ratio.value < dc_fixpt_one.value) return filter_3tap_16p_upscale; @@ -539,7 +539,7 @@ const uint16_t *wbscl_get_filter_3tap_16p(struct fixed31_32 ratio) return filter_3tap_16p_183; } -const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio) +static const uint16_t *wbscl_get_filter_4tap_16p(struct fixed31_32 ratio) { if (ratio.value < dc_fixpt_one.value) return filter_4tap_16p_upscale; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 5adf42a7cc27..dc1752e9f461 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -192,9 +192,8 @@ void hubp2_vready_at_or_After_vsync(struct hubp *hubp, REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); } -void hubp2_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) +static void hubp2_program_requestor(struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -930,6 +929,16 @@ bool hubp2_is_flip_pending(struct hubp *hubp) void hubp2_set_blank(struct hubp *hubp, bool blank) { + hubp2_set_blank_regs(hubp, blank); + + if (blank) { + hubp->mpcc_id = 0xf; + hubp->opp_id = OPP_ID_INVALID; + } +} + +void hubp2_set_blank_regs(struct hubp *hubp, bool blank) +{ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); uint32_t blank_en = blank ? 1 : 0; @@ -951,9 +960,6 @@ void hubp2_set_blank(struct hubp *hubp, bool blank) HUBP_NO_OUTSTANDING_REQ, 1, 1, 200); } - - hubp->mpcc_id = 0xf; - hubp->opp_id = OPP_ID_INVALID; } } @@ -1285,7 +1291,7 @@ void hubp2_read_state(struct hubp *hubp) } -void hubp2_validate_dml_output(struct hubp *hubp, +static void hubp2_validate_dml_output(struct hubp *hubp, struct dc_context *ctx, struct _vcs_dpi_display_rq_regs_st *dml_rq_regs, struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr, @@ -1603,6 +1609,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_setup_interdependent = hubp2_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp2_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp2_dcc_control, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h index eea2254b15e4..9204c3ef323b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h @@ -330,6 +330,7 @@ void hubp2_program_surface_config( bool hubp2_is_flip_pending(struct hubp *hubp); void hubp2_set_blank(struct hubp *hubp, bool blank); +void hubp2_set_blank_regs(struct hubp *hubp, bool blank); void hubp2_cursor_set_position( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 4f88376a118f..a17fe8ab2904 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -615,6 +615,11 @@ void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->pipe_idx); } +void dcn20_disable_pixel_data(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank) +{ + dcn20_blank_pixel_data(dc, pipe_ctx, blank); +} + static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, int opp_cnt) { @@ -1080,10 +1085,8 @@ static void dcn20_power_on_plane( } } -void dcn20_enable_plane( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct dc_state *context) +static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct dc_state *context) { //if (dc->debug.sanity_checks) { // dcn10_verify_allow_pstate_change_high(dc); @@ -1842,6 +1845,11 @@ void dcn20_optimize_bandwidth( dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, context, @@ -2270,7 +2278,7 @@ void dcn20_reset_hw_ctx_wrap( dcn20_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); if (hws->funcs.enable_stream_gating) - hws->funcs.enable_stream_gating(dc, pipe_ctx); + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); if (old_clk) old_clk->funcs->cs_power_down(old_clk); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h index 6bba191cd33e..33a36c02b2f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h @@ -53,6 +53,10 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx); void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_disable_pixel_data( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool blank); void dcn20_blank_pixel_data( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 5cfd4b0afea5..91e4885b743e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -27,6 +27,8 @@ #include "dcn10/dcn10_hw_sequencer.h" #include "dcn20_hwseq.h" +#include "dcn20_init.h" + static const struct hw_sequencer_funcs dcn20_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 947eb0df3f12..15734db0cdea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -400,10 +400,9 @@ static void mpc20_program_ogam_pwl( } -void apply_DEDCN20_305_wa( - struct mpc *mpc, - int mpcc_id, enum dc_lut_mode current_mode, - enum dc_lut_mode next_mode) +static void apply_DEDCN20_305_wa(struct mpc *mpc, int mpcc_id, + enum dc_lut_mode current_mode, + enum dc_lut_mode next_mode) { struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); @@ -525,7 +524,7 @@ static void mpc2_init_mpcc(struct mpcc *mpcc, int mpcc_inst) mpcc->sm_cfg.enable = false; } -struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) +static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) { struct mpcc *tmp_mpcc = tree->opp_list; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index c90b8516dcc1..0340fdd3f5fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -73,21 +73,6 @@ bool optc2_enable_crtc(struct timing_generator *optc) } /** - * DRR double buffering control to select buffer point - * for V_TOTAL, H_TOTAL, VTOTAL_MIN, VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers - * Options: anytime, start of frame, dp start of frame (range timing) - */ -void optc2_set_timing_db_mode(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - uint32_t blank_data_double_buffer_enable = enable ? 1 : 0; - - REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, - OTG_RANGE_TIMING_DBUF_UPDATE_MODE, blank_data_double_buffer_enable); -} - -/** *For the below, I'm not sure how your GSL parameters are stored in your env, * so I will assume a gsl_params struct for now */ @@ -110,30 +95,6 @@ void optc2_set_gsl(struct timing_generator *optc, } -/* Use the gsl allow flip as the master update lock */ -void optc2_use_gsl_as_master_update_lock(struct timing_generator *optc, - const struct gsl_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_GSL_CONTROL, - OTG_MASTER_UPDATE_LOCK_GSL_EN, params->master_update_lock_gsl_en); -} - -/* You can control the GSL timing by limiting GSL to a window (X,Y) */ -void optc2_set_gsl_window(struct timing_generator *optc, - const struct gsl_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_GSL_WINDOW_X, 0, - OTG_GSL_WINDOW_START_X, params->gsl_window_start_x, - OTG_GSL_WINDOW_END_X, params->gsl_window_end_x); - REG_SET_2(OTG_GSL_WINDOW_Y, 0, - OTG_GSL_WINDOW_START_Y, params->gsl_window_start_y, - OTG_GSL_WINDOW_END_Y, params->gsl_window_end_y); -} - void optc2_set_gsl_source_select( struct timing_generator *optc, int group_idx, @@ -156,18 +117,6 @@ void optc2_set_gsl_source_select( } } -/* DSC encoder frame start controls: x = h position, line_num = # of lines from vstartup */ -void optc2_set_dsc_encoder_frame_start(struct timing_generator *optc, - int x_position, - int line_num) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_DSC_START_POSITION, 0, - OTG_DSC_START_POSITION_X, x_position, - OTG_DSC_START_POSITION_LINE_NUM, line_num); -} - /* Set DSC-related configuration. * dsc_mode: 0 disables DSC, other values enable DSC in specified format * sc_bytes_per_pixel: Bytes per pixel in u3.28 format @@ -190,6 +139,19 @@ void optc2_set_dsc_config(struct timing_generator *optc, OPTC_DSC_SLICE_WIDTH, dsc_slice_width); } +/* Get DSC-related configuration. + * dsc_mode: 0 disables DSC, other values enable DSC in specified format + */ +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OPTC_DATA_FORMAT_CONTROL, + OPTC_DSC_MODE, dsc_mode); +} + + /*TEMP: Need to figure out inheritance model here.*/ bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) { @@ -280,8 +242,8 @@ void optc2_get_optc_source(struct timing_generator *optc, *num_of_src_opp = 1; } -void optc2_set_dwb_source(struct timing_generator *optc, - uint32_t dwb_pipe_inst) +static void optc2_set_dwb_source(struct timing_generator *optc, + uint32_t dwb_pipe_inst) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -293,7 +255,7 @@ void optc2_set_dwb_source(struct timing_generator *optc, OPTC_DWB1_SOURCE_SELECT, optc->inst); } -void optc2_align_vblanks( +static void optc2_align_vblanks( struct timing_generator *optc_master, struct timing_generator *optc_slave, uint32_t master_pixel_clock_100Hz, @@ -579,6 +541,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc2_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = optc2_set_dwb_source, .set_odm_bypass = optc2_set_odm_bypass, .set_odm_combine = optc2_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h index be19a6885fbf..f7968b9ca16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h @@ -98,6 +98,9 @@ void optc2_set_dsc_config(struct timing_generator *optc, uint32_t dsc_bytes_per_pixel, uint32_t dsc_slice_width); +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode); + void optc2_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3883f918b3bb..40b122a708ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3796,6 +3796,8 @@ static bool dcn20_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) { dc->debug = debug_defaults_drv; } else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index aab25ca8343a..8a70f92795c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -593,6 +593,8 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c index f5bf04f7da25..9a3402148fde 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c @@ -44,7 +44,8 @@ #define DC_LOGGER \ dccg->ctx->logger -void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) +static void dccg201_update_dpp_dto(struct dccg *dccg, int dpp_inst, + int req_dppclk) { /* vbios handles it */ } diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c index 6b6f74d4afd1..35dd4bac242a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c @@ -55,7 +55,7 @@ static void hubp201_program_surface_config( hubp1_program_pixel_format(hubp, format); } -void hubp201_program_deadline( +static void hubp201_program_deadline( struct hubp *hubp, struct _vcs_dpi_display_dlg_regs_st *dlg_attr, struct _vcs_dpi_display_ttu_regs_st *ttu_attr) @@ -63,9 +63,8 @@ void hubp201_program_deadline( hubp1_program_deadline(hubp, dlg_attr, ttu_attr); } -void hubp201_program_requestor( - struct hubp *hubp, - struct _vcs_dpi_display_rq_regs_st *rq_regs) +static void hubp201_program_requestor(struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs) { struct dcn201_hubp *hubp201 = TO_DCN201_HUBP(hubp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c index a65e8f7801db..7f9ec59ef443 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c @@ -50,8 +50,8 @@ #define IND_REG(index) \ (enc10->link_regs->index) -void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, - struct dc_link_settings *link_settings) +static void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, + struct dc_link_settings *link_settings) { uint32_t value1, value2; struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); @@ -66,7 +66,7 @@ void dcn201_link_encoder_get_max_link_cap(struct link_encoder *enc, } } -bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc) +static bool dcn201_link_encoder_is_in_alt_mode(struct link_encoder *enc) { uint32_t value; struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0fa381088d1d..d6acf9a8590a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -672,9 +672,8 @@ static struct output_pixel_processor *dcn201_opp_create( return &opp->base; } -struct dce_aux *dcn201_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn201_aux_engine_create(struct dc_context *ctx, + uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); @@ -706,9 +705,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn201_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); @@ -789,7 +787,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn201_link_encoder_create( +static struct link_encoder *dcn201_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -811,7 +809,7 @@ struct link_encoder *dcn201_link_encoder_create( return &enc10->base; } -struct clock_source *dcn201_clock_source_create( +static struct clock_source *dcn201_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -906,7 +904,7 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn201_hwseq_create, }; -void dcn201_clock_source_destroy(struct clock_source **clk_src) +static void dcn201_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index 36044cb8ec83..c5e200d09038 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -680,7 +680,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage); } -void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) +static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); uint32_t prog_wm_value; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 3de1bcf9b3d8..58e459c7e7d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -183,7 +183,7 @@ static void hubp21_setup( } -void hubp21_set_viewport( +static void hubp21_set_viewport( struct hubp *hubp, const struct rect *viewport, const struct rect *viewport_c) @@ -225,8 +225,8 @@ void hubp21_set_viewport( SEC_VIEWPORT_Y_START_C, viewport_c->y); } -void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, - struct vm_system_aperture_param *apt) +static void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, + struct vm_system_aperture_param *apt) { struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); @@ -248,7 +248,7 @@ void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, SYSTEM_ACCESS_MODE, 0x3); } -void hubp21_validate_dml_output(struct hubp *hubp, +static void hubp21_validate_dml_output(struct hubp *hubp, struct dc_context *ctx, struct _vcs_dpi_display_rq_regs_st *dml_rq_regs, struct _vcs_dpi_display_dlg_regs_st *dml_dlg_attr, @@ -664,7 +664,8 @@ static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS); } -void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs) +static void dmcub_PLAT_54186_wa(struct hubp *hubp, + struct surface_flip_registers *flip_regs) { struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); @@ -697,7 +698,7 @@ void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_ PERF_TRACE(); // TODO: remove after performance is stable. } -bool hubp21_program_surface_flip_and_addr( +static bool hubp21_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, bool flip_immediate) @@ -805,7 +806,7 @@ bool hubp21_program_surface_flip_and_addr( return true; } -void hubp21_init(struct hubp *hubp) +static void hubp21_init(struct hubp *hubp) { // DEDCN21-133: Inconsistent row starting line for flip between DPTE and Meta // This is a chicken bit to enable the ECO fix. diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 54c11ba550ae..b270f0b194dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -28,6 +28,8 @@ #include "dcn20/dcn20_hwseq.h" #include "dcn21_hwseq.h" +#include "dcn21_init.h" + static const struct hw_sequencer_funcs dcn21_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c index aa46c35b05a2..0a1ba6e7081c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c @@ -203,7 +203,7 @@ static bool update_cfg_data( return true; } -bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc) +static bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); int value; @@ -277,7 +277,7 @@ void dcn21_link_encoder_enable_dp_output( } -void dcn21_link_encoder_enable_dp_mst_output( +static void dcn21_link_encoder_enable_dp_mst_output( struct link_encoder *enc, const struct dc_link_settings *link_settings, enum clock_source_id clock_source) @@ -288,9 +288,8 @@ void dcn21_link_encoder_enable_dp_mst_output( dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source); } -void dcn21_link_encoder_disable_output( - struct link_encoder *enc, - enum signal_type signal) +static void dcn21_link_encoder_disable_output(struct link_encoder *enc, + enum signal_type signal) { dcn10_link_encoder_disable_output(enc, signal); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d452a0d1777e..ca1bbc942fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -784,9 +784,8 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn21_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn21_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -1093,7 +1092,7 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s } } -void dcn21_calculate_wm( +static void dcn21_calculate_wm( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *out_pipe_cnt, @@ -1390,7 +1389,7 @@ validate_out: * with DC_FP_START()/DC_FP_END(). Use the same approach as for * dcn20_validate_bandwidth in dcn20_resource.c. */ -bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, +static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -1480,8 +1479,8 @@ static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx) return &hubbub->base; } -struct output_pixel_processor *dcn21_opp_create( - struct dc_context *ctx, uint32_t inst) +static struct output_pixel_processor *dcn21_opp_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); @@ -1496,9 +1495,8 @@ struct output_pixel_processor *dcn21_opp_create( return &opp->base; } -struct timing_generator *dcn21_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) +static struct timing_generator *dcn21_timing_generator_create(struct dc_context *ctx, + uint32_t instance) { struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); @@ -1518,7 +1516,7 @@ struct timing_generator *dcn21_timing_generator_create( return &tgn10->base; } -struct mpc *dcn21_mpc_create(struct dc_context *ctx) +static struct mpc *dcn21_mpc_create(struct dc_context *ctx) { struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), GFP_KERNEL); @@ -1545,8 +1543,8 @@ static void read_dce_straps( } -struct display_stream_compressor *dcn21_dsc_create( - struct dc_context *ctx, uint32_t inst) +static struct display_stream_compressor *dcn21_dsc_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); @@ -1683,9 +1681,8 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -struct stream_encoder *dcn21_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn21_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); @@ -1917,7 +1914,7 @@ static int dcn21_populate_dml_pipes_from_context( return pipe_cnt; } -enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) +static enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) { enum dc_status result = DC_OK; @@ -2028,6 +2025,8 @@ static bool dcn21_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c index ebd9c35c914f..8daa12730bc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c @@ -50,22 +50,6 @@ enc1->base.ctx -void convert_dc_info_packet_to_128( - const struct dc_info_packet *info_packet, - struct dc_info_packet_128 *info_packet_128) -{ - unsigned int i; - - info_packet_128->hb0 = info_packet->hb0; - info_packet_128->hb1 = info_packet->hb1; - info_packet_128->hb2 = info_packet->hb2; - info_packet_128->hb3 = info_packet->hb3; - - for (i = 0; i < 32; i++) { - info_packet_128->sb[i] = info_packet->sb[i]; - } - -} static void enc3_update_hdmi_info_packet( struct dcn10_stream_encoder *enc1, uint32_t packet_index, @@ -489,7 +473,7 @@ static void enc3_dp_set_odm_combine( } /* setup stream encoder in dvi mode */ -void enc3_stream_encoder_dvi_set_stream_attribute( +static void enc3_stream_encoder_dvi_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, bool is_dual_link) @@ -805,6 +789,8 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = { enc3_stream_encoder_update_dp_info_packets, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, + .reset_fifo = + enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index c1d967ed6551..ab3918c0a15b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -41,8 +41,7 @@ dpp->tf_shift->field_name, dpp->tf_mask->field_name -void dpp30_read_state(struct dpp *dpp_base, - struct dcn_dpp_state *s) +static void dpp30_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s) { struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); @@ -373,7 +372,7 @@ void dpp3_set_cursor_attributes( } -bool dpp3_get_optimal_number_of_taps( +static bool dpp3_get_optimal_number_of_taps( struct dpp *dpp, struct scaler_data *scl_data, const struct scaling_taps *in_taps) @@ -474,22 +473,7 @@ bool dpp3_get_optimal_number_of_taps( return true; } -void dpp3_cnv_set_bias_scale( - struct dpp *dpp_base, - struct dc_bias_and_scale *bias_and_scale) -{ - struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); - - REG_UPDATE(FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, bias_and_scale->bias_red); - REG_UPDATE(FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, bias_and_scale->bias_green); - REG_UPDATE(FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, bias_and_scale->bias_blue); - REG_UPDATE(FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, bias_and_scale->scale_red); - REG_UPDATE(FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, bias_and_scale->scale_green); - REG_UPDATE(FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, bias_and_scale->scale_blue); -} - -void dpp3_deferred_update( - struct dpp *dpp_base) +static void dpp3_deferred_update(struct dpp *dpp_base) { int bypass_state; struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); @@ -751,8 +735,8 @@ static enum dc_lut_mode dpp3_get_blndgam_current(struct dpp *dpp_base) return mode; } -bool dpp3_program_blnd_lut( - struct dpp *dpp_base, const struct pwl_params *params) +static bool dpp3_program_blnd_lut(struct dpp *dpp_base, + const struct pwl_params *params) { enum dc_lut_mode current_mode; enum dc_lut_mode next_mode; @@ -1164,9 +1148,8 @@ static void dpp3_program_shaper_lutb_settings( } -bool dpp3_program_shaper( - struct dpp *dpp_base, - const struct pwl_params *params) +static bool dpp3_program_shaper(struct dpp *dpp_base, + const struct pwl_params *params) { enum dc_lut_mode current_mode; enum dc_lut_mode next_mode; @@ -1355,9 +1338,8 @@ static void dpp3_select_3dlut_ram_mask( REG_SET(CM_3DLUT_INDEX, 0, CM_3DLUT_INDEX, 0); } -bool dpp3_program_3dlut( - struct dpp *dpp_base, - struct tetrahedral_params *params) +static bool dpp3_program_3dlut(struct dpp *dpp_base, + struct tetrahedral_params *params) { enum dc_lut_mode mode; bool is_17x17x17; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index eac08926b574..6a4dcafb9bba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -490,6 +490,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_setup_interdependent = hubp2_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index df2717116604..1db1ca19411d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -344,6 +344,17 @@ void dcn30_enable_writeback( dwb->funcs->enable(dwb, &wb_info->dwb_params); } +void dcn30_prepare_bandwidth(struct dc *dc, + struct dc_state *context) +{ + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); + + dcn20_prepare_bandwidth(dc, context); +} + void dcn30_disable_writeback( struct dc *dc, unsigned int dwb_pipe_inst) @@ -437,7 +448,7 @@ void dcn30_init_hw(struct dc *dc) struct dce_hwseq *hws = dc->hwseq; struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; - int i, j; + int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; @@ -534,41 +545,8 @@ void dcn30_init_hw(struct dc *dc) hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - /* DP 2.0 states that LTTPR regs must be read first */ - dp_retrieve_lttpr_cap(dc->links[i]); - - /* if any of the displays are lit up turn them off */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe; - - fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - if (fe == ENGINE_ID_UNKNOWN) - continue; - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h index e9a0005288d3..73e7b690e82c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h @@ -27,7 +27,7 @@ #define __DC_HWSS_DCN30_H__ #include "hw_sequencer_private.h" - +#include "dcn20/dcn20_hwseq.h" struct dc; void dcn30_init_hw(struct dc *dc); @@ -47,6 +47,9 @@ void dcn30_disable_writeback( struct dc *dc, unsigned int dwb_pipe_inst); +void dcn30_prepare_bandwidth(struct dc *dc, + struct dc_state *context); + bool dcn30_mmhubbub_warmup( struct dc *dc, unsigned int num_dwb, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 93f32a312fee..bb347319de83 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -29,6 +29,8 @@ #include "dcn21/dcn21_hwseq.h" #include "dcn30_hwseq.h" +#include "dcn30_init.h" + static const struct hw_sequencer_funcs dcn30_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn30_init_hw, @@ -53,6 +55,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .enable_audio_stream = dce110_enable_audio_stream, .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn10_lock_all_pipes, .cursor_lock = dcn10_cursor_lock, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c index 1c4b171c68ad..7a93eff183d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c @@ -100,7 +100,7 @@ static void mmhubbub3_warmup_mcif(struct mcif_wb *mcif_wb, REG_UPDATE(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB_WARMUP_EN, false); } -void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb, +static void mmhubbub3_config_mcif_buf(struct mcif_wb *mcif_wb, struct mcif_buf_params *params, unsigned int dest_height) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index 95149734378b..0ce0d6165f43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -1362,7 +1362,7 @@ uint32_t mpcc3_acquire_rmu(struct mpc *mpc, int mpcc_id, int rmu_idx) return -1; } -int mpcc3_release_rmu(struct mpc *mpc, int mpcc_id) +static int mpcc3_release_rmu(struct mpc *mpc, int mpcc_id) { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); int rmu_idx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 5d9e6413d67a..f5e8916601d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -332,6 +332,7 @@ static struct timing_generator_funcs dcn30_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, .set_odm_bypass = optc3_set_odm_bypass, .set_odm_combine = optc3_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 79a66e0c4303..369ceeeddc7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -816,7 +816,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -875,7 +875,7 @@ static const struct dc_debug_options debug_defaults_diags = { .use_max_lb = true }; -void dcn30_dpp_destroy(struct dpp **dpp) +static void dcn30_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); *dpp = NULL; @@ -992,7 +992,7 @@ static struct mpc *dcn30_mpc_create( return &mpc30->base; } -struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) +static struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) { int i; @@ -1143,9 +1143,8 @@ static struct afmt *dcn30_afmt_create( return &afmt3->base; } -struct stream_encoder *dcn30_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn30_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1; struct vpg *vpg; @@ -1179,8 +1178,7 @@ struct stream_encoder *dcn30_stream_encoder_create( return &enc1->base; } -struct dce_hwseq *dcn30_hwseq_create( - struct dc_context *ctx) +static struct dce_hwseq *dcn30_hwseq_create(struct dc_context *ctx) { struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); @@ -1763,6 +1761,17 @@ static bool dcn30_split_stream_for_mpc_or_odm( int pipe_idx = sec_pipe->pipe_idx; const struct resource_pool *pool = dc->res_pool; + if (pri_pipe->plane_state) { + /* ODM + window MPO, where MPO window is on left half only */ + if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= + pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) + return true; + + /* ODM + window MPO, where MPO window is on right half only */ + if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.width/2) + return true; + } + *sec_pipe = *pri_pipe; sec_pipe->pipe_idx = pipe_idx; @@ -2639,6 +2648,8 @@ static bool dcn30_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.hdmi_frl_pcon_support = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c index e85b695f2351..3d42a1a337ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c @@ -30,6 +30,8 @@ #include "dcn30/dcn30_hwseq.h" #include "dcn301_hwseq.h" +#include "dcn301_init.h" + static const struct hw_sequencer_funcs dcn301_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c index 736bda30abc3..ad0df1a72a90 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c @@ -93,7 +93,7 @@ static unsigned int dcn301_get_16_bit_backlight_from_pwm(struct panel_cntl *pane return (uint32_t)(current_backlight); } -uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +static uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t value; @@ -147,7 +147,7 @@ uint32_t dcn301_panel_cntl_hw_init(struct panel_cntl *panel_cntl) return current_backlight; } -void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) +static void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(*panel_cntl); @@ -155,7 +155,7 @@ void dcn301_panel_cntl_destroy(struct panel_cntl **panel_cntl) *panel_cntl = NULL; } -bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) +static bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t value; @@ -165,7 +165,7 @@ bool dcn301_is_panel_backlight_on(struct panel_cntl *panel_cntl) return value; } -bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) +static bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); uint32_t pwr_seq_state, dig_on, dig_on_ovrd; @@ -177,7 +177,7 @@ bool dcn301_is_panel_powered_on(struct panel_cntl *panel_cntl) return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1); } -void dcn301_store_backlight_level(struct panel_cntl *panel_cntl) +static void dcn301_store_backlight_level(struct panel_cntl *panel_cntl) { struct dcn301_panel_cntl *dcn301_panel_cntl = TO_DCN301_PANEL_CNTL(panel_cntl); diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index fbaa03f26d8b..b4001233867c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -656,7 +656,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -717,15 +717,13 @@ static const struct dc_debug_options debug_defaults_diags = { .use_max_lb = false, }; -void dcn301_dpp_destroy(struct dpp **dpp) +static void dcn301_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); *dpp = NULL; } -struct dpp *dcn301_dpp_create( - struct dc_context *ctx, - uint32_t inst) +static struct dpp *dcn301_dpp_create(struct dc_context *ctx, uint32_t inst) { struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); @@ -741,8 +739,8 @@ struct dpp *dcn301_dpp_create( kfree(dpp); return NULL; } -struct output_pixel_processor *dcn301_opp_create( - struct dc_context *ctx, uint32_t inst) +static struct output_pixel_processor *dcn301_opp_create(struct dc_context *ctx, + uint32_t inst) { struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); @@ -757,9 +755,7 @@ struct output_pixel_processor *dcn301_opp_create( return &opp->base; } -struct dce_aux *dcn301_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_aux *dcn301_aux_engine_create(struct dc_context *ctx, uint32_t inst) { struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); @@ -793,9 +789,7 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) }; -struct dce_i2c_hw *dcn301_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) +static struct dce_i2c_hw *dcn301_i2c_hw_create(struct dc_context *ctx, uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); @@ -829,7 +823,7 @@ static struct mpc *dcn301_mpc_create( return &mpc30->base; } -struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) +static struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) { int i; @@ -860,9 +854,8 @@ struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) return &hubbub3->base; } -struct timing_generator *dcn301_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) +static struct timing_generator *dcn301_timing_generator_create( + struct dc_context *ctx, uint32_t instance) { struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); @@ -894,7 +887,7 @@ static const struct encoder_feature_support link_enc_feature = { .flags.bits.IS_TPS4_CAPABLE = true }; -struct link_encoder *dcn301_link_encoder_create( +static struct link_encoder *dcn301_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -915,7 +908,7 @@ struct link_encoder *dcn301_link_encoder_create( return &enc20->enc10.base; } -struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) +static struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) { struct dcn301_panel_cntl *panel_cntl = kzalloc(sizeof(struct dcn301_panel_cntl), GFP_KERNEL); @@ -997,9 +990,8 @@ static struct afmt *dcn301_afmt_create( return &afmt3->base; } -struct stream_encoder *dcn301_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) +static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) { struct dcn10_stream_encoder *enc1; struct vpg *vpg; @@ -1033,8 +1025,7 @@ struct stream_encoder *dcn301_stream_encoder_create( return &enc1->base; } -struct dce_hwseq *dcn301_hwseq_create( - struct dc_context *ctx) +static struct dce_hwseq *dcn301_hwseq_create(struct dc_context *ctx) { struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); @@ -1182,9 +1173,7 @@ static void dcn301_destruct(struct dcn301_resource_pool *pool) dcn_dccg_destroy(&pool->base.dccg); } -struct hubp *dcn301_hubp_create( - struct dc_context *ctx, - uint32_t inst) +static struct hubp *dcn301_hubp_create(struct dc_context *ctx, uint32_t inst) { struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); @@ -1201,7 +1190,7 @@ struct hubp *dcn301_hubp_create( return NULL; } -bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +static bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) { int i; uint32_t pipe_count = pool->res_cap->num_dwb; @@ -1226,7 +1215,7 @@ bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) return true; } -bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +static bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) { int i; uint32_t pipe_count = pool->res_cap->num_dwb; @@ -1449,9 +1438,7 @@ static bool dcn301_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; -#ifdef CONFIG_DRM_AMD_DC_DMUB dc->caps.dmcub_support = true; -#endif /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; @@ -1487,6 +1474,23 @@ static bool dcn301_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c index d88b9011c502..eb375f30f5bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c @@ -29,6 +29,8 @@ #include "dc.h" +#include "dcn302_init.h" + void dcn302_hw_sequencer_construct(struct dc *dc) { dcn30_hw_sequencer_construct(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fcf96cf08c76..003e95368672 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -276,7 +276,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, .max_upscale_factor = { @@ -1557,6 +1557,24 @@ static bool dcn302_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c index aa5dbbade2bd..f499f8ab5e47 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c @@ -9,6 +9,8 @@ #include "dcn30/dcn30_init.h" #include "dc.h" +#include "dcn303_init.h" + void dcn303_hw_sequencer_construct(struct dc *dc) { dcn30_hw_sequencer_construct(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4a9b64023675..01ba9d656c72 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -254,7 +254,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, .max_upscale_factor = { @@ -1500,6 +1500,23 @@ static bool dcn303_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 815481a3ef54..ea4f8e06b07c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -462,7 +462,7 @@ void dccg31_set_physymclk( } /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ -void dccg31_set_dtbclk_dto( +static void dccg31_set_dtbclk_dto( struct dccg *dccg, int dtbclk_inst, int req_dtbclk_khz, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index ee6f13bef377..71c359f9cdd2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -67,6 +67,39 @@ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #endif +static uint8_t phy_id_from_transmitter(enum transmitter t) +{ + uint8_t phy_id; + + switch (t) { + case TRANSMITTER_UNIPHY_A: + phy_id = 0; + break; + case TRANSMITTER_UNIPHY_B: + phy_id = 1; + break; + case TRANSMITTER_UNIPHY_C: + phy_id = 2; + break; + case TRANSMITTER_UNIPHY_D: + phy_id = 3; + break; + case TRANSMITTER_UNIPHY_E: + phy_id = 4; + break; + case TRANSMITTER_UNIPHY_F: + phy_id = 5; + break; + case TRANSMITTER_UNIPHY_G: + phy_id = 6; + break; + default: + phy_id = 0; + break; + } + return phy_id; +} + void dcn31_link_encoder_set_dio_phy_mux( struct link_encoder *enc, enum encoder_type_select sel, @@ -141,7 +174,7 @@ void dcn31_link_encoder_set_dio_phy_mux( } } -void enc31_hw_init(struct link_encoder *enc) +static void enc31_hw_init(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); @@ -536,57 +569,45 @@ void dcn31_link_encoder_disable_output( bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); - uint32_t dp_alt_mode_disable; + struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv; + union dmub_rb_cmd cmd; bool is_usb_c_alt_mode = false; - if (enc->features.flags.bits.DP_IS_USB_C) { - if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { - // [Note] no need to check hw_internal_rev once phy mux selection is ready - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } else { - /* - * B0 phys use a new set of registers to check whether alt mode is disabled. - * if value == 1 alt mode is disabled, otherwise it is enabled. - */ - if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } else { - // [Note] need to change TRANSMITTER_UNIPHY_C/D to F/G once phy mux selection is ready - REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable); - } - } + if (enc->features.flags.bits.DP_IS_USB_C && dc_dmub_srv) { + memset(&cmd, 0, sizeof(cmd)); + cmd.query_dp_alt.header.type = DMUB_CMD__VBIOS; + cmd.query_dp_alt.header.sub_type = DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT; + cmd.query_dp_alt.header.payload_bytes = sizeof(cmd.panel_cntl.data); + cmd.query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter); - is_usb_c_alt_mode = (dp_alt_mode_disable == 0); + if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd)) + return false; + + is_usb_c_alt_mode = (cmd.query_dp_alt.data.is_dp_alt_disable == 0); } return is_usb_c_alt_mode; } -void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, - struct dc_link_settings *link_settings) +void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_link_settings *link_settings) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); - uint32_t is_in_usb_c_dp4_mode = 0; + struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv; + union dmub_rb_cmd cmd; dcn10_link_encoder_get_max_link_cap(enc, link_settings); - /* in usb c dp2 mode, max lane count is 2 */ - if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) { - if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) { - // [Note] no need to check hw_internal_rev once phy mux selection is ready - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); - } else { - if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) - || (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) { - REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); - } else { - REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode); - } - } - if (!is_in_usb_c_dp4_mode) + if (enc->features.flags.bits.DP_IS_USB_C && dc_dmub_srv) { + memset(&cmd, 0, sizeof(cmd)); + cmd.query_dp_alt.header.type = DMUB_CMD__VBIOS; + cmd.query_dp_alt.header.sub_type = DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT; + cmd.query_dp_alt.header.payload_bytes = sizeof(cmd.panel_cntl.data); + cmd.query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter); + + if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd)) + return; + + if (cmd.query_dp_alt.data.is_usb && cmd.query_dp_alt.data.is_dp4 == 0) link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c index 565f12dd179a..5065904c7833 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c @@ -358,8 +358,8 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute( h_width = hw_crtc_timing.h_border_left + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right; v_height = hw_crtc_timing.v_border_top + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom; - hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0x80 : 0; - vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0x80 : 0; + hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0 : 0x80; + vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0 : 0x80; v_freq = hw_crtc_timing.pix_clk_100hz * 100; /* MSA Packet Mapping to 32-bit Link Symbols - DP2 spec, section 2.7.4.1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 5dd1ce9ddb53..4206ce5bf9a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -112,7 +112,7 @@ void dcn31_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; - int i, j; + int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -192,50 +192,13 @@ void dcn31_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - /* Enables outbox notifications for usb4 dpia */ if (dc->res_pool->usb4_dpia_count) dmub_enable_outbox_notification(dc); /* we want to turn off all dp displays before doing detection */ - if (dc->config.power_down_display_on_boot) { - uint8_t dpcd_power_state = '\0'; - enum dc_status status = DC_ERROR_UNEXPECTED; - - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal != SIGNAL_TYPE_DISPLAY_PORT) - continue; - - /* if any of the displays are lit up turn them off */ - status = core_link_read_dpcd(dc->links[i], DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) { - /* blank dp stream before power off receiver*/ - if (dc->links[i]->ep_type == DISPLAY_ENDPOINT_PHY && - dc->links[i]->link_enc->funcs->get_dig_frontend) { - unsigned int fe; - - fe = dc->links[i]->link_enc->funcs->get_dig_frontend( - dc->links[i]->link_enc); - if (fe == ENGINE_ID_UNKNOWN) - continue; - - for (j = 0; j < dc->res_pool->stream_enc_count; j++) { - if (fe == dc->res_pool->stream_enc[j]->id) { - dc->res_pool->stream_enc[j]->funcs->dp_blank(dc->links[i], - dc->res_pool->stream_enc[j]); - break; - } - } - } - dp_receiver_power_ctrl(dc->links[i], false); - } - } - } + if (dc->config.power_down_display_on_boot) + dc_link_blank_all_dp_displays(dc); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -602,7 +565,7 @@ void dcn31_reset_hw_ctx_wrap( dcn31_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); if (hws->funcs.enable_stream_gating) - hws->funcs.enable_stream_gating(dc, pipe_ctx); + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); if (old_clk) old_clk->funcs->cs_power_down(old_clk); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 05335a8c3c2d..7a7a8c5edabd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -31,6 +31,8 @@ #include "dcn301/dcn301_hwseq.h" #include "dcn31/dcn31_hwseq.h" +#include "dcn31_init.h" + static const struct hw_sequencer_funcs dcn31_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn31_init_hw, @@ -101,6 +103,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; @@ -149,4 +152,9 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } + if (dc->debug.disable_z10) { + /*hw not support z10 or sw disable it*/ + dc->hwss.z10_restore = NULL; + dc->hwss.z10_save_init = NULL; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index a4b1d98f0007..e8562fa11366 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -256,6 +256,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .get_crc = optc1_get_crc, .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, .set_odm_bypass = optc3_set_odm_bypass, .set_odm_combine = optc31_set_odm_combine, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 3b3721386571..83ece02380a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -65,7 +65,7 @@ static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cnt return cmd.panel_cntl.data.current_backlight; } -uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) +static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) { struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(panel_cntl); struct dc_dmub_srv *dc_dmub_srv = panel_cntl->ctx->dmub_srv; @@ -96,7 +96,7 @@ uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.current_backlight; } -void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) +static void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) { struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(*panel_cntl); @@ -104,7 +104,7 @@ void dcn31_panel_cntl_destroy(struct panel_cntl **panel_cntl) *panel_cntl = NULL; } -bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) +static bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; @@ -114,7 +114,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.is_backlight_on; } -bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) +static bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; @@ -124,7 +124,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) return cmd.panel_cntl.data.is_powered_on; } -void dcn31_store_backlight_level(struct panel_cntl *panel_cntl) +static void dcn31_store_backlight_level(struct panel_cntl *panel_cntl) { union dmub_rb_cmd cmd; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 18896294ae12..29ee3c22b0ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -485,7 +485,8 @@ static const struct dcn31_apg_mask apg_mask = { SE_DCN3_REG_LIST(id)\ } -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { +/* Some encoders won't be initialized here - but they're logical, not physical. */ +static const struct dcn10_stream_enc_registers stream_enc_regs[ENGINE_ID_COUNT] = { stream_enc_regs(0), stream_enc_regs(1), stream_enc_regs(2), @@ -968,7 +969,7 @@ static const struct dc_plane_cap plane_cap = { .argb8888 = true, .nv12 = true, .fp16 = true, - .p010 = false, + .p010 = true, .ayuv = false, }, @@ -1023,6 +1024,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, + .apply_vendor_specific_lttpr_wa = true, }; static const struct dc_debug_options debug_defaults_diags = { @@ -1270,7 +1272,7 @@ static struct link_encoder *dcn31_link_enc_create_minimal( return &enc20->enc10.base; } -struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) { struct dcn31_panel_cntl *panel_cntl = kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); @@ -1774,6 +1776,7 @@ static int dcn31_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; + bool upscaled = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -1785,6 +1788,11 @@ static int dcn31_populate_dml_pipes_from_context( pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + /* * Immediate flip can be set dynamically after enabling the plane. * We need to require support for immediate flip or underflow can be @@ -1829,6 +1837,11 @@ static int dcn31_populate_dml_pipes_from_context( context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; pipes[0].pipe.src.unbounded_req_mode = true; } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; } return pipe_cnt; @@ -2199,6 +2212,8 @@ static bool dcn31_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; + dc->caps.hdmi_frl_pcon_support = true; + dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.is_apu = true; @@ -2237,6 +2252,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 0fe66b080a03..7f94e3f70d7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -59,7 +59,7 @@ void dm_helpers_free_gpu_mem( void *pvMem); enum dc_edid_status dm_helpers_parse_edid_caps( - struct dc_context *ctx, + struct dc_link *link, const struct dc_edid *edid, struct dc_edid_caps *edid_caps); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 46c433c0bcb0..8bc27de4c104 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1711,14 +1711,6 @@ void dml21_rq_dlg_get_dlg_reg( dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); } -void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) -{ - memset(arb_param, 0, sizeof(*arb_param)); - arb_param->max_req_outstanding = 256; - arb_param->min_req_outstanding = 68; - arb_param->sat_level_us = 60; -} - static void calculate_ttu_cursor( struct display_mode_lib *mode_lib, double *refcyc_per_req_delivery_pre_cur, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 7e937bdcea00..6feb23432f8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -422,62 +422,8 @@ static void CalculateUrgentBurstFactor( static void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, - int MaxInterDCNTileRepeaters, int MaxPrefetchMode, - double FinalDRAMClockChangeLatency, - double SREnterPlusExitTime, - int ReturnBusWidth, - int RoundTripPingLatencyCycles, - int ReorderingBytes, - int PixelChunkSizeInKByte, - int MetaChunkSize, - bool GPUVMEnable, - int GPUVMMaxPageTableLevels, - bool HostVMEnable, - int NumberOfActivePlanes, - double HostVMMinPageSize, - int HostVMMaxNonCachedPageTableLevels, - bool DynamicMetadataVMEnabled, - enum immediate_flip_requirement ImmediateFlipRequirement, - bool ProgressiveToInterlaceUnitInOPP, - double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - int VTotal[], - int VActive[], - int DynamicMetadataTransmittedBytes[], - int DynamicMetadataLinesBeforeActiveRequired[], - bool Interlace[], - double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], - double RequiredDISPCLK[][2], - double UrgLatency[], - unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], - double ProjectedDCFCLKDeepSleep[][2], - double MaximumVStartup[][2][DC__NUM_DPP__MAX], - double TotalVActivePixelBandwidth[][2], - double TotalVActiveCursorBandwidth[][2], - double TotalMetaRowBandwidth[][2], - double TotalDPTERowBandwidth[][2], - unsigned int TotalNumberOfActiveDPP[][2], - unsigned int TotalNumberOfDCCActiveDPP[][2], - int dpte_group_bytes[], - double PrefetchLinesY[][2][DC__NUM_DPP__MAX], - double PrefetchLinesC[][2][DC__NUM_DPP__MAX], - int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], - int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], - int BytePerPixelY[], - int BytePerPixelC[], - int HTotal[], - double PixelClock[], - double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], - double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], - double MetaRowBytes[][2][DC__NUM_DPP__MAX], - bool DynamicMetadataEnable[], - double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], - double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double DCFCLKPerState[], - double DCFCLKState[][2]); + int ReorderingBytes); static void CalculatePixelDeliveryTimes( unsigned int NumberOfActivePlanes, @@ -3949,6 +3895,102 @@ static double TruncToValidBPP( return BPP_INVALID; } +static noinline void CalculatePrefetchSchedulePerPlane( + struct display_mode_lib *mode_lib, + double HostVMInefficiencyFactor, + int i, + unsigned j, + unsigned k) +{ + struct vba_vars_st *v = &mode_lib->vba; + Pipe myPipe; + + myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; + myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; + myPipe.PixelClock = v->PixelClock[k]; + myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; + myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; + myPipe.ScalerEnabled = v->ScalerEnabled[k]; + myPipe.SourceScan = v->SourceScan[k]; + myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; + myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; + myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; + myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; + myPipe.InterlaceEnable = v->Interlace[k]; + myPipe.NumberOfCursors = v->NumberOfCursors[k]; + myPipe.VBlank = v->VTotal[k] - v->VActive[k]; + myPipe.HTotal = v->HTotal[k]; + myPipe.DCCEnable = v->DCCEnable[k]; + myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 + || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; + myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; + myPipe.BytePerPixelY = v->BytePerPixelY[k]; + myPipe.BytePerPixelC = v->BytePerPixelC[k]; + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; + v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( + mode_lib, + HostVMInefficiencyFactor, + &myPipe, + v->DSCDelayPerState[i][k], + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, + v->DPPCLKDelaySCL, + v->DPPCLKDelaySCLLBOnly, + v->DPPCLKDelayCNVCCursor, + v->DISPCLKDelaySubtotal, + v->SwathWidthYThisState[k] / v->HRatio[k], + v->OutputFormat[k], + v->MaxInterDCNTileRepeaters, + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->GPUVMMaxPageTableLevels, + v->GPUVMEnable, + v->HostVMEnable, + v->HostVMMaxNonCachedPageTableLevels, + v->HostVMMinPageSize, + v->DynamicMetadataEnable[k], + v->DynamicMetadataVMEnabled, + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->DynamicMetadataTransmittedBytes[k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], + v->TWait, + &v->DSTXAfterScaler[k], + &v->DSTYAfterScaler[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[i][j][k], + &v->RequiredPrefetchPixelDataBWChroma[i][j][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], + &v->dummy7[k], + &v->dummy8[k], + &v->dummy13[k], + &v->VUpdateOffsetPix[k], + &v->VUpdateWidthPix[k], + &v->VReadyOffsetPix[k]); +} + void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; @@ -5079,66 +5121,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - if (v->UseMinimumRequiredDCFCLK == true) { - UseMinimumDCFCLK( - mode_lib, - v->MaxInterDCNTileRepeaters, - MaxPrefetchMode, - v->DRAMClockChangeLatency, - v->SREnterPlusExitTime, - v->ReturnBusWidth, - v->RoundTripPingLatencyCycles, - ReorderingBytes, - v->PixelChunkSizeInKByte, - v->MetaChunkSize, - v->GPUVMEnable, - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->NumberOfActivePlanes, - v->HostVMMinPageSize, - v->HostVMMaxNonCachedPageTableLevels, - v->DynamicMetadataVMEnabled, - v->ImmediateFlipRequirement[0], - v->ProgressiveToInterlaceUnitInOPP, - v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - v->VTotal, - v->VActive, - v->DynamicMetadataTransmittedBytes, - v->DynamicMetadataLinesBeforeActiveRequired, - v->Interlace, - v->RequiredDPPCLK, - v->RequiredDISPCLK, - v->UrgLatency, - v->NoOfDPP, - v->ProjectedDCFCLKDeepSleep, - v->MaximumVStartup, - v->TotalVActivePixelBandwidth, - v->TotalVActiveCursorBandwidth, - v->TotalMetaRowBandwidth, - v->TotalDPTERowBandwidth, - v->TotalNumberOfActiveDPP, - v->TotalNumberOfDCCActiveDPP, - v->dpte_group_bytes, - v->PrefetchLinesY, - v->PrefetchLinesC, - v->swath_width_luma_ub_all_states, - v->swath_width_chroma_ub_all_states, - v->BytePerPixelY, - v->BytePerPixelC, - v->HTotal, - v->PixelClock, - v->PDEAndMetaPTEBytesPerFrame, - v->DPTEBytesPerRow, - v->MetaRowBytes, - v->DynamicMetadataEnable, - v->VActivePixelBandwidth, - v->VActiveCursorBandwidth, - v->ReadBandwidthLuma, - v->ReadBandwidthChroma, - v->DCFCLKPerState, - v->DCFCLKState); - } + if (v->UseMinimumRequiredDCFCLK == true) + UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { @@ -5276,92 +5260,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SREnterPlusExitTime); for (k = 0; k < v->NumberOfActivePlanes; k++) { - Pipe myPipe; - - myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; - myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; - myPipe.PixelClock = v->PixelClock[k]; - myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = v->ScalerEnabled[k]; - myPipe.SourceScan = v->SourceScan[k]; - myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; - myPipe.InterlaceEnable = v->Interlace[k]; - myPipe.NumberOfCursors = v->NumberOfCursors[k]; - myPipe.VBlank = v->VTotal[k] - v->VActive[k]; - myPipe.HTotal = v->HTotal[k]; - myPipe.DCCEnable = v->DCCEnable[k]; - myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 - || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; - myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; - v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( - mode_lib, - HostVMInefficiencyFactor, - &myPipe, - v->DSCDelayPerState[i][k], - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, - v->SwathWidthYThisState[k] / v->HRatio[k], - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, - dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), - v->MaximumVStartup[i][j][k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], - v->UrgLatency[i], - v->ExtraLatency, - v->TimeCalc, - v->PDEAndMetaPTEBytesPerFrame[i][j][k], - v->MetaRowBytes[i][j][k], - v->DPTEBytesPerRow[i][j][k], - v->PrefetchLinesY[i][j][k], - v->SwathWidthYThisState[k], - v->PrefillY[k], - v->MaxNumSwY[k], - v->PrefetchLinesC[i][j][k], - v->SwathWidthCThisState[k], - v->PrefillC[k], - v->MaxNumSwC[k], - v->swath_width_luma_ub_this_state[k], - v->swath_width_chroma_ub_this_state[k], - v->SwathHeightYThisState[k], - v->SwathHeightCThisState[k], - v->TWait, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], - &v->LineTimesForPrefetch[k], - &v->PrefetchBW[k], - &v->LinesForMetaPTE[k], - &v->LinesForMetaAndDPTERow[k], - &v->VRatioPreY[i][j][k], - &v->VRatioPreC[i][j][k], - &v->RequiredPrefetchPixelDataBWLuma[i][j][k], - &v->RequiredPrefetchPixelDataBWChroma[i][j][k], - &v->NoTimeForDynamicMetadata[i][j][k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->dummy7[k], - &v->dummy8[k], - &v->dummy13[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + CalculatePrefetchSchedulePerPlane(mode_lib, + HostVMInefficiencyFactor, + i, j, k); } for (k = 0; k < v->NumberOfActivePlanes; k++) { @@ -7249,69 +7150,15 @@ static double CalculateUrgentLatency( static void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, - int MaxInterDCNTileRepeaters, int MaxPrefetchMode, - double FinalDRAMClockChangeLatency, - double SREnterPlusExitTime, - int ReturnBusWidth, - int RoundTripPingLatencyCycles, - int ReorderingBytes, - int PixelChunkSizeInKByte, - int MetaChunkSize, - bool GPUVMEnable, - int GPUVMMaxPageTableLevels, - bool HostVMEnable, - int NumberOfActivePlanes, - double HostVMMinPageSize, - int HostVMMaxNonCachedPageTableLevels, - bool DynamicMetadataVMEnabled, - enum immediate_flip_requirement ImmediateFlipRequirement, - bool ProgressiveToInterlaceUnitInOPP, - double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, - double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, - int VTotal[], - int VActive[], - int DynamicMetadataTransmittedBytes[], - int DynamicMetadataLinesBeforeActiveRequired[], - bool Interlace[], - double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], - double RequiredDISPCLK[][2], - double UrgLatency[], - unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], - double ProjectedDCFCLKDeepSleep[][2], - double MaximumVStartup[][2][DC__NUM_DPP__MAX], - double TotalVActivePixelBandwidth[][2], - double TotalVActiveCursorBandwidth[][2], - double TotalMetaRowBandwidth[][2], - double TotalDPTERowBandwidth[][2], - unsigned int TotalNumberOfActiveDPP[][2], - unsigned int TotalNumberOfDCCActiveDPP[][2], - int dpte_group_bytes[], - double PrefetchLinesY[][2][DC__NUM_DPP__MAX], - double PrefetchLinesC[][2][DC__NUM_DPP__MAX], - int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], - int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], - int BytePerPixelY[], - int BytePerPixelC[], - int HTotal[], - double PixelClock[], - double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], - double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], - double MetaRowBytes[][2][DC__NUM_DPP__MAX], - bool DynamicMetadataEnable[], - double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], - double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double DCFCLKPerState[], - double DCFCLKState[][2]) + int ReorderingBytes) { struct vba_vars_st *v = &mode_lib->vba; int dummy1, i, j, k; double NormalEfficiency, dummy2, dummy3; double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; - NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; + NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; @@ -7329,61 +7176,61 @@ static void UseMinimumDCFCLK( double MinimumTvmPlus2Tr0; TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] - + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]); + + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { - NoOfDPPState[k] = NoOfDPP[i][j][k]; + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { + NoOfDPPState[k] = v->NoOfDPP[i][j][k]; } - MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime); - NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j]; - DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ? - TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j]; + MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); + NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; + DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? + TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; DCFCLKRequiredForAverageBandwidth = dml_max3( - ProjectedDCFCLKDeepSleep[i][j], - (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth - / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), - (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth); + v->ProjectedDCFCLKDeepSleep[i][j], + (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth + / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), + (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); ExtraLatencyBytes = CalculateExtraLatencyBytes( ReorderingBytes, - TotalNumberOfActiveDPP[i][j], - PixelChunkSizeInKByte, - TotalNumberOfDCCActiveDPP[i][j], - MetaChunkSize, - GPUVMEnable, - HostVMEnable, - NumberOfActivePlanes, + v->TotalNumberOfActiveDPP[i][j], + v->PixelChunkSizeInKByte, + v->TotalNumberOfDCCActiveDPP[i][j], + v->MetaChunkSize, + v->GPUVMEnable, + v->HostVMEnable, + v->NumberOfActivePlanes, NoOfDPPState, - dpte_group_bytes, + v->dpte_group_bytes, 1, - HostVMMinPageSize, - HostVMMaxNonCachedPageTableLevels); - ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; - for (k = 0; k < NumberOfActivePlanes; ++k) { + v->HostVMMinPageSize, + v->HostVMMaxNonCachedPageTableLevels); + ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double DCFCLKCyclesRequiredInPrefetch; double ExpectedPrefetchBWAcceleration; double PrefetchTime; - PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] - + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth; + PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] + + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] - + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) - + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth - + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; - PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k]; - ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) - / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth + + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; + PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; + ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) + / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); DynamicMetadataVMExtraLatency[k] = - (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? - UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; - PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - - UrgLatency[i] - * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2) - * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) + (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? + v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait + - v->UrgLatency[i] + * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) + * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k]; if (PrefetchTime > 0) { @@ -7392,14 +7239,14 @@ static void UseMinimumDCFCLK( / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; - if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) { + if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] - + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth; + + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; } } else { - DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; } - if (DynamicMetadataEnable[k] == true) { + if (v->DynamicMetadataEnable[k] == true) { double TSetupPipe; double TdmbfPipe; double TdmsksPipe; @@ -7407,17 +7254,17 @@ static void UseMinimumDCFCLK( double AllowedTimeForUrgentExtraLatency; CalculateVupdateAndDynamicMetadataParameters( - MaxInterDCNTileRepeaters, - RequiredDPPCLK[i][j][k], - RequiredDISPCLK[i][j], - ProjectedDCFCLKDeepSleep[i][j], - PixelClock[k], - HTotal[k], - VTotal[k] - VActive[k], - DynamicMetadataTransmittedBytes[k], - DynamicMetadataLinesBeforeActiveRequired[k], - Interlace[k], - ProgressiveToInterlaceUnitInOPP, + v->MaxInterDCNTileRepeaters, + v->RequiredDPPCLK[i][j][k], + v->RequiredDISPCLK[i][j], + v->ProjectedDCFCLKDeepSleep[i][j], + v->PixelClock[k], + v->HTotal[k], + v->VTotal[k] - v->VActive[k], + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], + v->Interlace[k], + v->ProgressiveToInterlaceUnitInOPP, &TSetupPipe, &TdmbfPipe, &TdmecPipe, @@ -7425,31 +7272,31 @@ static void UseMinimumDCFCLK( &dummy1, &dummy2, &dummy3); - AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe + AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; if (AllowedTimeForUrgentExtraLatency > 0) { DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( DCFCLKRequiredForPeakBandwidthPerPlane[k], ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); } else { - DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; } } } DCFCLKRequiredForPeakBandwidth = 0; - for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { + for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; } - MinimumTvmPlus2Tr0 = UrgLatency[i] - * (GPUVMEnable == true ? - (HostVMEnable == true ? - (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : + MinimumTvmPlus2Tr0 = v->UrgLatency[i] + * (v->GPUVMEnable == true ? + (v->HostVMEnable == true ? + (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0); - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double MaximumTvmPlus2Tr0PlusTsw; - MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; + MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { - DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; + DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; } else { DCFCLKRequiredForPeakBandwidth = dml_max3( DCFCLKRequiredForPeakBandwidth, @@ -7457,7 +7304,7 @@ static void UseMinimumDCFCLK( (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); } } - DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); + v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 6905ef1e75a6..d76251fd1566 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -73,6 +73,7 @@ struct display_mode_lib { struct vba_vars_st vba; struct dal_logger *logger; struct dml_funcs funcs; + struct _vcs_dpi_display_e2e_pipe_params_st dml_pipe_state[6]; }; void dml_init_instance(struct display_mode_lib *lib, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c new file mode 100644 index 000000000000..91810aaee5a3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c @@ -0,0 +1,1889 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dml_wrapper.h" +#include "resource.h" +#include "core_types.h" +#include "dsc.h" +#include "clk_mgr.h" + +#ifndef DC_LOGGER_INIT +#define DC_LOGGER_INIT +#undef DC_LOG_WARNING +#define DC_LOG_WARNING +#endif + +#define DML_WRAPPER_TRANSLATION_ +#include "dml_wrapper_translation.c" +#undef DML_WRAPPER_TRANSLATION_ + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +static void build_clamping_params(struct dc_stream_state *stream) +{ + stream->clamping.clamping_level = CLAMPING_FULL_RANGE; + stream->clamping.c_depth = stream->timing.display_color_depth; + stream->clamping.pixel_encoding = stream->timing.pixel_encoding; +} + +static void get_pixel_clock_parameters( + const struct pipe_ctx *pipe_ctx, + struct pixel_clk_params *pixel_clk_params) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + + /*TODO: is this halved for YCbCr 420? in that case we might want to move + * the pixel clock normalization for hdmi up to here instead of doing it + * in pll_adjust_pix_clk + */ + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == + PIXEL_ENCODING_YCBCR420); + pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + pixel_clk_params->color_depth = COLOR_DEPTH_888; + } + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; + } + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + +} + +static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); + + if (pipe_ctx->clock_source) + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); + + pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; + + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + build_clamping_params(pipe_ctx->stream); + + return DC_OK; +} + +static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, + struct bit_depth_reduction_params *fmt_bit_depth) +{ + enum dc_dither_option option = stream->dither_option; + enum dc_pixel_encoding pixel_encoding = + stream->timing.pixel_encoding; + + memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth)); + + if (option == DITHER_OPTION_DEFAULT) { + switch (stream->timing.display_color_depth) { + case COLOR_DEPTH_666: + option = DITHER_OPTION_SPATIAL6; + break; + case COLOR_DEPTH_888: + option = DITHER_OPTION_SPATIAL8; + break; + case COLOR_DEPTH_101010: + option = DITHER_OPTION_SPATIAL10; + break; + default: + option = DITHER_OPTION_DISABLE; + } + } + + if (option == DITHER_OPTION_DISABLE) + return; + + if (option == DITHER_OPTION_TRUN6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 0; + } else if (option == DITHER_OPTION_TRUN8 || + option == DITHER_OPTION_TRUN8_SPATIAL6 || + option == DITHER_OPTION_TRUN8_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 1; + } else if (option == DITHER_OPTION_TRUN10 || + option == DITHER_OPTION_TRUN10_SPATIAL6 || + option == DITHER_OPTION_TRUN10_SPATIAL8 || + option == DITHER_OPTION_TRUN10_FM8 || + option == DITHER_OPTION_TRUN10_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + } + + /* special case - Formatter can only reduce by 4 bits at most. + * When reducing from 12 to 6 bits, + * HW recommends we use trunc with round mode + * (if we did nothing, trunc to 10 bits would be used) + * note that any 12->10 bit reduction is ignored prior to DCE8, + * as the input was 10 bits. + */ + if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_FM6) { + fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; + fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + fmt_bit_depth->flags.TRUNCATE_MODE = 1; + } + + /* spatial dither + * note that spatial modes 1-3 are never used + */ + if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_TRUN10_SPATIAL6 || + option == DITHER_OPTION_TRUN8_SPATIAL6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL8 || + option == DITHER_OPTION_SPATIAL8_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM || + option == DITHER_OPTION_SPATIAL10 || + option == DITHER_OPTION_SPATIAL10_FM8 || + option == DITHER_OPTION_SPATIAL10_FM6) { + fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; + fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2; + fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; + fmt_bit_depth->flags.RGB_RANDOM = + (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; + } + + if (option == DITHER_OPTION_SPATIAL6 || + option == DITHER_OPTION_SPATIAL8 || + option == DITHER_OPTION_SPATIAL10) { + fmt_bit_depth->flags.FRAME_RANDOM = 0; + } else { + fmt_bit_depth->flags.FRAME_RANDOM = 1; + } + + ////////////////////// + //// temporal dither + ////////////////////// + if (option == DITHER_OPTION_FM6 || + option == DITHER_OPTION_SPATIAL8_FM6 || + option == DITHER_OPTION_SPATIAL10_FM6 || + option == DITHER_OPTION_TRUN10_FM6 || + option == DITHER_OPTION_TRUN8_FM6 || + option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0; + } else if (option == DITHER_OPTION_FM8 || + option == DITHER_OPTION_SPATIAL10_FM8 || + option == DITHER_OPTION_TRUN10_FM8) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1; + } else if (option == DITHER_OPTION_FM10) { + fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; + fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2; + } + + fmt_bit_depth->pixel_encoding = pixel_encoding; +} + +bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx) +{ + int i; + + /* Validate DSC config, dsc count validation is already done */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dsc_config dsc_cfg; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + opp_cnt++; + + /* Only need to validate top pipe */ + if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) + continue; + + dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + + stream->timing.h_border_right) / opp_cnt; + dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + + stream->timing.v_border_bottom; + dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; + dsc_cfg.color_depth = stream->timing.display_color_depth; + dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; + dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; + dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + + if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) + return false; + } + return true; +} + +enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) +{ + enum dc_status status = DC_OK; + struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + + status = build_pipe_hw_param(pipe_ctx); + + return status; +} + +void dml_acquire_dsc(const struct dc *dc, + struct resource_context *res_ctx, + struct display_stream_compressor **dsc, + int pipe_idx) +{ + int i; + const struct resource_pool *pool = dc->res_pool; + struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; + + ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ + *dsc = NULL; + + /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ + if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { + *dsc = pool->dscs[pipe_idx]; + res_ctx->is_dsc_acquired[pipe_idx] = true; + return; + } + + /* Return old DSC to avoid the need for redo it */ + if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { + *dsc = dsc_old; + res_ctx->is_dsc_acquired[dsc_old->inst] = true; + return ; + } + + /* Find first free DSC */ + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (!res_ctx->is_dsc_acquired[i]) { + *dsc = pool->dscs[i]; + res_ctx->is_dsc_acquired[i] = true; + break; + } +} + +static bool dml_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +static struct pipe_ctx *dml_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static void dml_release_dsc(struct resource_context *res_ctx, + const struct resource_pool *pool, + struct display_stream_compressor **dsc) +{ + int i; + + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (pool->dscs[i] == *dsc) { + res_ctx->is_dsc_acquired[i] = false; + *dsc = NULL; + break; + } +} + +static int dml_get_num_mpc_splits(struct pipe_ctx *pipe) +{ + int mpc_split_count = 0; + struct pipe_ctx *other_pipe = pipe->bottom_pipe; + + while (other_pipe && other_pipe->plane_state == pipe->plane_state) { + mpc_split_count++; + other_pipe = other_pipe->bottom_pipe; + } + other_pipe = pipe->top_pipe; + while (other_pipe && other_pipe->plane_state == pipe->plane_state) { + mpc_split_count++; + other_pipe = other_pipe->top_pipe; + } + + return mpc_split_count; +} + +static bool dml_enough_pipes_for_subvp(struct dc *dc, + struct dc_state *context) +{ + int i = 0; + int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->plane_state) + num_pipes++; + } + + // Sub-VP only possible if the number of "real" pipes is + // less than or equal to half the number of available pipes + if (num_pipes * 2 > dc->res_pool->pipe_count) + return false; + + return true; +} + +static int dml_validate_apply_pipe_split_flags( + struct dc *dc, + struct dc_state *context, + int vlevel, + int *split, + bool *merge) +{ + int i, pipe_idx, vlevel_split; + int plane_count = 0; + bool force_split = false; + bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; + struct vba_vars_st *v = &context->bw_ctx.dml.vba; + int max_mpc_comb = v->maxMpcComb; + + if (context->stream_count > 1) { + if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) + avoid_split = true; + } else if (dc->debug.force_single_disp_pipe_split) + force_split = true; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /** + * Workaround for avoiding pipe-split in cases where we'd split + * planes that are too small, resulting in splits that aren't + * valid for the scaler. + */ + if (pipe->plane_state && + (pipe->plane_state->dst_rect.width <= 16 || + pipe->plane_state->dst_rect.height <= 16 || + pipe->plane_state->src_rect.width <= 16 || + pipe->plane_state->src_rect.height <= 16)) + avoid_split = true; + + /* TODO: fix dc bugs and remove this split threshold thing */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; + } + if (plane_count > dc->res_pool->pipe_count / 2) + avoid_split = true; + + /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct dc_crtc_timing timing; + + if (!pipe->stream) + continue; + else { + timing = pipe->stream->timing; + if (timing.h_border_left + timing.h_border_right + + timing.v_border_top + timing.v_border_bottom > 0) { + avoid_split = true; + break; + } + } + } + + /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ + if (avoid_split) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) + if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && + v->ModeSupport[vlevel][0]) + break; + /* Impossible to not split this pipe */ + if (vlevel > context->bw_ctx.dml.soc.num_states) + vlevel = vlevel_split; + else + max_mpc_comb = 0; + pipe_idx++; + } + v->maxMpcComb = max_mpc_comb; + } + + /* Split loop sets which pipe should be split based on dml outputs and dc flags */ + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + int pipe_plane = v->pipe_plane[pipe_idx]; + bool split4mpc = context->stream_count == 1 && plane_count == 1 + && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) + split[i] = 4; + else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) + split[i] = 2; + + if ((pipe->stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + pipe->stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)) + split[i] = 2; + if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { + split[i] = 2; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; + } + if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { + split[i] = 4; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; + } + /*420 format workaround*/ + if (pipe->stream->timing.h_addressable > 7680 && + pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + split[i] = 4; + } + + v->ODMCombineEnabled[pipe_plane] = + v->ODMCombineEnablePerState[vlevel][pipe_plane]; + + if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { + if (dml_get_num_mpc_splits(pipe) == 1) { + /*If need split for mpc but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 MPC */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 MPC */ + else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 2 -> 1 MPC */ + } else if (dml_get_num_mpc_splits(pipe) == 3) { + /*If need split for mpc but 4 way split already*/ + if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) + || !pipe->bottom_pipe)) { + merge[i] = true; /* 4 -> 2 MPC */ + } else if (split[i] == 0 && pipe->top_pipe && + pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 4 -> 1 MPC */ + split[i] = 0; + } else if (dml_get_num_mpc_splits(pipe)) { + /* ODM -> MPC transition */ + if (pipe->prev_odm_pipe) { + split[i] = 0; + merge[i] = true; + } + } + } else { + if (dml_get_num_mpc_splits(pipe) == 1) { + /*If need split for odm but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 ODM */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 ODM */ + else if (pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + } else if (dml_get_num_mpc_splits(pipe) == 3) { + /*If need split for odm but 4 way split already*/ + if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) + || !pipe->next_odm_pipe)) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* 4 -> 2 ODM */ + } else if (split[i] == 0 && pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + split[i] = 0; + } else if (dml_get_num_mpc_splits(pipe)) { + /* MPC -> ODM transition */ + ASSERT(0); /* NOT expected yet */ + if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + split[i] = 0; + merge[i] = true; + } + } + } + + /* Adjust dppclk when split is forced, do not bother with dispclk */ + if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) + v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; + pipe_idx++; + } + + return vlevel; +} + +static void dml_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream) +{ + // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width + uint32_t phantom_vactive_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 + + dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us; + uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start; + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; +} + +static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe) +{ + struct dc_stream_state *phantom_stream = NULL; + + phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; + ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; + ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + + /* stream has limited viewport and small timing */ + memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream); + + dc_add_stream_to_ctx(dc, context, phantom_stream); + dc->hwss.apply_ctx_to_hw(dc, context); + return phantom_stream; +} + +static void dml_enable_phantom_plane(struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + struct pipe_ctx *main_pipe) +{ + struct dc_plane_state *phantom_plane = NULL; + struct dc_plane_state *prev_phantom_plane = NULL; + struct pipe_ctx *curr_pipe = main_pipe; + + while (curr_pipe) { + if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) + phantom_plane = prev_phantom_plane; + else + phantom_plane = dc_create_plane_state(dc); + + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); + /* Currently compat_level is undefined in dc_state + * phantom_plane->compat_level = curr_pipe->plane_state->compat_level; + */ + phantom_plane->format = curr_pipe->plane_state->format; + phantom_plane->rotation = curr_pipe->plane_state->rotation; + phantom_plane->visible = curr_pipe->plane_state->visible; + + /* Shadow pipe has small viewport. */ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; + + dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); + + curr_pipe = curr_pipe->bottom_pipe; + prev_phantom_plane = phantom_plane; + } +} + +static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct dc_stream_state *ref_stream = pipe->stream; + // Only construct phantom stream for top pipes that have plane enabled + if (!pipe->top_pipe && pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + struct dc_stream_state *phantom_stream = NULL; + + phantom_stream = dml_enable_phantom_stream(dc, context, pipe); + dml_enable_phantom_plane(dc, context, phantom_stream, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipe->stream->use_dynamic_meta = false; + pipe->plane_state->flip_immediate = false; + if (!resource_build_scaling_params(pipe)) { + // Log / remove phantom pipes since failed to build scaling params + } + } + } +} + +static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i; + bool removed_pipe = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + // build scaling params for phantom pipes + if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_rem_all_planes_for_stream(dc, pipe->stream, context); + dc_remove_stream_from_ctx(dc, context, pipe->stream); + removed_pipe = true; + } + + // Clear all phantom stream info + if (pipe->stream) { + pipe->stream->mall_stream_config.type = SUBVP_NONE; + pipe->stream->mall_stream_config.paired_stream = NULL; + } + } + if (removed_pipe) + dc->hwss.apply_ctx_to_hw(dc, context); +} + +/* + * If the input state contains no upstream planes for a particular pipe (i.e. only timing) + * we need to populate some "conservative" plane information as DML cannot handle "no planes" + */ +static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe) +{ + pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled; + pipe->src.source_scan = dm_horz; + pipe->src.sw_mode = dm_sw_4kb_s; + pipe->src.macro_tile_size = dm_64k_tile; + pipe->src.viewport_width = timing->h_addressable; + if (pipe->src.viewport_width > 1920) + pipe->src.viewport_width = 1920; + pipe->src.viewport_height = timing->v_addressable; + if (pipe->src.viewport_height > 1080) + pipe->src.viewport_height = 1080; + pipe->src.surface_height_y = pipe->src.viewport_height; + pipe->src.surface_width_y = pipe->src.viewport_width; + pipe->src.surface_height_c = pipe->src.viewport_height; + pipe->src.surface_width_c = pipe->src.viewport_width; + pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256; + pipe->src.source_format = dm_444_32; + pipe->dest.recout_width = pipe->src.viewport_width; + pipe->dest.recout_height = pipe->src.viewport_height; + pipe->dest.full_recout_width = pipe->dest.recout_width; + pipe->dest.full_recout_height = pipe->dest.recout_height; + pipe->scale_ratio_depth.lb_depth = dm_lb_16; + pipe->scale_ratio_depth.hscl_ratio = 1.0; + pipe->scale_ratio_depth.vscl_ratio = 1.0; + pipe->scale_ratio_depth.scl_enable = 0; + pipe->scale_taps.htaps = 1; + pipe->scale_taps.vtaps = 1; + pipe->dest.vtotal_min = timing->v_total; + pipe->dest.vtotal_max = timing->v_total; + + if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) { + pipe->src.viewport_width /= 2; + pipe->dest.recout_width /= 2; + } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) { + pipe->src.viewport_width /= 4; + pipe->dest.recout_width /= 4; + } + + pipe->src.dcc = false; + pipe->src.dcc_rate = 1; +} + +/* + * If the pipe is not blending (i.e. pipe_ctx->top pipe == null) then its + * hsplit group is equal to its own pipe ID + * Otherwise, all pipes part of the same blending tree have the same hsplit group + * ID as the top most pipe + * + * If the pipe ctx is ODM combined, then similar logic follows + */ +static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) +{ + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + + if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state + == dc_pipe_ctx->plane_state) { + struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe; + int split_idx = 0; + + while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state + == dc_pipe_ctx->plane_state) { + first_pipe = first_pipe->top_pipe; + split_idx++; + } + + /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ + if (split_idx == 0) + e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; + else if (split_idx == 1) + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + else if (split_idx == 2) + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx; + + } else if (dc_pipe_ctx->prev_odm_pipe) { + struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe; + + while (first_pipe->prev_odm_pipe) + first_pipe = first_pipe->prev_odm_pipe; + e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; + } +} + +static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale) +{ + const struct dc_plane_state *pln = dc_pipe_ctx->plane_state; + const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data; + + e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate; + e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln) + || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln) + || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled; + + /* stereo is not split */ + if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || + pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { + e2e_pipe->pipe.src.is_hsplit = false; + e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; + } + + e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 + || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz; + e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y; + e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y; + e2e_pipe->pipe.src.viewport_width = scl->viewport.width; + e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width; + e2e_pipe->pipe.src.viewport_height = scl->viewport.height; + e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height; + e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width; + e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height; + e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width; + e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height; + e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height; + + if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA + || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; + e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; + e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; + e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; + } else { + e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; + e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; + } + e2e_pipe->pipe.src.dcc = pln->dcc.enable; + e2e_pipe->pipe.src.dcc_rate = 1; + e2e_pipe->pipe.dest.recout_width = scl->recout.width; + e2e_pipe->pipe.dest.recout_height = scl->recout.height; + e2e_pipe->pipe.dest.full_recout_height = scl->recout.height; + e2e_pipe->pipe.dest.full_recout_width = scl->recout.width; + if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1) + e2e_pipe->pipe.dest.full_recout_width *= 2; + else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1) + e2e_pipe->pipe.dest.full_recout_width *= 4; + else { + struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe; + + while (split_pipe && split_pipe->plane_state == pln) { + e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->bottom_pipe; + } + split_pipe = dc_pipe_ctx->top_pipe; + while (split_pipe && split_pipe->plane_state == pln) { + e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; + split_pipe = split_pipe->top_pipe; + } + } + + e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16; + e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); + e2e_pipe->pipe.scale_ratio_depth.scl_enable = + scl->ratios.vert.value != dc_fixpt_one.value + || scl->ratios.horz.value != dc_fixpt_one.value + || scl->ratios.vert_c.value != dc_fixpt_one.value + || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ + || always_scale; /*support always scale*/ + e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps; + e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c; + e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps; + e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; + + /* Currently compat_level is not defined. Commenting it until further resolution + * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) { + swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, + &e2e_pipe->pipe.src.sw_mode); + e2e_pipe->pipe.src.macro_tile_size = + swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); + } else { + gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode, + pln->compat_level, + &e2e_pipe->pipe.src.sw_mode); + e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile; + }*/ + + e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format); +} + +static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) +{ + /* + * For graphic plane, cursor number is 1, nv12 is 0 + * bw calculations due to cursor on/off + */ + if (dc_pipe_ctx->plane_state && + (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || + dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)) + e2e_pipe->pipe.src.num_cursors = 0; + else + e2e_pipe->pipe.src.num_cursors = 1; + + e2e_pipe->pipe.src.cur0_src_width = 256; + e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit; +} + +static int populate_dml_pipes_from_context_base( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int pipe_cnt, i; + bool synchronized_vblank = true; + struct resource_context *res_ctx = &context->res_ctx; + + for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + + if (pipe_cnt < 0) { + pipe_cnt = i; + continue; + } + + if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) + continue; + + if (dc->debug.disable_timing_sync || + (!resource_are_streams_timing_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream) && + !resource_are_vblanks_synchronizable( + res_ctx->pipe_ctx[pipe_cnt].stream, + res_ctx->pipe_ctx[i].stream))) { + synchronized_vblank = false; + break; + } + } + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; + + struct audio_check aud_check = {0}; + if (!res_ctx->pipe_ctx[i].stream) + continue; + + /* todo: + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; + pipes[pipe_cnt].pipe.src.dcc = 0; + pipes[pipe_cnt].pipe.src.vm = 0;*/ + + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; + /* todo: rotation?*/ + pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; + if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { + pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; + /* 1/2 vblank */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = + (timing->v_total - timing->v_addressable + - timing->v_border_top - timing->v_border_bottom) / 2; + /* 36 bytes dp, 32 hdmi */ + pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = + dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 36 : 32; + } + pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; + + dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest); + pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; + pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; + + pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; + + pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]); + + populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); + + pipes[pipe_cnt].dout.dp_lanes = 4; + pipes[pipe_cnt].dout.is_virtual = 0; + pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal); + if (pipes[pipe_cnt].dout.output_type < 0) { + pipes[pipe_cnt].dout.output_type = dm_dp; + pipes[pipe_cnt].dout.is_virtual = 1; + } + + populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout); + + if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) + pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; + + /* todo: default max for now, until there is logic reflecting this in dc*/ + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + /*fill up the audio sample rate (unit in kHz)*/ + get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); + pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; + + populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); + + if (!res_ctx->pipe_ctx[i].plane_state) { + populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe); + } else { + populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale); + } + + pipe_cnt++; + } + + /* populate writeback information */ + if (dc->res_pool) + dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); + + return pipe_cnt; +} + +static int dml_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + + populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + pipe_cnt++; + } + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format)) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } + + return pipe_cnt; +} + +static void dml_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh_and_mclk_switch; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (*vlevel < context->bw_ctx.dml.soc.num_states) + *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + + /* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. !fast_validate) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ + if (!dc->debug.force_disable_subvp && + dml_enough_pipes_for_subvp(dc, context) && + *vlevel < context->bw_ctx.dml.soc.num_states && + (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || + dc->debug.force_subvp_mclk_switch)) { + + dml_add_phantom_pipes(dc, context); + + /* Create input to DML based on new context which includes phantom pipes + * TODO: Input to DML should mark which pipes are phantom + */ + *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); + if (*vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, MAX_PIPES * sizeof(*split)); + memset(merge, 0, MAX_PIPES * sizeof(*merge)); + *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + } + + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) + // remove phantom pipes and repopulate dml pipes + if (*vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + dml_remove_phantom_pipes(dc, context); + *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); + } + } +} + +static void dcn20_adjust_adaptive_sync_v_startup( + const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end = 0; + uint32_t asic_blank_start = 0; + uint32_t newVstartup = 0; + + patched_crtc_timing = *dc_crtc_timing; + + if (patched_crtc_timing.flags.INTERLACE == 1) { + if (patched_crtc_timing.v_front_porch < 2) + patched_crtc_timing.v_front_porch = 2; + } else { + if (patched_crtc_timing.v_front_porch < 1) + patched_crtc_timing.v_front_porch = 1; + } + + /* blank_start = frame end - front porch */ + asic_blank_start = patched_crtc_timing.v_total - + patched_crtc_timing.v_front_porch; + + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + + *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); +} + +static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) +{ + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->stream->link->hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; +#if defined (CONFIG_DRM_AMD_DC_DP2_0) + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; +#endif + } + return false; +} + +static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + } +} + +static bool dml_internal_validate( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes first + dml_remove_phantom_pipes(dc, context); + + dml_update_soc_for_wm_a(dc, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + // On initial pass through DML, we intend to use MALL for SS on all + // (non-PSR) surfaces with none using MALL for P-State + // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state + //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) + // pipe->plane_state->mall_plane_config.use_mall_for_ss = true; + } + } + pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) { + dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + } + + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. + * + * We don't actually support prefetch mode 2, so require that we + * at least support prefetch mode 1. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + /* We only support full screen mpo with ODM */ + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dml_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dml_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dml_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dml_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dml_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dml_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + +static void dml_calculate_dlg_params( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx; + int plane_count; + + /* Writeback MCIF_WB arbitration parameters */ + if (dc->res_pool) + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks + * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? + DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; + */ + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* Commented out as per above error for now. + if (plane_count == 0) + context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + */ + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + /* TODO : Uncomment the below line and make changes + * as per DML nomenclature once it is available. + * context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = context->bw_ctx.dml.vba.fclk_pstate_support; + */ + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; + context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = + pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes + - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, + &context->res_ctx.pipe_ctx[i].ttu_regs, + pipes, + pipe_cnt, + pipe_idx, + cstate_en, + context->bw_ctx.bw.dcn.clk.p_state_change_support, + false, false, true); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].rq_regs, + &pipes[pipe_idx].pipe); + pipe_idx++; + } +} + +static void dml_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + + double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; + + /* Set B: + * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, + * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark + * calculations to cover bootup clocks. + * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (context->bw_ctx.dml.soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8; + + /* Set D: + * All clocks min. + * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (context->bw_ctx.dml.soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8; + + /* Set C, for Dummy P-State: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts_margin = 160; + + if ((!pstate_en)) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 4; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency, if p-state is not supported + * with full p-state latency we want to calculate DLG based on dummy p-state latency, + * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation. + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dml_update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; +} + +bool dml_validate(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state; + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + BW_VAL_TRACE_FINISH(); + + return out; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c new file mode 100644 index 000000000000..4ec5310a2962 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c @@ -0,0 +1,284 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifdef DML_WRAPPER_TRANSLATION_ + +static void gfx10array_mode_to_dml_params( + enum array_mode_values array_mode, + enum legacy_tiling_compat_level compat_level, + unsigned int *sw_mode) +{ + switch (array_mode) { + case DC_ARRAY_LINEAR_ALLIGNED: + case DC_ARRAY_LINEAR_GENERAL: + *sw_mode = dm_sw_linear; + break; + case DC_ARRAY_2D_TILED_THIN1: +// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed +#if 0 + if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO) + *sw_mode = dm_sw_gfx7_2d_thin_l_vp; + else + *sw_mode = dm_sw_gfx7_2d_thin_gl; +#endif + break; + default: + ASSERT(0); /* Not supported */ + break; + } +} + +static void swizzle_to_dml_params( + enum swizzle_mode_values swizzle, + unsigned int *sw_mode) +{ + switch (swizzle) { + case DC_SW_LINEAR: + *sw_mode = dm_sw_linear; + break; + case DC_SW_4KB_S: + *sw_mode = dm_sw_4kb_s; + break; + case DC_SW_4KB_S_X: + *sw_mode = dm_sw_4kb_s_x; + break; + case DC_SW_4KB_D: + *sw_mode = dm_sw_4kb_d; + break; + case DC_SW_4KB_D_X: + *sw_mode = dm_sw_4kb_d_x; + break; + case DC_SW_64KB_S: + *sw_mode = dm_sw_64kb_s; + break; + case DC_SW_64KB_S_X: + *sw_mode = dm_sw_64kb_s_x; + break; + case DC_SW_64KB_S_T: + *sw_mode = dm_sw_64kb_s_t; + break; + case DC_SW_64KB_D: + *sw_mode = dm_sw_64kb_d; + break; + case DC_SW_64KB_D_X: + *sw_mode = dm_sw_64kb_d_x; + break; + case DC_SW_64KB_D_T: + *sw_mode = dm_sw_64kb_d_t; + break; + case DC_SW_64KB_R_X: + *sw_mode = dm_sw_64kb_r_x; + break; + case DC_SW_VAR_S: + *sw_mode = dm_sw_var_s; + break; + case DC_SW_VAR_S_X: + *sw_mode = dm_sw_var_s_x; + break; + case DC_SW_VAR_D: + *sw_mode = dm_sw_var_d; + break; + case DC_SW_VAR_D_X: + *sw_mode = dm_sw_var_d_x; + break; + + default: + ASSERT(0); /* Not supported */ + break; + } +} + +static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_dest_params_st *dest) +{ + dest->hblank_start = timing->h_total - timing->h_front_porch; + dest->hblank_end = dest->hblank_start + - timing->h_addressable + - timing->h_border_left + - timing->h_border_right; + dest->vblank_start = timing->v_total - timing->v_front_porch; + dest->vblank_end = dest->vblank_start + - timing->v_addressable + - timing->v_border_top + - timing->v_border_bottom; + dest->htotal = timing->h_total; + dest->vtotal = timing->v_total; + dest->hactive = timing->h_addressable; + dest->vactive = timing->v_addressable; + dest->interlaced = timing->flags.INTERLACE; + dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0; + if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + dest->pixel_rate_mhz *= 2; +} + +static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe) +{ + int odm_split_count = 0; + enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled; + struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + + // Traverse pipe tree to determine odm split count + while (next_pipe) { + odm_split_count++; + next_pipe = next_pipe->next_odm_pipe; + } + pipe = pipe->prev_odm_pipe; + while (pipe) { + odm_split_count++; + pipe = pipe->prev_odm_pipe; + } + + // Translate split to DML odm combine factor + switch (odm_split_count) { + case 1: + combine_mode = dm_odm_combine_mode_2to1; + break; + case 3: + combine_mode = dm_odm_combine_mode_4to1; + break; + default: + combine_mode = dm_odm_combine_mode_disabled; + } + + return combine_mode; +} + +static int get_dml_output_type(enum signal_type dc_signal) +{ + int dml_output_type = -1; + + switch (dc_signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + dml_output_type = dm_dp; + break; + case SIGNAL_TYPE_EDP: + dml_output_type = dm_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + dml_output_type = dm_hdmi; + break; + default: + break; + } + + return dml_output_type; +} + +static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout) +{ + int output_bpc = 0; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + output_bpc = 6; + break; + case COLOR_DEPTH_888: + output_bpc = 8; + break; + case COLOR_DEPTH_101010: + output_bpc = 10; + break; + case COLOR_DEPTH_121212: + output_bpc = 12; + break; + case COLOR_DEPTH_141414: + output_bpc = 14; + break; + case COLOR_DEPTH_161616: + output_bpc = 16; + break; + case COLOR_DEPTH_999: + output_bpc = 9; + break; + case COLOR_DEPTH_111111: + output_bpc = 11; + break; + default: + output_bpc = 8; + break; + } + + switch (timing->pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + dout->output_format = dm_444; + dout->output_bpp = output_bpc * 3; + break; + case PIXEL_ENCODING_YCBCR420: + dout->output_format = dm_420; + dout->output_bpp = (output_bpc * 3.0) / 2; + break; + case PIXEL_ENCODING_YCBCR422: + if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple) + dout->output_format = dm_n422; + else + dout->output_format = dm_s422; + dout->output_bpp = output_bpc * 2; + break; + default: + dout->output_format = dm_444; + dout->output_bpp = output_bpc * 3; + } +} + +static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format) +{ + enum source_format_class dml_format = dm_444_32; + + switch (dc_format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + dml_format = dm_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + dml_format = dm_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + dml_format = dm_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + dml_format = dm_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + dml_format = dm_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + dml_format = dm_rgbe_alpha; + break; + default: + dml_format = dm_444_32; + break; + } + + return dml_format; +} + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c index 3ee858f311d1..ec636d06e18c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c @@ -61,16 +61,6 @@ static double dsc_roundf(double num) return (int)(num); } -static double dsc_ceil(double num) -{ - double retval = (int)num; - - if (retval != num && num > 0) - retval = num + 1; - - return (int)retval; -} - static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp) { @@ -103,7 +93,7 @@ static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, TABLE_CASE(420, 12, min); } - if (table == 0) + if (!table) return; index = (bpp - table[0].bpp) * 2; @@ -268,24 +258,3 @@ void _do_calc_rc_params(struct rc_params *rc, rc->rc_buf_thresh[13] = 8064; } -u32 _do_bytes_per_pixel_calc(int slice_width, - u16 drm_bpp, - bool is_navite_422_or_420) -{ - float bpp; - u32 bytes_per_pixel; - double d_bytes_per_pixel; - - dc_assert_fp_enabled(); - - bpp = ((float)drm_bpp / 16.0); - d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; - // TODO: Make sure the formula for calculating this is precise (ceiling - // vs. floor, and at what point they should be applied) - if (is_navite_422_or_420) - d_bytes_per_pixel /= 2; - - bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); - - return bytes_per_pixel; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h index b93b95409fbe..cad244c023cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h @@ -78,10 +78,6 @@ struct qp_entry { typedef struct qp_entry qp_table[]; -u32 _do_bytes_per_pixel_calc(int slice_width, - u16 drm_bpp, - bool is_navite_422_or_420); - void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 0321b4446e05..9c74564cbd8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -455,6 +455,7 @@ static bool intersect_dsc_caps( if (pixel_encoding == PIXEL_ENCODING_YCBCR422 || pixel_encoding == PIXEL_ENCODING_YCBCR420) dsc_common_caps->bpp_increment_div = min(dsc_common_caps->bpp_increment_div, (uint32_t)8); + dsc_common_caps->edp_sink_max_bits_per_pixel = dsc_sink_caps->edp_max_bits_per_pixel; dsc_common_caps->is_dp = dsc_sink_caps->is_dp; return true; } @@ -513,6 +514,13 @@ static bool decide_dsc_bandwidth_range( range->min_target_bpp_x16 = preferred_bpp_x16; } } + /* TODO - make this value generic to all signal types */ + else if (dsc_caps->edp_sink_max_bits_per_pixel) { + /* apply max bpp limitation from edp sink */ + range->max_target_bpp_x16 = MIN(dsc_caps->edp_sink_max_bits_per_pixel, + max_bpp_x16); + range->min_target_bpp_x16 = min_bpp_x16; + } else { range->max_target_bpp_x16 = max_bpp_x16; range->min_target_bpp_x16 = min_bpp_x16; @@ -574,7 +582,7 @@ static bool decide_dsc_target_bpp_x16( return *target_bpp_x16 != 0; } -#define MIN_AVAILABLE_SLICES_SIZE 4 +#define MIN_AVAILABLE_SLICES_SIZE 6 static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *available_slices) { @@ -860,6 +868,10 @@ static bool setup_dsc_config( min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? is_dsc_possible = (min_slices_h <= max_slices_h); + + if (min_slices_h == 0 && max_slices_h == 0) + is_dsc_possible = false; + if (!is_dsc_possible) goto done; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index b19d3aeb5962..e97cf09be9d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -60,31 +60,3 @@ void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) pps->dsc_version_minor); DC_FP_END(); } - -/** - * calc_dsc_bytes_per_pixel - calculate bytes per pixel - * @pps: DRM struct with all required DSC values - * - * Based on the information inside drm_dsc_config, this function calculates the - * total of bytes per pixel. - * - * @note This calculation requires float point operation, most of it executes - * under kernel_fpu_{begin,end}. - * - * Return: - * Return the number of bytes per pixel - */ -u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) - -{ - u32 ret; - u16 drm_bpp = pps->bits_per_pixel; - int slice_width = pps->slice_width; - bool is_navite_422_or_420 = pps->native_422 || pps->native_420; - - DC_FP_START(); - ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, - is_navite_422_or_420); - DC_FP_END(); - return ret; -} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index c2340e001b57..80921c1c0d53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -30,7 +30,6 @@ #include "dml/dsc/rc_calc_fpu.h" void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); -u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c index 1e19dd674e5a..7e306aa3e2b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c @@ -100,8 +100,7 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par int ret; struct rc_params rc; struct drm_dsc_config dsc_cfg; - - dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps); + unsigned long long tmp; calc_rc_params(&rc, pps); dsc_params->pps = *pps; @@ -113,6 +112,9 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_par dsc_cfg.mux_word_size = dsc_params->pps.bits_per_component <= 10 ? 48 : 64; ret = drm_dsc_compute_rc_parameters(&dsc_cfg); + tmp = (unsigned long long)dsc_cfg.slice_chunk_size * 0x10000000 + (dsc_cfg.slice_width - 1); + do_div(tmp, (uint32_t)dsc_cfg.slice_width); //ROUND-UP + dsc_params->bytes_per_pixel = (uint32_t)tmp; copy_pps_fields(&dsc_params->pps, &dsc_cfg); dsc_params->rc_buffer_model_size = dsc_cfg.rc_bits; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 6fc6488c54c0..f3c0e70073da 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -367,6 +367,7 @@ struct pipe_ctx { struct pll_settings pll_settings; uint8_t pipe_idx; + uint8_t pipe_idx_syncd; struct pipe_ctx *top_pipe; struct pipe_ctx *bottom_pipe; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index a6d3d859754a..52bdfea7897b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -170,6 +170,7 @@ uint8_t dc_dp_initialize_scrambling_data_symbols( enum dc_status dp_set_fec_ready(struct dc_link *link, bool ready); void dp_set_fec_enable(struct dc_link *link, bool enable); +struct link_encoder *dp_get_link_enc(struct dc_link *link); bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable, bool immediate_update); void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); @@ -217,4 +218,5 @@ bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx); void reset_dp_hpo_stream_encoders_for_link(struct dc_link *link); bool dp_retrieve_lttpr_cap(struct dc_link *link); +void edp_panel_backlight_power_on(struct dc_link *link); #endif /* __DC_LINK_DP_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 806f3041db14..337c0161e72d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -619,7 +619,7 @@ struct dcn_ip_params { }; extern const struct dcn_ip_params dcn10_ip_defaults; -bool dcn_validate_bandwidth( +bool dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h b/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h new file mode 100644 index 000000000000..5dcfbd8e2697 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h @@ -0,0 +1,34 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DML_WRAPPER_H_ +#define DML_WRAPPER_H_ + +#include "dc.h" +#include "dml/display_mode_vba.h" + +bool dml_validate(struct dc *dc, struct dc_state *context, bool fast_validate); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index a17e5de3b100..c920c4b6077d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -211,6 +211,8 @@ struct dummy_pstate_entry { struct clk_bw_params { unsigned int vram_type; unsigned int num_channels; + unsigned int dispclk_vco_khz; + unsigned int dc_mode_softmax_memclk; struct clk_limit_table clk_table; struct wm_table wm_table; struct dummy_pstate_entry dummy_pstate_table[4]; @@ -261,6 +263,10 @@ struct clk_mgr_funcs { /* Send message to PMFW to set hard max memclk frequency to highest DPM */ void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr); + /* Custom set a memclk freq range*/ + void (*set_max_memclk)(struct clk_mgr *clk_mgr, unsigned int memclk_mhz); + void (*set_min_memclk)(struct clk_mgr *clk_mgr, unsigned int memclk_mhz); + /* Get current memclk states from PMFW, update relevant structures */ void (*get_memclk_states_from_smu)(struct clk_mgr *clk_mgr); @@ -274,6 +280,7 @@ struct clk_mgr { struct dc_clocks clks; bool psr_allow_active_cache; bool force_smu_not_present; + bool dc_mode_softmax_enabled; int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes int dentist_vco_freq_khz; struct clk_state_registers_and_bypass boot_snapshot; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index f94135c6e3c2..346f0ba73e86 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -61,6 +61,8 @@ struct dcn_dsc_state { uint32_t dsc_pic_height; uint32_t dsc_slice_bpg_offset; uint32_t dsc_chunk_size; + uint32_t dsc_fw_en; + uint32_t dsc_opp_source; }; @@ -88,6 +90,7 @@ struct dsc_enc_caps { int32_t max_total_throughput_mps; /* Maximum total throughput with all the slices combined */ int32_t max_slice_width; uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */ + uint32_t edp_sink_max_bits_per_pixel; bool is_dp; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 80e1a32bc63d..2c031586f4e6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -139,6 +139,7 @@ struct hubp_funcs { bool (*hubp_is_flip_pending)(struct hubp *hubp); void (*set_blank)(struct hubp *hubp, bool blank); + void (*set_blank_regs)(struct hubp *hubp, bool blank); void (*set_hubp_blank_en)(struct hubp *hubp, bool blank); void (*set_cursor_attributes)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index c88e113b94d1..073f8b667eff 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -164,6 +164,10 @@ struct stream_encoder_funcs { void (*stop_dp_info_packets)( struct stream_encoder *enc); + void (*reset_fifo)( + struct stream_encoder *enc + ); + void (*dp_blank)( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7390baf916b5..c29320b3855d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -290,6 +290,8 @@ struct timing_generator_funcs { enum optc_dsc_mode dsc_mode, uint32_t dsc_bytes_per_pixel, uint32_t dsc_slice_width); + void (*get_dsc_status)(struct timing_generator *optc, + uint32_t *dsc_mode); void (*set_odm_bypass)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index d50f4bd06b5d..05053f3b4ab7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -64,6 +64,7 @@ struct hw_sequencer_funcs { enum dc_status (*apply_ctx_to_hw)(struct dc *dc, struct dc_state *context); void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); void (*apply_ctx_for_surface)(struct dc *dc, const struct dc_stream_state *stream, int num_planes, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h index 10dcf6a5e9b1..a4e43b4826e0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h @@ -36,7 +36,7 @@ * Initialise link encoder resource tracking. */ void link_enc_cfg_init( - struct dc *dc, + const struct dc *dc, struct dc_state *state); /* diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 372c0898facd..c208925f8247 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -34,6 +34,10 @@ #define MEMORY_TYPE_HBM 2 +#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0) +#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F) +#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd)) + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); @@ -206,4 +210,11 @@ struct hpo_dp_link_encoder *resource_get_unused_hpo_dp_link_encoder( const struct resource_pool *pool); #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context); + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx); + #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c index 378cc11aa047..6b5fedd9ace0 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c @@ -185,16 +185,18 @@ bool dal_irq_service_dummy_set(struct irq_service *irq_service, const struct irq_source_info *info, bool enable) { - DC_LOG_ERROR("%s: called for non-implemented irq source\n", - __func__); + DC_LOG_ERROR("%s: called for non-implemented irq source, src_id=%u, ext_id=%u\n", + __func__, info->src_id, info->ext_id); + return false; } bool dal_irq_service_dummy_ack(struct irq_service *irq_service, const struct irq_source_info *info) { - DC_LOG_ERROR("%s: called for non-implemented irq source\n", - __func__); + DC_LOG_ERROR("%s: called for non-implemented irq source, src_id=%u, ext_id=%u\n", + __func__, info->src_id, info->ext_id); + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c index 34f43cb650f8..cf072e2347d3 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c @@ -40,10 +40,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn10( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn10(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index a47f68634fc3..aa708b61142f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -39,10 +39,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn201( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 78940cb20e10..235294534c43 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -40,10 +40,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn21( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c index 38e0ade60c7b..1b88e4e627fd 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c @@ -36,10 +36,9 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -enum dc_irq_source to_dal_irq_source_dcn31( - struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +static enum dc_irq_source to_dal_irq_source_dcn31(struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index cd204eef073b..83855b8a32e9 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -360,6 +360,8 @@ struct dmub_srv_hw_funcs { uint32_t (*get_gpint_dataout)(struct dmub_srv *dmub); + void (*clear_inbox0_ack_register)(struct dmub_srv *dmub); + uint32_t (*read_inbox0_ack_register)(struct dmub_srv *dmub); void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data); uint32_t (*get_current_time)(struct dmub_srv *dmub); @@ -409,6 +411,7 @@ struct dmub_srv { struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; struct dmub_rb inbox1_rb; + uint32_t inbox1_last_wptr; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -735,6 +738,45 @@ bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_ bool dmub_srv_should_detect(struct dmub_srv *dmub); +/** + * dmub_srv_send_inbox0_cmd() - Send command to DMUB using INBOX0 + * @dmub: the dmub service + * @data: the data to be sent in the INBOX0 command + * + * Send command by writing directly to INBOX0 WPTR + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + */ +enum dmub_status dmub_srv_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data); + +/** + * dmub_srv_wait_for_inbox0_ack() - wait for DMUB to ACK INBOX0 command + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Wait for DMUB to ACK the INBOX0 message + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + * DMUB_STATUS_TIMEOUT - wait for ack timed out + */ +enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t timeout_us); + +/** + * dmub_srv_wait_for_inbox0_ack() - clear ACK register for INBOX0 + * @dmub: the dmub service + * + * Clear ACK register for INBOX0 + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - hw_init false or hw function does not exist + */ +enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); + #if defined(__cplusplus) } #endif diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c29a67ccef17..afe9bc839b45 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -46,10 +46,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x1d82d23e +#define DMUB_FW_VERSION_GIT_HASH 0xc99a4517 #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 91 +#define DMUB_FW_VERSION_REVISION 97 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -173,13 +173,6 @@ extern "C" { #endif /** - * Number of nanoseconds per DMUB tick. - * DMCUB_TIMER_CURRENT increments in DMUB ticks, which are 10ns by default. - * If DMCUB_TIMER_WINDOW is non-zero this will no longer be true. - */ -#define NS_PER_DMUB_TICK 10 - -/** * union dmub_addr - DMUB physical/virtual 64-bit address. */ union dmub_addr { @@ -208,10 +201,9 @@ union dmub_psr_debug_flags { uint32_t use_hw_lock_mgr : 1; /** - * Unused. - * TODO: Remove. + * Use TPS3 signal when restore main link. */ - uint32_t log_line_nums : 1; + uint32_t force_wakeup_by_tps3 : 1; } bitfields; /** @@ -416,7 +408,14 @@ enum dmub_cmd_vbios_type { * Enables or disables power gating. */ DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING = 3, + /** + * Controls embedded panels. + */ DMUB_CMD__VBIOS_LVTMA_CONTROL = 15, + /** + * Query DP alt status on a transmitter. + */ + DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT = 26, }; //============================================================================== @@ -1550,10 +1549,14 @@ struct dmub_cmd_psr_copy_settings_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; + /* + * DSC enable status in driver + */ + uint8_t dsc_enable_status; /** - * Explicit padding to 4 byte boundary. + * Explicit padding to 3 byte boundary. */ - uint8_t pad3[4]; + uint8_t pad3[3]; }; /** @@ -2398,6 +2401,24 @@ struct dmub_rb_cmd_lvtma_control { }; /** + * Data passed in/out in a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ +struct dmub_rb_cmd_transmitter_query_dp_alt_data { + uint8_t phy_id; /**< 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4=UNIPHYE, 5=UNIPHYF */ + uint8_t is_usb; /**< is phy is usb */ + uint8_t is_dp_alt_disable; /**< is dp alt disable */ + uint8_t is_dp4; /**< is dp in 4 lane */ +}; + +/** + * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ +struct dmub_rb_cmd_transmitter_query_dp_alt { + struct dmub_cmd_header header; /**< header */ + struct dmub_rb_cmd_transmitter_query_dp_alt_data data; /**< payload */ +}; + +/** * Maximum number of bytes a chunk sent to DMUB for parsing */ #define DMUB_EDID_CEA_DATA_CHUNK_BYTES 8 @@ -2408,7 +2429,7 @@ struct dmub_rb_cmd_lvtma_control { struct dmub_cmd_send_edid_cea { uint16_t offset; /**< offset into the CEA block */ uint8_t length; /**< number of bytes in payload to copy as part of CEA block */ - uint16_t total_length; /**< total length of the CEA block */ + uint16_t cea_total_length; /**< total length of the CEA block */ uint8_t payload[DMUB_EDID_CEA_DATA_CHUNK_BYTES]; /**< data chunk of the CEA block */ uint8_t pad[3]; /**< padding and for future expansion */ }; @@ -2605,6 +2626,10 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_lvtma_control lvtma_control; /** + * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT command. + */ + struct dmub_rb_cmd_transmitter_query_dp_alt query_dp_alt; + /** * Definition of a DMUB_CMD__DPIA_DIG1_CONTROL command. */ struct dmub_rb_cmd_dig1_dpia_control dig1_dpia_control; @@ -2722,7 +2747,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); + uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt); const uint64_t *src = (const uint64_t *)cmd; uint8_t i; @@ -2840,7 +2865,7 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb *rb, static inline bool dmub_rb_out_front(struct dmub_rb *rb, union dmub_rb_out_cmd *cmd) { - const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t); + const uint64_t volatile *src = (const uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->rptr); uint64_t *dst = (uint64_t *)cmd; uint8_t i; @@ -2888,7 +2913,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t wptr = rb->wrpt; while (rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); + uint64_t volatile *data = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rptr); //uint64_t volatile *p = (uint64_t volatile *)data; uint64_t temp; uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 56d400ffa7ac..f673a1c1777a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -100,24 +100,9 @@ void dmub_flush_buffer_mem(const struct dmub_fb *fb) } static const struct dmub_fw_meta_info * -dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) +dmub_get_fw_meta_info_from_blob(const uint8_t *blob, uint32_t blob_size, uint32_t meta_offset) { const union dmub_fw_meta *meta; - const uint8_t *blob = NULL; - uint32_t blob_size = 0; - uint32_t meta_offset = 0; - - if (params->fw_bss_data && params->bss_data_size) { - /* Legacy metadata region. */ - blob = params->fw_bss_data; - blob_size = params->bss_data_size; - meta_offset = DMUB_FW_META_OFFSET; - } else if (params->fw_inst_const && params->inst_const_size) { - /* Combined metadata region. */ - blob = params->fw_inst_const; - blob_size = params->inst_const_size; - meta_offset = 0; - } if (!blob || !blob_size) return NULL; @@ -134,6 +119,32 @@ dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) return &meta->info; } +static const struct dmub_fw_meta_info * +dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) +{ + const struct dmub_fw_meta_info *info = NULL; + + if (params->fw_bss_data && params->bss_data_size) { + /* Legacy metadata region. */ + info = dmub_get_fw_meta_info_from_blob(params->fw_bss_data, + params->bss_data_size, + DMUB_FW_META_OFFSET); + } else if (params->fw_inst_const && params->inst_const_size) { + /* Combined metadata region - can be aligned to 16-bytes. */ + uint32_t i; + + for (i = 0; i < 16; ++i) { + info = dmub_get_fw_meta_info_from_blob( + params->fw_inst_const, params->inst_const_size, i); + + if (info) + break; + } + } + + return info; +} + static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; @@ -598,6 +609,8 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) { + struct dmub_rb flush_rb; + if (!dmub->hw_init) return DMUB_STATUS_INVALID; @@ -606,9 +619,14 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - dmub_rb_flush_pending(&dmub->inbox1_rb); + flush_rb = dmub->inbox1_rb; + flush_rb.rptr = dmub->inbox1_last_wptr; + dmub_rb_flush_pending(&flush_rb); + + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); return DMUB_STATUS_OK; } @@ -831,3 +849,38 @@ bool dmub_srv_should_detect(struct dmub_srv *dmub) return dmub->hw_funcs.should_detect(dmub); } + +enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub) +{ + if (!dmub->hw_init || dmub->hw_funcs.clear_inbox0_ack_register) + return DMUB_STATUS_INVALID; + + dmub->hw_funcs.clear_inbox0_ack_register(dmub); + return DMUB_STATUS_OK; +} + +enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t timeout_us) +{ + uint32_t i = 0; + uint32_t ack = 0; + + if (!dmub->hw_init || !dmub->hw_funcs.read_inbox0_ack_register) + return DMUB_STATUS_INVALID; + + for (i = 0; i <= timeout_us; i++) { + ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); + if (ack) + return DMUB_STATUS_OK; + } + return DMUB_STATUS_TIMEOUT; +} + +enum dmub_status dmub_srv_send_inbox0_cmd(struct dmub_srv *dmub, + union dmub_inbox0_data_register data) +{ + if (!dmub->hw_init || dmub->hw_funcs.send_inbox0_cmd) + return DMUB_STATUS_INVALID; + + dmub->hw_funcs.send_inbox0_cmd(dmub, data); + return DMUB_STATUS_OK; +} diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 4de59b66bb1a..a2b80514d83e 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -35,6 +35,7 @@ #define DP_BRANCH_DEVICE_ID_00E04C 0x00E04C #define DP_BRANCH_DEVICE_ID_006037 0x006037 +#define DP_DEVICE_ID_38EC11 0x38EC11 enum ddc_result { DDC_RESULT_UNKNOWN = 0, DDC_RESULT_SUCESSFULL, @@ -117,4 +118,7 @@ struct av_sync_data { uint8_t aud_del_ins3;/* DPCD 0002Dh */ }; +static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3, 0}; +static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5, 0}; + #endif /* __DAL_DDC_SERVICE_TYPES_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 370fad883e33..f093b49c5e6e 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -72,9 +72,7 @@ #define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) #define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__) -#if defined(CONFIG_DRM_AMD_DC_DCN) #define DC_LOG_DP2(...) DRM_DEBUG_KMS(__VA_ARGS__) -#endif struct dal_logger; @@ -126,9 +124,7 @@ enum dc_log_type { LOG_MAX_HW_POINTS, LOG_ALL_TF_CHANNELS, LOG_SAMPLE_1DLUT, -#if defined(CONFIG_DRM_AMD_DC_DCN) LOG_DP2, -#endif LOG_SECTION_TOTAL_COUNT }; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 4b9e68a79f06..f57a1478f0fe 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -231,6 +231,8 @@ enum DC_FEATURE_MASK { DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1 DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default + DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default + DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default }; enum DC_DEBUG_MASK { diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h index 6d0052ce6bed..da6d380c948b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h @@ -354,5 +354,12 @@ #define mmMP1_SMN_EXT_SCRATCH7 0x03c7 #define mmMP1_SMN_EXT_SCRATCH7_BASE_IDX 0 +/* + * addressBlock: mp_SmuMp1Pub_MmuDec + * base address: 0x0 + */ +#define smnMP1_PMI_3_START 0x3030204 +#define smnMP1_PMI_3_FIFO 0x3030208 +#define smnMP1_PMI_3 0x3030600 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h index 136fb5de6a4c..a5ae2a801254 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h @@ -959,5 +959,17 @@ #define MP1_SMN_EXT_SCRATCH7__DATA__SHIFT 0x0 #define MP1_SMN_EXT_SCRATCH7__DATA_MASK 0xFFFFFFFFL +// MP1_PMI_3_START +#define MP1_PMI_3_START__ENABLE_MASK 0x80000000L +// MP1_PMI_3_FIFO +#define MP1_PMI_3_FIFO__DEPTH_MASK 0x00000fffL + +// MP1_PMI_3_START +#define MP1_PMI_3_START__ENABLE__SHIFT 0x0000001f +// MP1_PMI_3_FIFO +#define MP1_PMI_3_FIFO__DEPTH__SHIFT 0x00000000 + + + #endif diff --git a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h index 9cb5f3631c60..ce79e5de8ce3 100644 --- a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h +++ b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h @@ -25,15 +25,15 @@ #define MAX_SEGMENT 5 -struct IP_BASE_INSTANCE +struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; -}; - -struct IP_BASE +} __maybe_unused; + +struct IP_BASE { struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; -}; +} __maybe_unused; static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C00, 0, 0, 0, 0 } }, diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index c84bd7b2cf59..ac941f62cbed 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -33,12 +33,11 @@ #include <linux/dma-fence.h> struct pci_dev; +struct amdgpu_device; #define KGD_MAX_QUEUES 128 struct kfd_dev; -struct kgd_dev; - struct kgd_mem; enum kfd_preempt_type { @@ -228,61 +227,61 @@ struct tile_config { */ struct kfd2kgd_calls { /* Register access functions */ - void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, + void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, u32 pasid, + int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid, unsigned int vmid); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); + int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id); - int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); - int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, + int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off); - int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, + int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); - int (*hqd_dump)(struct kgd_dev *kgd, + int (*hqd_dump)(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); - int (*hqd_sdma_dump)(struct kgd_dev *kgd, + int (*hqd_sdma_dump)(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); - bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - - int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, + bool (*hqd_is_occupied)(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); - bool (*hqd_sdma_is_occupied)(struct kgd_dev *kgd, void *mqd); + int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd, + uint32_t reset_type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id); + + bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd); - int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, + int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, unsigned int timeout); - int (*address_watch_disable)(struct kgd_dev *kgd); - int (*address_watch_execute)(struct kgd_dev *kgd, + int (*address_watch_disable)(struct amdgpu_device *adev); + int (*address_watch_execute)(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, uint32_t addr_lo); - int (*wave_control_execute)(struct kgd_dev *kgd, + int (*wave_control_execute)(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); - uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, + uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset); - bool (*get_atc_vmid_pasid_mapping_info)( - struct kgd_dev *kgd, + bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); @@ -290,16 +289,16 @@ struct kfd2kgd_calls { * passed to the shader by the CP. It's the user mode driver's * responsibility. */ - void (*set_scratch_backing_va)(struct kgd_dev *kgd, + void (*set_scratch_backing_va)(struct amdgpu_device *adev, uint64_t va, uint32_t vmid); - void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, + void (*set_vm_context_page_table_base)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); - uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); + uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev); - void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, - int *max_waves_per_cu); - void (*program_trap_handler_settings)(struct kgd_dev *kgd, + void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, + int *wave_cnt, int *max_waves_per_cu); + void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); }; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index bac15c466733..5c0867ebcfce 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -153,6 +153,10 @@ enum PP_SMC_POWER_PROFILE { PP_SMC_POWER_PROFILE_COUNT, }; +extern const char * const amdgpu_pp_profile_name[PP_SMC_POWER_PROFILE_COUNT]; + + + enum { PP_GROUP_UNKNOWN = 0, PP_GROUP_GFX = 1, diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h index 76b9eb3f441d..28a56b56bcaf 100644 --- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h +++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h @@ -9,12 +9,12 @@ struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; -}; +} __maybe_unused; struct IP_BASE { struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; -}; +} __maybe_unused; static const struct IP_BASE ACP_BASE = { { { { 0x02403800, 0x00480000, 0, 0, 0, 0 } }, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 41472ed99253..082539c70fd4 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -82,6 +82,16 @@ static const struct hwmon_temp_label { {PP_TEMP_MEM, "mem"}, }; +const char * const amdgpu_pp_profile_name[] = { + "BOOTUP_DEFAULT", + "3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM" +}; + /** * DOC: power_dpm_state * @@ -3759,5 +3769,7 @@ void amdgpu_debugfs_pm_init(struct amdgpu_device *adev) adev, &amdgpu_debugfs_pm_prv_buffer_fops, adev->pm.smu_prv_buffer_size); + + amdgpu_smu_stb_debug_fs_init(adev); #endif } diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 16e3f72d31b9..c464a045000d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -423,6 +423,9 @@ enum ip_power_state { POWER_STATE_OFF, }; +/* Used to mask smu debug modes */ +#define SMU_DEBUG_HALT_ON_ERROR 0x1 + struct amdgpu_pm { struct mutex mutex; u32 current_sclk; @@ -460,6 +463,11 @@ struct amdgpu_pm { struct list_head pm_attr_list; atomic_t pwr_state[AMD_IP_BLOCK_TYPE_NUM]; + + /* + * 0 = disabled (default), otherwise enable corresponding debug mode + */ + uint32_t smu_debug_mask; }; #define R600_SSTU_DFLT 0 diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 3557f4e7fc30..2b9b9a7ba97a 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -324,6 +324,7 @@ enum smu_table_id SMU_TABLE_OVERDRIVE, SMU_TABLE_I2C_COMMANDS, SMU_TABLE_PACE, + SMU_TABLE_ECCINFO, SMU_TABLE_COUNT, }; @@ -340,6 +341,7 @@ struct smu_table_context void *max_sustainable_clocks; struct smu_bios_boot_up_values boot_values; void *driver_pptable; + void *ecc_table; struct smu_table tables[SMU_TABLE_COUNT]; /* * The driver table is just a staging buffer for @@ -472,7 +474,14 @@ struct cmn2asic_mapping { int map_to; }; +struct stb_context { + uint32_t stb_buf_size; + bool enabled; + spinlock_t lock; +}; + #define WORKLOAD_POLICY_MAX 7 + struct smu_context { struct amdgpu_device *adev; @@ -559,6 +568,8 @@ struct smu_context uint16_t cpu_core_num; struct smu_user_dpm_profile user_dpm_profile; + + struct stb_context stb_context; }; struct i2c_adapter; @@ -1261,6 +1272,17 @@ struct pptable_funcs { * of SMUBUS table. */ int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size); + + /** + * @get_ecc_table: message SMU to get ECC INFO table. + */ + ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); + + + /** + * @stb_collect_info: Collects Smart Trace Buffers data. + */ + int (*stb_collect_info)(struct smu_context *smu, void *buf, uint32_t size); }; typedef enum { @@ -1397,6 +1419,9 @@ int smu_set_light_sbr(struct smu_context *smu, bool enable); int smu_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, uint64_t event_arg); +int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc); +int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size); +void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h index a017983ff1fa..0f67c56c2863 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h +++ b/drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h @@ -140,6 +140,8 @@ #define MAX_SW_I2C_COMMANDS 24 +#define ALDEBARAN_UMC_CHANNEL_NUM 32 + typedef enum { I2C_CONTROLLER_PORT_0, //CKSVII2C0 I2C_CONTROLLER_PORT_1, //CKSVII2C1 @@ -507,6 +509,19 @@ typedef struct { uint32_t MmHubPadding[8]; // SMU internal use } AvfsDebugTable_t; +typedef struct { + uint64_t mca_umc_status; + uint64_t mca_umc_addr; + uint16_t ce_count_lo_chip; + uint16_t ce_count_hi_chip; + + uint32_t eccPadding; +} EccInfo_t; + +typedef struct { + EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM]; +} EccInfoTable_t; + // These defines are used with the following messages: // SMC_MSG_TransferTableDram2Smu // SMC_MSG_TransferTableSmu2Dram @@ -517,6 +532,7 @@ typedef struct { #define TABLE_SMU_METRICS 4 #define TABLE_DRIVER_SMU_CONFIG 5 #define TABLE_I2C_COMMANDS 6 -#define TABLE_COUNT 7 +#define TABLE_ECCINFO 7 +#define TABLE_COUNT 8 #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index e5d3b0d1a032..44af23ae059e 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -27,7 +27,9 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 -#define SMU13_DRIVER_IF_VERSION_ALDE 0x07 +#define SMU13_DRIVER_IF_VERSION_ALDE 0x08 + +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms /* MP Apertures */ #define MP0_Public 0x03800000 @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); -int smu_v13_0_mode1_reset(struct smu_context *smu); int smu_v13_0_mode2_reset(struct smu_context *smu); int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 8d796ed3b7d1..3ab67b232cd4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1328,7 +1328,12 @@ static int pp_set_powergating_by_smu(void *handle, pp_dpm_powergate_vce(handle, gate); break; case AMD_IP_BLOCK_TYPE_GMC: - pp_dpm_powergate_mmhub(handle); + /* + * For now, this is only used on PICASSO. + * And only "gate" operation is supported. + */ + if (gate) + pp_dpm_powergate_mmhub(handle); break; case AMD_IP_BLOCK_TYPE_GFX: ret = pp_dpm_powergate_gfx(handle, gate); @@ -1551,7 +1556,7 @@ static int pp_set_ppfeature_status(void *handle, uint64_t ppfeature_masks) static int pp_asic_reset_mode_2(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; + int ret = 0; if (!hwmgr || !hwmgr->pm_en) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 258c573acc97..9ddd8491ff00 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1024,8 +1024,6 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, uint32_t min_freq, max_freq = 0; uint32_t ret = 0; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency, &now); @@ -1038,13 +1036,13 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, else i = 1; - size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", + size += sprintf(buf + size, "0: %uMhz %s\n", data->gfx_min_freq_limit/100, i == 0 ? "*" : ""); - size += sysfs_emit_at(buf, size, "1: %uMhz %s\n", + size += sprintf(buf + size, "1: %uMhz %s\n", i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK, i == 1 ? "*" : ""); - size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", + size += sprintf(buf + size, "2: %uMhz %s\n", data->gfx_max_freq_limit/100, i == 2 ? "*" : ""); break; @@ -1052,7 +1050,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now); for (i = 0; i < mclk_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, mclk_table->entries[i].clk / 100, ((mclk_table->entries[i].clk / 100) @@ -1067,10 +1065,10 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, if (ret) return ret; - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + size += sprintf(buf + size, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "0: %10uMhz\n", (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + size += sprintf(buf + size, "1: %10uMhz\n", (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; @@ -1083,8 +1081,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, if (ret) return ret; - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); - size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", + size += sprintf(buf + size, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", min_freq, max_freq); } break; @@ -1441,13 +1439,6 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) {70, 90, 0, 0,}, {30, 60, 0, 6,}, }; - static const char *profile_name[6] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE"}; static const char *title[6] = {"NUM", "MODE_NAME", "BUSY_SET_POINT", @@ -1465,7 +1456,7 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i <= PP_SMC_POWER_PROFILE_COMPUTE; i++) size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index aceebf584225..cd99db0dc2be 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -4914,8 +4914,6 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, int size = 0; uint32_t i, now, clock, pcie_speed; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency, &clock); @@ -4928,7 +4926,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < sclk_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, sclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4943,7 +4941,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < mclk_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, mclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4957,7 +4955,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < pcie_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %s %s\n", i, + size += sprintf(buf + size, "%d: %s %s\n", i, (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x8" : (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" : (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "", @@ -4965,32 +4963,32 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "%s:\n", "OD_SCLK"); for (i = 0; i < odn_sclk_table->num_of_pl; i++) - size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n", + size += sprintf(buf + size, "%d: %10uMHz %10umV\n", i, odn_sclk_table->entries[i].clock/100, odn_sclk_table->entries[i].vddc); } break; case OD_MCLK: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); + size += sprintf(buf + size, "%s:\n", "OD_MCLK"); for (i = 0; i < odn_mclk_table->num_of_pl; i++) - size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n", + size += sprintf(buf + size, "%d: %10uMHz %10umV\n", i, odn_mclk_table->entries[i].clock/100, odn_mclk_table->entries[i].vddc); } break; case OD_RANGE: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); - size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", + size += sprintf(buf + size, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.sclk_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.engineClock/100); - size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n", + size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.mclk_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); - size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n", + size += sprintf(buf + size, "VDDC: %7umV %11umV\n", data->odn_dpm_table.min_vddc, data->odn_dpm_table.max_vddc); } @@ -5500,14 +5498,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) uint32_t i, size = 0; uint32_t len; - static const char *profile_name[7] = {"BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; - static const char *title[8] = {"NUM", "MODE_NAME", "SCLK_UP_HYST", @@ -5531,7 +5521,7 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i < len; i++) { if (i == hwmgr->power_profile_mode) { size += sysfs_emit_at(buf, size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", - i, profile_name[i], "*", + i, amdgpu_pp_profile_name[i], "*", data->current_profile_setting.sclk_up_hyst, data->current_profile_setting.sclk_down_hyst, data->current_profile_setting.sclk_activity, @@ -5542,12 +5532,12 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) } if (smu7_profiling[i].bupdate_sclk) size += sysfs_emit_at(buf, size, "%3d %16s: %8d %16d %16d ", - i, profile_name[i], smu7_profiling[i].sclk_up_hyst, + i, amdgpu_pp_profile_name[i], smu7_profiling[i].sclk_up_hyst, smu7_profiling[i].sclk_down_hyst, smu7_profiling[i].sclk_activity); else size += sysfs_emit_at(buf, size, "%3d %16s: %8s %16s %16s ", - i, profile_name[i], "-", "-", "-"); + i, amdgpu_pp_profile_name[i], "-", "-", "-"); if (smu7_profiling[i].bupdate_mclk) size += sysfs_emit_at(buf, size, "%16d %16d %16d\n", diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c index 8e28a8eecefc..03bf8f069222 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c @@ -1550,8 +1550,6 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, uint32_t i, now; int size = 0; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device, @@ -1561,7 +1559,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, CURR_SCLK_INDEX); for (i = 0; i < sclk_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, sclk_table->entries[i].clk / 100, (i == now) ? "*" : ""); break; @@ -1573,7 +1571,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, CURR_MCLK_INDEX); for (i = SMU8_NUM_NBPMEMORYCLOCK; i > 0; i--) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", SMU8_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100, (SMU8_NUM_NBPMEMORYCLOCK-i == now) ? "*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index c981fc2882f0..3f040be0d158 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -4639,8 +4639,6 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, int i, now, size = 0, count = 0; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: if (data->registry_data.sclk_dpm_key_disabled) @@ -4654,7 +4652,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, else count = sclk_table->count; for (i = 0; i < count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, sclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4665,7 +4663,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now); for (i = 0; i < mclk_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, mclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4676,7 +4674,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now); for (i = 0; i < soc_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, soc_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4688,7 +4686,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now); for (i = 0; i < dcef_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, dcef_table->dpm_levels[i].value / 100, (dcef_table->dpm_levels[i].value / 100 == now) ? "*" : ""); @@ -4702,7 +4700,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, gen_speed = pptable->PcieGenSpeed[i]; lane_width = pptable->PcieLaneCount[i]; - size += sysfs_emit_at(buf, size, "%d: %s %s %s\n", i, + size += sprintf(buf + size, "%d: %s %s %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? "8.0GT/s," : @@ -4721,34 +4719,34 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, case OD_SCLK: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "%s:\n", "OD_SCLK"); podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; for (i = 0; i < podn_vdd_dep->count; i++) - size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n", + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", i, podn_vdd_dep->entries[i].clk / 100, podn_vdd_dep->entries[i].vddc); } break; case OD_MCLK: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); + size += sprintf(buf + size, "%s:\n", "OD_MCLK"); podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; for (i = 0; i < podn_vdd_dep->count; i++) - size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n", + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", i, podn_vdd_dep->entries[i].clk/100, podn_vdd_dep->entries[i].vddc); } break; case OD_RANGE: if (hwmgr->od_enabled) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); - size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", + size += sprintf(buf + size, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.engineClock/100); - size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n", + size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.mem_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); - size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n", + size += sprintf(buf + size, "VDDC: %7umV %11umV\n", data->odn_dpm_table.min_vddc, data->odn_dpm_table.max_vddc); } @@ -5099,13 +5097,6 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) {70, 90, 0, 0,}, {30, 60, 0, 6,}, }; - static const char *profile_name[7] = {"BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[6] = {"NUM", "MODE_NAME", "BUSY_SET_POINT", @@ -5123,11 +5114,12 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i < PP_SMC_POWER_PROFILE_CUSTOM; i++) size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); + size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", i, - profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", data->custom_profile_mode[0], data->custom_profile_mode[1], data->custom_profile_mode[2], data->custom_profile_mode[3]); return size; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index f7e783e1c888..a2f4d6773d45 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -2246,8 +2246,6 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, int i, now, size = 0; struct pp_clock_levels_with_latency clocks; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: PP_ASSERT_WITH_CODE( @@ -2260,7 +2258,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get gfx clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : ""); break; @@ -2276,7 +2274,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get memory clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : ""); break; @@ -2294,7 +2292,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get soc clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); break; @@ -2312,7 +2310,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get dcef clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 03e63be4ee27..97b3ad369046 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -3366,8 +3366,6 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, int ret = 0; uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width; - phm_get_sysfs_buf(&buf, &size); - switch (type) { case PP_SCLK: ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now); @@ -3376,13 +3374,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_sclks(hwmgr, &clocks)) { - size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3394,13 +3392,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_memclocks(hwmgr, &clocks)) { - size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3412,13 +3410,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_socclocks(hwmgr, &clocks)) { - size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3430,7 +3428,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); for (i = 0; i < fclk_dpm_table->count; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, fclk_dpm_table->dpm_levels[i].value, fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : ""); break; @@ -3442,13 +3440,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_dcefclocks(hwmgr, &clocks)) { - size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", + size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", + size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3462,7 +3460,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, gen_speed = pptable->PcieGenSpeed[i]; lane_width = pptable->PcieLaneCount[i]; - size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i, + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? "8.0GT/s," : @@ -3483,18 +3481,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, case OD_SCLK: if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id && od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + size += sprintf(buf + size, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "0: %10uMhz\n", od_table->GfxclkFmin); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + size += sprintf(buf + size, "1: %10uMhz\n", od_table->GfxclkFmax); } break; case OD_MCLK: if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + size += sprintf(buf + size, "%s:\n", "OD_MCLK"); + size += sprintf(buf + size, "1: %10uMhz\n", od_table->UclkFmax); } @@ -3507,14 +3505,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_VDDC_CURVE"); - size += sysfs_emit_at(buf, size, "0: %10uMhz %10dmV\n", + size += sprintf(buf + size, "%s:\n", "OD_VDDC_CURVE"); + size += sprintf(buf + size, "0: %10uMhz %10dmV\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1 / VOLTAGE_SCALE); - size += sysfs_emit_at(buf, size, "1: %10uMhz %10dmV\n", + size += sprintf(buf + size, "1: %10uMhz %10dmV\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2 / VOLTAGE_SCALE); - size += sysfs_emit_at(buf, size, "2: %10uMhz %10dmV\n", + size += sprintf(buf + size, "2: %10uMhz %10dmV\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3 / VOLTAGE_SCALE); } @@ -3522,17 +3520,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_RANGE: - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "%s:\n", "OD_RANGE"); if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id && od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) { - size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FMIN].min_value, od8_settings[OD8_SETTING_GFXCLK_FMAX].max_value); } if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) { - size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", + size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_UCLK_FMAX].min_value, od8_settings[OD8_SETTING_UCLK_FMAX].max_value); } @@ -3543,22 +3541,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { - size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ1].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ1].max_value); - size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].max_value); - size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ2].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ2].max_value); - size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", + size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].max_value); - size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ3].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ3].max_value); - size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value); } @@ -3982,14 +3980,6 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) DpmActivityMonitorCoeffInt_t activity_monitor; uint32_t i, size = 0; uint16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -4023,7 +4013,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return result); size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 01168b8955bf..2d718c30c8eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -277,8 +277,12 @@ static int smu_dpm_set_power_gate(void *handle, struct smu_context *smu = handle; int ret = 0; - if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) { + dev_WARN(smu->adev->dev, + "SMU uninitialized but power %s requested for %u!\n", + gate ? "gate" : "ungate", block_type); return -EOPNOTSUPP; + } switch (block_type) { /* @@ -1153,6 +1157,8 @@ static int smu_smc_hw_setup(struct smu_context *smu) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 0, 12): ret = smu_system_features_control(smu, true); + if (ret) + dev_err(adev->dev, "Failed system features control!\n"); break; default: break; @@ -1277,8 +1283,10 @@ static int smu_smc_hw_setup(struct smu_context *smu) } ret = smu_notify_display_change(smu); - if (ret) + if (ret) { + dev_err(adev->dev, "Failed to notify display change!\n"); return ret; + } /* * Set min deep sleep dce fclk with bootup value from vbios via @@ -1286,8 +1294,6 @@ static int smu_smc_hw_setup(struct smu_context *smu) */ ret = smu_set_min_dcef_deep_sleep(smu, smu->smu_table.boot_values.dcefclk / 100); - if (ret) - return ret; return ret; } @@ -1344,7 +1350,6 @@ static int smu_hw_init(void *handle) } if (smu->is_apu) { - smu_powergate_sdma(&adev->smu, false); smu_dpm_set_vcn_enable(smu, true); smu_dpm_set_jpeg_enable(smu, true); smu_set_gfx_cgpg(&adev->smu, true); @@ -1468,7 +1473,7 @@ static int smu_disable_dpms(struct smu_context *smu) dev_err(adev->dev, "Failed to disable smu features.\n"); } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0) && + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); @@ -1506,10 +1511,6 @@ static int smu_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if (smu->is_apu) { - smu_powergate_sdma(&adev->smu, true); - } - smu_dpm_set_vcn_enable(smu, false); smu_dpm_set_jpeg_enable(smu, false); @@ -3072,6 +3073,20 @@ int smu_set_light_sbr(struct smu_context *smu, bool enable) return ret; } +int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc) +{ + int ret = -EOPNOTSUPP; + + mutex_lock(&smu->mutex); + if (smu->ppt_funcs && + smu->ppt_funcs->get_ecc_info) + ret = smu->ppt_funcs->get_ecc_info(smu, umc_ecc); + mutex_unlock(&smu->mutex); + + return ret; + +} + static int smu_get_prv_buffer_details(void *handle, void **addr, size_t *size) { struct smu_context *smu = handle; @@ -3161,3 +3176,107 @@ int smu_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, return ret; } + +int smu_stb_collect_info(struct smu_context *smu, void *buf, uint32_t size) +{ + + if (!smu->ppt_funcs->stb_collect_info || !smu->stb_context.enabled) + return -EOPNOTSUPP; + + /* Confirm the buffer allocated is of correct size */ + if (size != smu->stb_context.stb_buf_size) + return -EINVAL; + + /* + * No need to lock smu mutex as we access STB directly through MMIO + * and not going through SMU messaging route (for now at least). + * For registers access rely on implementation internal locking. + */ + return smu->ppt_funcs->stb_collect_info(smu, buf, size); +} + +#if defined(CONFIG_DEBUG_FS) + +static int smu_stb_debugfs_open(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct smu_context *smu = &adev->smu; + unsigned char *buf; + int r; + + buf = kvmalloc_array(smu->stb_context.stb_buf_size, sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + r = smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size); + if (r) + goto out; + + filp->private_data = buf; + + return 0; + +out: + kvfree(buf); + return r; +} + +static ssize_t smu_stb_debugfs_read(struct file *filp, char __user *buf, size_t size, + loff_t *pos) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct smu_context *smu = &adev->smu; + + + if (!filp->private_data) + return -EINVAL; + + return simple_read_from_buffer(buf, + size, + pos, filp->private_data, + smu->stb_context.stb_buf_size); +} + +static int smu_stb_debugfs_release(struct inode *inode, struct file *filp) +{ + kvfree(filp->private_data); + filp->private_data = NULL; + + return 0; +} + +/* + * We have to define not only read method but also + * open and release because .read takes up to PAGE_SIZE + * data each time so and so is invoked multiple times. + * We allocate the STB buffer in .open and release it + * in .release + */ +static const struct file_operations smu_stb_debugfs_fops = { + .owner = THIS_MODULE, + .open = smu_stb_debugfs_open, + .read = smu_stb_debugfs_read, + .release = smu_stb_debugfs_release, + .llseek = default_llseek, +}; + +#endif + +void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + + struct smu_context *smu = &adev->smu; + + if (!smu->stb_context.stb_buf_size) + return; + + debugfs_create_file_size("amdgpu_smu_stb_dump", + S_IRUSR, + adev_to_drm(adev)->primary->debugfs_root, + adev, + &smu_stb_debugfs_fops, + smu->stb_context.stb_buf_size); +#endif + +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index fd1d30a93db5..58bc387fb279 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -295,16 +295,6 @@ static int arcturus_allocate_dpm_context(struct smu_context *smu) return -ENOMEM; smu_dpm->dpm_context_size = sizeof(struct smu_11_0_dpm_context); - smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_current_power_state) - return -ENOMEM; - - smu_dpm->dpm_request_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_request_power_state) - return -ENOMEM; - return 0; } @@ -1389,14 +1379,6 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, char *buf) { DpmActivityMonitorCoeffInt_t activity_monitor; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1453,7 +1435,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, } size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); if (smu_version >= 0x360d00) { size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 60a557068ea4..2bb7816b245a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1713,14 +1713,6 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) DpmActivityMonitorCoeffInt_t activity_monitor; uint32_t i, size = 0; int16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1759,7 +1751,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) } size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a4108025fe29..777f717c37ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -80,6 +80,9 @@ (*member) = (smu->smu_table.driver_pptable + offsetof(PPTable_t, field));\ } while(0) +/* STB FIFO depth is in 64bit units */ +#define SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES 8 + static int get_table_size(struct smu_context *smu) { if (smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 13)) @@ -650,6 +653,8 @@ static int sienna_cichlid_allocate_dpm_context(struct smu_context *smu) return 0; } +static void sienna_cichlid_stb_init(struct smu_context *smu); + static int sienna_cichlid_init_smc_tables(struct smu_context *smu) { int ret = 0; @@ -662,6 +667,8 @@ static int sienna_cichlid_init_smc_tables(struct smu_context *smu) if (ret) return ret; + sienna_cichlid_stb_init(smu); + return smu_v11_0_init_smc_tables(smu); } @@ -1171,7 +1178,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t mask) { struct amdgpu_device *adev = smu->adev; - int ret = 0, size = 0; + int ret = 0; uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; soft_min_level = mask ? (ffs(mask) - 1) : 0; @@ -1216,7 +1223,7 @@ forec_level_out: if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK)) amdgpu_gfx_off_ctrl(adev, true); - return size; + return 0; } static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) @@ -1342,14 +1349,6 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * &(activity_monitor_external.DpmActivityMonitorCoeffInt); uint32_t i, size = 0; int16_t workload_type = 0; - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; static const char *title[] = { "PROFILE_INDEX(NAME)", "CLOCK_TYPE(NAME)", @@ -1388,7 +1387,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * } size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", @@ -2135,7 +2134,13 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, static int sienna_cichlid_run_btc(struct smu_context *smu) { - return smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); + int res; + + res = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); + if (res) + dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + + return res; } static int sienna_cichlid_baco_enter(struct smu_context *smu) @@ -3619,6 +3624,16 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, gpu_metrics->energy_accumulator = use_metrics_v2 ? metrics_v2->EnergyAccumulator : metrics->EnergyAccumulator; + if (metrics->CurrGfxVoltageOffset) + gpu_metrics->voltage_gfx = + (155000 - 625 * metrics->CurrGfxVoltageOffset) / 100; + if (metrics->CurrMemVidOffset) + gpu_metrics->voltage_mem = + (155000 - 625 * metrics->CurrMemVidOffset) / 100; + if (metrics->CurrSocVoltageOffset) + gpu_metrics->voltage_soc = + (155000 - 625 * metrics->CurrSocVoltageOffset) / 100; + average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity; if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = @@ -3793,6 +3808,53 @@ static int sienna_cichlid_set_mp1_state(struct smu_context *smu, return ret; } +static void sienna_cichlid_stb_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t reg; + + reg = RREG32_PCIE(MP1_Public | smnMP1_PMI_3_START); + smu->stb_context.enabled = REG_GET_FIELD(reg, MP1_PMI_3_START, ENABLE); + + /* STB is disabled */ + if (!smu->stb_context.enabled) + return; + + spin_lock_init(&smu->stb_context.lock); + + /* STB buffer size in bytes as function of FIFO depth */ + reg = RREG32_PCIE(MP1_Public | smnMP1_PMI_3_FIFO); + smu->stb_context.stb_buf_size = 1 << REG_GET_FIELD(reg, MP1_PMI_3_FIFO, DEPTH); + smu->stb_context.stb_buf_size *= SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES; + + dev_info(smu->adev->dev, "STB initialized to %d entries", + smu->stb_context.stb_buf_size / SIENNA_CICHLID_STB_DEPTH_UNIT_BYTES); + +} + +int sienna_cichlid_stb_get_data_direct(struct smu_context *smu, + void *buf, + uint32_t size) +{ + uint32_t *p = buf; + struct amdgpu_device *adev = smu->adev; + + /* No need to disable interrupts for now as we don't lock it yet from ISR */ + spin_lock(&smu->stb_context.lock); + + /* + * Read the STB FIFO in units of 32bit since this is the accessor window + * (register width) we have. + */ + buf = ((char *) buf) + size; + while ((void *)p < buf) + *p++ = cpu_to_le32(RREG32_PCIE(MP1_Public | smnMP1_PMI_3)); + + spin_unlock(&smu->stb_context.lock); + + return 0; +} + static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_allowed_feature_mask = sienna_cichlid_get_allowed_feature_mask, .set_default_dpm_table = sienna_cichlid_set_default_dpm_table, @@ -3882,6 +3944,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .interrupt_work = smu_v11_0_interrupt_work, .gpo_control = sienna_cichlid_gpo_control, .set_mp1_state = sienna_cichlid_set_mp1_state, + .stb_collect_info = sienna_cichlid_stb_get_data_direct, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index c02ed65ffa38..5cb07ed227fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1039,14 +1039,6 @@ failed: static int vangogh_get_power_profile_mode(struct smu_context *smu, char *buf) { - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; uint32_t i, size = 0; int16_t workload_type = 0; @@ -1066,7 +1058,7 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, continue; size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } return size; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 145f13b8c977..25c4b135f830 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1095,14 +1095,6 @@ static int renoir_set_watermarks_table( static int renoir_get_power_profile_mode(struct smu_context *smu, char *buf) { - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; uint32_t i, size = 0; int16_t workload_type = 0; @@ -1121,7 +1113,7 @@ static int renoir_get_power_profile_mode(struct smu_context *smu, continue; size += sysfs_emit_at(buf, size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } return size; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index d60b8c5e8715..43028f2cd28b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -191,6 +191,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..0907da022197 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -78,6 +78,12 @@ #define smnPCIE_ESM_CTRL 0x111003D0 +/* + * SMU support ECCTABLE since version 68.42.0, + * use this to check ECCTALE feature whether support + */ +#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00 + static const struct smu_temperature_range smu13_thermal_policy[] = { {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, @@ -190,6 +196,7 @@ static const struct cmn2asic_mapping aldebaran_table_map[SMU_TABLE_COUNT] = { TAB_MAP(SMU_METRICS), TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(I2C_COMMANDS), + TAB_MAP(ECCINFO), }; static const uint8_t aldebaran_throttler_map[] = { @@ -223,6 +230,9 @@ static int aldebaran_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; @@ -235,6 +245,10 @@ static int aldebaran_tables_init(struct smu_context *smu) return -ENOMEM; } + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); + if (!smu_table->ecc_table) + return -ENOMEM; + return 0; } @@ -248,16 +262,6 @@ static int aldebaran_allocate_dpm_context(struct smu_context *smu) return -ENOMEM; smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context); - smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_current_power_state) - return -ENOMEM; - - smu_dpm->dpm_request_power_state = kzalloc(sizeof(struct smu_power_state), - GFP_KERNEL); - if (!smu_dpm->dpm_request_power_state) - return -ENOMEM; - return 0; } @@ -1765,6 +1769,98 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static int aldebaran_check_ecc_table_support(struct smu_context *smu) +{ + uint32_t if_version = 0xff, smu_version = 0xff; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); + if (ret) { + /* return not support if failed get smu_version */ + ret = -EOPNOTSUPP; + } + + if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION) + ret = -EOPNOTSUPP; + + return ret; +} + +static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, + void *table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + EccInfoTable_t *ecc_table = NULL; + struct ecc_info_per_ch *ecc_info_per_channel = NULL; + int i, ret = 0; + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; + + ret = aldebaran_check_ecc_table_support(smu); + if (ret) + return ret; + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ECCINFO, + 0, + smu_table->ecc_table, + false); + if (ret) { + dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n"); + return ret; + } + + ecc_table = (EccInfoTable_t *)smu_table->ecc_table; + + for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) { + ecc_info_per_channel = &(eccinfo->ecc[i]); + ecc_info_per_channel->ce_count_lo_chip = + ecc_table->EccInfo[i].ce_count_lo_chip; + ecc_info_per_channel->ce_count_hi_chip = + ecc_table->EccInfo[i].ce_count_hi_chip; + ecc_info_per_channel->mca_umc_status = + ecc_table->EccInfo[i].mca_umc_status; + ecc_info_per_channel->mca_umc_addr = + ecc_table->EccInfo[i].mca_umc_addr; + } + + return ret; +} + +static int aldebaran_mode1_reset(struct smu_context *smu) +{ + u32 smu_version, fatal_err, param; + int ret = 0; + struct amdgpu_device *adev = smu->adev; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + fatal_err = 0; + param = SMU_RESET_MODE_1; + + /* + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 + */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (smu_version < 0x00440700) { + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + } + else { + /* fatal error triggered by ras, PMFW supports the flag + from 68.44.0 */ + if ((smu_version >= 0x00442c00) && ras && + atomic_read(&ras->in_recovery)) + fatal_err = 1; + + param |= (fatal_err << 16); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GfxDeviceDriverReset, param, NULL); + } + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + static int aldebaran_mode2_reset(struct smu_context *smu) { u32 smu_version; @@ -1925,13 +2021,14 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .get_gpu_metrics = aldebaran_get_gpu_metrics, .mode1_reset_is_support = aldebaran_is_mode1_reset_supported, .mode2_reset_is_support = aldebaran_is_mode2_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .mode1_reset = aldebaran_mode1_reset, .set_mp1_state = aldebaran_set_mp1_state, .mode2_reset = aldebaran_mode2_reset, .wait_for_event = smu_v13_0_wait_for_event, .i2c_init = aldebaran_i2c_control_init, .i2c_fini = aldebaran_i2c_control_fini, .send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num, + .get_ecc_info = aldebaran_get_ecc_info, }; void aldebaran_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedf..677a246212f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); #define SMU13_VOLTAGE_SCALE 4 -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms - #define LINK_WIDTH_MAX 6 #define LINK_SPEED_MAX 3 @@ -198,6 +196,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) int smu_v13_0_check_fw_version(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; uint8_t smu_minor, smu_debug; @@ -210,6 +209,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu_major = (smu_version >> 16) & 0xffff; smu_minor = (smu_version >> 8) & 0xff; smu_debug = (smu_version >> 0) & 0xff; + if (smu->is_apu) + adev->pm.fw_version = smu_version; switch (smu->adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): @@ -226,9 +227,6 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) break; } - dev_info(smu->adev->dev, "smu fw reported version = 0x%08x (%d.%d.%d)\n", - smu_version, smu_major, smu_minor, smu_debug); - /* * 1. if_version mismatch is not critical as our fw is designed * to be backward compatible. @@ -430,8 +428,10 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = NULL; + kfree(smu_table->ecc_table); kfree(smu_table->metrics_table); kfree(smu_table->watermarks_table); + smu_table->ecc_table = NULL; smu_table->metrics_table = NULL; smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; @@ -1424,25 +1424,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu) return ret; } -int smu_v13_0_mode1_reset(struct smu_context *smu) -{ - u32 smu_version; - int ret = 0; - /* - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 - */ - smu_cmn_get_smc_version(smu, NULL, &smu_version); - if (smu_version < 0x00440700) - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); - else - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL); - - if (!ret) - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); - - return ret; -} - static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu, uint64_t event_arg) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index ea6f50c08c5f..ee1a312fd497 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -94,10 +94,10 @@ static void smu_cmn_read_arg(struct smu_context *smu, /** * __smu_cmn_poll_stat -- poll for a status from the SMU - * smu: a pointer to SMU context + * @smu: a pointer to SMU context * * Returns the status of the SMU, which could be, - * 0, the SMU is busy with your previous command; + * 0, the SMU is busy with your command; * 1, execution status: success, execution result: success; * 0xFF, execution status: success, execution result: failure; * 0xFE, unknown command; @@ -257,10 +257,11 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu, uint16_t msg_index, uint32_t param) { + struct amdgpu_device *adev = smu->adev; u32 reg; int res; - if (smu->adev->no_hw_access) + if (adev->no_hw_access) return 0; reg = __smu_cmn_poll_stat(smu); @@ -272,6 +273,12 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu, __smu_cmn_send_msg(smu, msg_index, param); res = 0; Out: + if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && + res && (res != -ETIME)) { + amdgpu_device_halt(adev); + WARN_ON(1); + } + return res; } @@ -288,9 +295,18 @@ Out: int smu_cmn_wait_for_response(struct smu_context *smu) { u32 reg; + int res; reg = __smu_cmn_poll_stat(smu); - return __smu_cmn_reg2errno(smu, reg); + res = __smu_cmn_reg2errno(smu, reg); + + if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && + res && (res != -ETIME)) { + amdgpu_device_halt(smu->adev); + WARN_ON(1); + } + + return res; } /** @@ -328,10 +344,11 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, uint32_t param, uint32_t *read_arg) { + struct amdgpu_device *adev = smu->adev; int res, index; u32 reg; - if (smu->adev->no_hw_access) + if (adev->no_hw_access) return 0; index = smu_cmn_to_asic_specific_index(smu, @@ -352,11 +369,16 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, __smu_cmn_send_msg(smu, (uint16_t) index, param); reg = __smu_cmn_poll_stat(smu); res = __smu_cmn_reg2errno(smu, reg); - if (res == -EREMOTEIO) + if (res != 0) __smu_cmn_reg_print_error(smu, reg, index, param, msg); if (read_arg) smu_cmn_read_arg(smu, read_arg); Out: + if (unlikely(adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && res) { + amdgpu_device_halt(adev); + WARN_ON(1); + } + mutex_unlock(&smu->message_lock); return res; } diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig index 3a9e966e0e78..58a242871b28 100644 --- a/drivers/gpu/drm/arm/Kconfig +++ b/drivers/gpu/drm/arm/Kconfig @@ -6,7 +6,6 @@ config DRM_HDLCD depends on DRM && OF && (ARM || ARM64 || COMPILE_TEST) depends on COMMON_CLK select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER help Choose this option if you have an ARM High Definition Colour LCD controller. @@ -27,7 +26,6 @@ config DRM_MALI_DISPLAY depends on DRM && OF && (ARM || ARM64 || COMPILE_TEST) depends on COMMON_CLK select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VIDEOMODE_HELPERS help diff --git a/drivers/gpu/drm/arm/display/Kconfig b/drivers/gpu/drm/arm/display/Kconfig index cec0639e3aa1..e91598b60781 100644 --- a/drivers/gpu/drm/arm/display/Kconfig +++ b/drivers/gpu/drm/arm/display/Kconfig @@ -4,7 +4,6 @@ config DRM_KOMEDA depends on DRM && OF depends on COMMON_CLK select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VIDEOMODE_HELPERS help diff --git a/drivers/gpu/drm/aspeed/Kconfig b/drivers/gpu/drm/aspeed/Kconfig index 5e95bcea43e9..024ccab14f88 100644 --- a/drivers/gpu/drm/aspeed/Kconfig +++ b/drivers/gpu/drm/aspeed/Kconfig @@ -5,7 +5,7 @@ config DRM_ASPEED_GFX depends on (COMPILE_TEST || ARCH_ASPEED) depends on MMU select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DMA_CMA if HAVE_DMA_CONTIGUOUS select CMA if HAVE_DMA_CONTIGUOUS select MFD_SYSCON diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c index b53fee6f1c17..65f172807a0d 100644 --- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c +++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c @@ -291,7 +291,7 @@ vga_pw_show(struct device *dev, struct device_attribute *attr, char *buf) if (rc) return rc; - return sprintf(buf, "%u\n", reg & 1); + return sprintf(buf, "%u\n", reg); } static DEVICE_ATTR_RO(vga_pw); diff --git a/drivers/gpu/drm/ast/Makefile b/drivers/gpu/drm/ast/Makefile index 438a2d05b115..21f71160bc3e 100644 --- a/drivers/gpu/drm/ast/Makefile +++ b/drivers/gpu/drm/ast/Makefile @@ -3,6 +3,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ast-y := ast_drv.o ast_main.o ast_mm.o ast_mode.o ast_post.o ast_dp501.o +ast-y := ast_drv.o ast_i2c.o ast_main.o ast_mm.o ast_mode.o ast_post.o ast_dp501.o obj-$(CONFIG_DRM_AST) := ast.o diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 86d5cd7b6318..6d8613f6fe1c 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -26,7 +26,6 @@ * Authors: Dave Airlie <airlied@redhat.com> */ -#include <linux/console.h> #include <linux/module.h> #include <linux/pci.h> @@ -233,7 +232,7 @@ static struct pci_driver ast_pci_driver = { static int __init ast_init(void) { - if (vgacon_text_force() && ast_modeset == -1) + if (drm_firmware_drivers_only() && ast_modeset == -1) return -EINVAL; if (ast_modeset == 0) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 2cfce7dc95af..00bfa41ff7cb 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -357,4 +357,7 @@ bool ast_dp501_read_edid(struct drm_device *dev, u8 *ediddata); u8 ast_get_dp501_max_clk(struct drm_device *dev); void ast_init_3rdtx(struct drm_device *dev); +/* ast_i2c.c */ +struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev); + #endif diff --git a/drivers/gpu/drm/ast/ast_i2c.c b/drivers/gpu/drm/ast/ast_i2c.c new file mode 100644 index 000000000000..93e91c36d649 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_i2c.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: MIT +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#include <drm/drm_managed.h> +#include <drm/drm_print.h> + +#include "ast_drv.h" + +static void ast_i2c_setsda(void *i2c_priv, int data) +{ + struct ast_i2c_chan *i2c = i2c_priv; + struct ast_private *ast = to_ast_private(i2c->dev); + int i; + u8 ujcrb7, jtemp; + + for (i = 0; i < 0x10000; i++) { + ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); + jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); + if (ujcrb7 == jtemp) + break; + } +} + +static void ast_i2c_setscl(void *i2c_priv, int clock) +{ + struct ast_i2c_chan *i2c = i2c_priv; + struct ast_private *ast = to_ast_private(i2c->dev); + int i; + u8 ujcrb7, jtemp; + + for (i = 0; i < 0x10000; i++) { + ujcrb7 = ((clock & 0x01) ? 0 : 1); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); + jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); + if (ujcrb7 == jtemp) + break; + } +} + +static int ast_i2c_getsda(void *i2c_priv) +{ + struct ast_i2c_chan *i2c = i2c_priv; + struct ast_private *ast = to_ast_private(i2c->dev); + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); + + return val & 1 ? 1 : 0; +} + +static int ast_i2c_getscl(void *i2c_priv) +{ + struct ast_i2c_chan *i2c = i2c_priv; + struct ast_private *ast = to_ast_private(i2c->dev); + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); + + return val & 1 ? 1 : 0; +} + +static void ast_i2c_release(struct drm_device *dev, void *res) +{ + struct ast_i2c_chan *i2c = res; + + i2c_del_adapter(&i2c->adapter); + kfree(i2c); +} + +struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev) +{ + struct ast_i2c_chan *i2c; + int ret; + + i2c = kzalloc(sizeof(struct ast_i2c_chan), GFP_KERNEL); + if (!i2c) + return NULL; + + i2c->adapter.owner = THIS_MODULE; + i2c->adapter.class = I2C_CLASS_DDC; + i2c->adapter.dev.parent = dev->dev; + i2c->dev = dev; + i2c_set_adapdata(&i2c->adapter, i2c); + snprintf(i2c->adapter.name, sizeof(i2c->adapter.name), + "AST i2c bit bus"); + i2c->adapter.algo_data = &i2c->bit; + + i2c->bit.udelay = 20; + i2c->bit.timeout = 2; + i2c->bit.data = i2c; + i2c->bit.setsda = ast_i2c_setsda; + i2c->bit.setscl = ast_i2c_setscl; + i2c->bit.getsda = ast_i2c_getsda; + i2c->bit.getscl = ast_i2c_getscl; + ret = i2c_bit_add_bus(&i2c->adapter); + if (ret) { + drm_err(dev, "Failed to register bit i2c\n"); + goto out_kfree; + } + + ret = drmm_add_action_or_reset(dev, ast_i2c_release, i2c); + if (ret) + return NULL; + return i2c; + +out_kfree: + kfree(i2c); + return NULL; +} diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1e30eaeb0e1b..44c2aafcb7c2 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -47,9 +47,6 @@ #include "ast_drv.h" #include "ast_tables.h" -static struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev); -static void ast_i2c_destroy(struct ast_i2c_chan *i2c); - static inline void ast_load_palette_index(struct ast_private *ast, u8 index, u8 red, u8 green, u8 blue) @@ -1210,9 +1207,9 @@ static int ast_get_modes(struct drm_connector *connector) { struct ast_connector *ast_connector = to_ast_connector(connector); struct ast_private *ast = to_ast_private(connector->dev); - struct edid *edid; - int ret; + struct edid *edid = NULL; bool flags = false; + int ret; if (ast->tx_chip_type == AST_TX_DP501) { ast->dp501_maxclk = 0xff; @@ -1226,7 +1223,7 @@ static int ast_get_modes(struct drm_connector *connector) else kfree(edid); } - if (!flags) + if (!flags && ast_connector->i2c) edid = drm_get_edid(connector, &ast_connector->i2c->adapter); if (edid) { drm_connector_update_edid_property(&ast_connector->base, edid); @@ -1300,14 +1297,6 @@ static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, return flags; } -static void ast_connector_destroy(struct drm_connector *connector) -{ - struct ast_connector *ast_connector = to_ast_connector(connector); - - ast_i2c_destroy(ast_connector->i2c); - drm_connector_cleanup(connector); -} - static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { .get_modes = ast_get_modes, .mode_valid = ast_mode_valid, @@ -1316,7 +1305,7 @@ static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { static const struct drm_connector_funcs ast_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = ast_connector_destroy, + .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -1332,10 +1321,13 @@ static int ast_connector_init(struct drm_device *dev) if (!ast_connector->i2c) drm_err(dev, "failed to add ddc bus for connector\n"); - drm_connector_init_with_ddc(dev, connector, - &ast_connector_funcs, - DRM_MODE_CONNECTOR_VGA, - &ast_connector->i2c->adapter); + if (ast_connector->i2c) + drm_connector_init_with_ddc(dev, connector, &ast_connector_funcs, + DRM_MODE_CONNECTOR_VGA, + &ast_connector->i2c->adapter); + else + drm_connector_init(dev, connector, &ast_connector_funcs, + DRM_MODE_CONNECTOR_VGA); drm_connector_helper_add(connector, &ast_connector_helper_funcs); @@ -1413,124 +1405,3 @@ int ast_mode_config_init(struct ast_private *ast) return 0; } - -static int get_clock(void *i2c_priv) -{ - struct ast_i2c_chan *i2c = i2c_priv; - struct ast_private *ast = to_ast_private(i2c->dev); - uint32_t val, val2, count, pass; - - count = 0; - pass = 0; - val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; - do { - val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; - if (val == val2) { - pass++; - } else { - pass = 0; - val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; - } - } while ((pass < 5) && (count++ < 0x10000)); - - return val & 1 ? 1 : 0; -} - -static int get_data(void *i2c_priv) -{ - struct ast_i2c_chan *i2c = i2c_priv; - struct ast_private *ast = to_ast_private(i2c->dev); - uint32_t val, val2, count, pass; - - count = 0; - pass = 0; - val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; - do { - val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; - if (val == val2) { - pass++; - } else { - pass = 0; - val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; - } - } while ((pass < 5) && (count++ < 0x10000)); - - return val & 1 ? 1 : 0; -} - -static void set_clock(void *i2c_priv, int clock) -{ - struct ast_i2c_chan *i2c = i2c_priv; - struct ast_private *ast = to_ast_private(i2c->dev); - int i; - u8 ujcrb7, jtemp; - - for (i = 0; i < 0x10000; i++) { - ujcrb7 = ((clock & 0x01) ? 0 : 1); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); - jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); - if (ujcrb7 == jtemp) - break; - } -} - -static void set_data(void *i2c_priv, int data) -{ - struct ast_i2c_chan *i2c = i2c_priv; - struct ast_private *ast = to_ast_private(i2c->dev); - int i; - u8 ujcrb7, jtemp; - - for (i = 0; i < 0x10000; i++) { - ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); - jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); - if (ujcrb7 == jtemp) - break; - } -} - -static struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev) -{ - struct ast_i2c_chan *i2c; - int ret; - - i2c = kzalloc(sizeof(struct ast_i2c_chan), GFP_KERNEL); - if (!i2c) - return NULL; - - i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; - i2c->adapter.dev.parent = dev->dev; - i2c->dev = dev; - i2c_set_adapdata(&i2c->adapter, i2c); - snprintf(i2c->adapter.name, sizeof(i2c->adapter.name), - "AST i2c bit bus"); - i2c->adapter.algo_data = &i2c->bit; - - i2c->bit.udelay = 20; - i2c->bit.timeout = 2; - i2c->bit.data = i2c; - i2c->bit.setsda = set_data; - i2c->bit.setscl = set_clock; - i2c->bit.getsda = get_data; - i2c->bit.getscl = get_clock; - ret = i2c_bit_add_bus(&i2c->adapter); - if (ret) { - drm_err(dev, "Failed to register bit i2c\n"); - goto out_free; - } - - return i2c; -out_free: - kfree(i2c); - return NULL; -} - -static void ast_i2c_destroy(struct ast_i2c_chan *i2c) -{ - if (!i2c) - return; - i2c_del_adapter(&i2c->adapter); - kfree(i2c); -} diff --git a/drivers/gpu/drm/atmel-hlcdc/Kconfig b/drivers/gpu/drm/atmel-hlcdc/Kconfig index 5f67f001553b..8ae679f1a518 100644 --- a/drivers/gpu/drm/atmel-hlcdc/Kconfig +++ b/drivers/gpu/drm/atmel-hlcdc/Kconfig @@ -4,7 +4,6 @@ config DRM_ATMEL_HLCDC depends on DRM && OF && COMMON_CLK && MFD_ATMEL_HLCDC && ARM select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_PANEL help Choose this option if you have an ATMEL SoC with an HLCDC display diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 001fb39d9919..2346dbcc505f 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -850,7 +850,7 @@ static int sp_tx_edid_read(struct anx7625_data *ctx, int count, blocks_num; u8 pblock_buf[MAX_DPCD_BUFFER_SIZE]; u8 i, j; - u8 g_edid_break = 0; + int g_edid_break = 0; int ret; struct device *dev = &ctx->client->dev; @@ -881,7 +881,7 @@ static int sp_tx_edid_read(struct anx7625_data *ctx, g_edid_break = edid_read(ctx, offset, pblock_buf); - if (g_edid_break) + if (g_edid_break < 0) break; memcpy(&pedid_blocks_buf[offset], @@ -1636,7 +1636,7 @@ static int anx7625_register_audio(struct device *dev, struct anx7625_data *ctx) sizeof(codec_data)); if (IS_ERR(ctx->audio_pdev)) - return IS_ERR(ctx->audio_pdev); + return PTR_ERR(ctx->audio_pdev); DRM_DEV_DEBUG_DRIVER(dev, "bound to %s", HDMI_CODEC_DRV_NAME); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index d2f45a0f79c8..dafb1b47c15f 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -586,7 +586,7 @@ lt9611_connector_detect(struct drm_connector *connector, bool force) int connected = 0; regmap_read(lt9611->regmap, 0x825e, ®_val); - connected = (reg_val & BIT(2)); + connected = (reg_val & BIT(0)); lt9611->status = connected ? connector_status_connected : connector_status_disconnected; @@ -892,7 +892,7 @@ static enum drm_connector_status lt9611_bridge_detect(struct drm_bridge *bridge) int connected; regmap_read(lt9611->regmap, 0x825e, ®_val); - connected = reg_val & BIT(2); + connected = reg_val & BIT(0); lt9611->status = connected ? connector_status_connected : connector_status_disconnected; diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c index f991842a161f..702ea803a743 100644 --- a/drivers/gpu/drm/bridge/lvds-codec.c +++ b/drivers/gpu/drm/bridge/lvds-codec.c @@ -21,6 +21,7 @@ struct lvds_codec { struct device *dev; struct drm_bridge bridge; struct drm_bridge *panel_bridge; + struct drm_bridge_timings timings; struct regulator *vcc; struct gpio_desc *powerdown_gpio; u32 connector_type; @@ -119,6 +120,7 @@ static int lvds_codec_probe(struct platform_device *pdev) struct device_node *bus_node; struct drm_panel *panel; struct lvds_codec *lvds_codec; + u32 val; int ret; lvds_codec = devm_kzalloc(dev, sizeof(*lvds_codec), GFP_KERNEL); @@ -188,11 +190,24 @@ static int lvds_codec_probe(struct platform_device *pdev) } /* + * Encoder might sample data on different clock edge than the display, + * for example OnSemi FIN3385 has a dedicated strapping pin to select + * the sampling edge. + */ + if (lvds_codec->connector_type == DRM_MODE_CONNECTOR_LVDS && + !of_property_read_u32(dev->of_node, "pclk-sample", &val)) { + lvds_codec->timings.input_bus_flags = val ? + DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE : + DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE; + } + + /* * The panel_bridge bridge is attached to the panel's of_node, * but we need a bridge attached to our of_node for our user * to look up. */ lvds_codec->bridge.of_node = dev->of_node; + lvds_codec->bridge.timings = &lvds_codec->timings; drm_bridge_add(&lvds_codec->bridge); platform_set_drvdata(pdev, lvds_codec); diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index d2808c4a6fb1..cce98bf2a4e7 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -306,19 +306,10 @@ out: mutex_unlock(&ge_b850v3_lvds_dev_mutex); } -static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, - const struct i2c_device_id *id) +static int ge_b850v3_register(void) { + struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; - int ret; - - ret = ge_b850v3_lvds_init(dev); - - if (ret) - return ret; - - ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; - i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs; @@ -343,6 +334,27 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); } +static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, + const struct i2c_device_id *id) +{ + struct device *dev = &stdp4028_i2c->dev; + int ret; + + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; + + ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; + i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); + + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp2690_i2c) + return 0; + + return ge_b850v3_register(); +} + static int stdp4028_ge_b850v3_fw_remove(struct i2c_client *stdp4028_i2c) { ge_b850v3_lvds_remove(); @@ -386,7 +398,11 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); - return 0; + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp4028_i2c) + return 0; + + return ge_b850v3_register(); } static int stdp2690_ge_b850v3_fw_remove(struct i2c_client *stdp2690_i2c) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 26898042ba3d..818704bf5e86 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -449,6 +449,7 @@ static int ps8640_bridge_attach(struct drm_bridge *bridge, if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) return -EINVAL; + ps_bridge->aux.drm_dev = bridge->dev; ret = drm_dp_aux_register(&ps_bridge->aux); if (ret) { dev_err(dev, "failed to register DP AUX channel: %d\n", ret); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 02b490671f8f..dab8f76618f3 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -213,6 +213,7 @@ static const struct regmap_config ti_sn65dsi86_regmap_config = { .val_bits = 8, .volatile_table = &ti_sn_bridge_volatile_table, .cache_type = REGCACHE_NONE, + .max_register = 0xFF, }; static int __maybe_unused ti_sn65dsi86_read_u16(struct ti_sn65dsi86 *pdata, @@ -704,7 +705,7 @@ static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) { - int ret, val; + int val; struct mipi_dsi_host *host; struct mipi_dsi_device *dsi; struct device *dev = pdata->dev; @@ -714,16 +715,12 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) }; host = of_find_mipi_dsi_host_by_node(pdata->host_node); - if (!host) { - DRM_ERROR("failed to find dsi host\n"); - return -ENODEV; - } + if (!host) + return -EPROBE_DEFER; dsi = devm_mipi_dsi_device_register_full(dev, host, &info); - if (IS_ERR(dsi)) { - DRM_ERROR("failed to create dsi device\n"); + if (IS_ERR(dsi)) return PTR_ERR(dsi); - } /* TODO: setting to 4 MIPI lanes always for now */ dsi->lanes = 4; @@ -739,13 +736,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) pdata->dsi = dsi; - ret = devm_mipi_dsi_attach(dev, dsi); - if (ret < 0) { - DRM_ERROR("failed to attach dsi to host\n"); - return ret; - } - - return 0; + return devm_mipi_dsi_attach(dev, dsi); } static int ti_sn_bridge_attach(struct drm_bridge *bridge, @@ -1267,8 +1258,10 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, drm_bridge_add(&pdata->bridge); ret = ti_sn_attach_host(pdata); - if (ret) + if (ret) { + dev_err_probe(pdata->dev, ret, "failed to attach dsi host\n"); goto err_remove_bridge; + } return 0; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index ff1416cd609a..21174efd91be 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -74,7 +74,7 @@ int drm_crtc_commit_wait(struct drm_crtc_commit *commit) ret = wait_for_completion_timeout(&commit->hw_done, timeout); if (!ret) { - DRM_ERROR("hw_done timed out\n"); + drm_err(commit->crtc->dev, "hw_done timed out\n"); return -ETIMEDOUT; } @@ -84,7 +84,7 @@ int drm_crtc_commit_wait(struct drm_crtc_commit *commit) */ ret = wait_for_completion_timeout(&commit->flip_done, timeout); if (!ret) { - DRM_ERROR("flip_done timed out\n"); + drm_err(commit->crtc->dev, "flip_done timed out\n"); return -ETIMEDOUT; } @@ -140,7 +140,7 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) state->dev = dev; - DRM_DEBUG_ATOMIC("Allocated atomic state %p\n", state); + drm_dbg_atomic(dev, "Allocated atomic state %p\n", state); return 0; fail: @@ -191,7 +191,7 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) struct drm_mode_config *config = &dev->mode_config; int i; - DRM_DEBUG_ATOMIC("Clearing atomic state %p\n", state); + drm_dbg_atomic(dev, "Clearing atomic state %p\n", state); for (i = 0; i < state->num_connector; i++) { struct drm_connector *connector = state->connectors[i].ptr; @@ -301,7 +301,7 @@ void __drm_atomic_state_free(struct kref *ref) drm_atomic_state_clear(state); - DRM_DEBUG_ATOMIC("Freeing atomic state %p\n", state); + drm_dbg_atomic(state->dev, "Freeing atomic state %p\n", state); if (config->funcs->atomic_state_free) { config->funcs->atomic_state_free(state); @@ -358,8 +358,8 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state, state->crtcs[index].ptr = crtc; crtc_state->state = state; - DRM_DEBUG_ATOMIC("Added [CRTC:%d:%s] %p state to %p\n", - crtc->base.id, crtc->name, crtc_state, state); + drm_dbg_atomic(state->dev, "Added [CRTC:%d:%s] %p state to %p\n", + crtc->base.id, crtc->name, crtc_state, state); return crtc_state; } @@ -379,8 +379,9 @@ static int drm_atomic_crtc_check(const struct drm_crtc_state *old_crtc_state, */ if (new_crtc_state->active && !new_crtc_state->enable) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active without enabled\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] active without enabled\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -390,15 +391,17 @@ static int drm_atomic_crtc_check(const struct drm_crtc_state *old_crtc_state, */ if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) && WARN_ON(new_crtc_state->enable && !new_crtc_state->mode_blob)) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled without mode blob\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] enabled without mode blob\n", + crtc->base.id, crtc->name); return -EINVAL; } if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) && WARN_ON(!new_crtc_state->enable && new_crtc_state->mode_blob)) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] disabled with mode blob\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] disabled with mode blob\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -414,8 +417,9 @@ static int drm_atomic_crtc_check(const struct drm_crtc_state *old_crtc_state, */ if (new_crtc_state->event && !new_crtc_state->active && !old_crtc_state->active) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] requesting event but off\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] requesting event but off\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -460,8 +464,9 @@ static int drm_atomic_connector_check(struct drm_connector *connector, return 0; if (writeback_job->fb && !state->crtc) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] framebuffer without CRTC\n", - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] framebuffer without CRTC\n", + connector->base.id, connector->name); return -EINVAL; } @@ -470,16 +475,18 @@ static int drm_atomic_connector_check(struct drm_connector *connector, state->crtc); if (writeback_job->fb && !crtc_state->active) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] has framebuffer, but [CRTC:%d] is off\n", - connector->base.id, connector->name, - state->crtc->base.id); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] has framebuffer, but [CRTC:%d] is off\n", + connector->base.id, connector->name, + state->crtc->base.id); return -EINVAL; } if (!writeback_job->fb) { if (writeback_job->out_fence) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", + connector->base.id, connector->name); return -EINVAL; } @@ -537,8 +544,8 @@ drm_atomic_get_plane_state(struct drm_atomic_state *state, state->planes[index].new_state = plane_state; plane_state->state = state; - DRM_DEBUG_ATOMIC("Added [PLANE:%d:%s] %p state to %p\n", - plane->base.id, plane->name, plane_state, state); + drm_dbg_atomic(plane->dev, "Added [PLANE:%d:%s] %p state to %p\n", + plane->base.id, plane->name, plane_state, state); if (plane_state->crtc) { struct drm_crtc_state *crtc_state; @@ -594,12 +601,12 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, /* either *both* CRTC and FB must be set, or neither */ if (crtc && !fb) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] CRTC set but no FB\n", - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no FB\n", + plane->base.id, plane->name); return -EINVAL; } else if (fb && !crtc) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] FB set but no CRTC\n", - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] FB set but no CRTC\n", + plane->base.id, plane->name); return -EINVAL; } @@ -609,9 +616,10 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, /* Check whether this plane is usable on this CRTC */ if (!(plane->possible_crtcs & drm_crtc_mask(crtc))) { - DRM_DEBUG_ATOMIC("Invalid [CRTC:%d:%s] for [PLANE:%d:%s]\n", - crtc->base.id, crtc->name, - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, + "Invalid [CRTC:%d:%s] for [PLANE:%d:%s]\n", + crtc->base.id, crtc->name, + plane->base.id, plane->name); return -EINVAL; } @@ -619,9 +627,10 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, ret = drm_plane_check_pixel_format(plane, fb->format->format, fb->modifier); if (ret) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n", - plane->base.id, plane->name, - &fb->format->format, fb->modifier); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n", + plane->base.id, plane->name, + &fb->format->format, fb->modifier); return ret; } @@ -630,10 +639,11 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, new_plane_state->crtc_x > INT_MAX - (int32_t) new_plane_state->crtc_w || new_plane_state->crtc_h > INT_MAX || new_plane_state->crtc_y > INT_MAX - (int32_t) new_plane_state->crtc_h) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] invalid CRTC coordinates %ux%u+%d+%d\n", - plane->base.id, plane->name, - new_plane_state->crtc_w, new_plane_state->crtc_h, - new_plane_state->crtc_x, new_plane_state->crtc_y); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] invalid CRTC coordinates %ux%u+%d+%d\n", + plane->base.id, plane->name, + new_plane_state->crtc_w, new_plane_state->crtc_h, + new_plane_state->crtc_x, new_plane_state->crtc_y); return -ERANGE; } @@ -645,18 +655,19 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, new_plane_state->src_x > fb_width - new_plane_state->src_w || new_plane_state->src_h > fb_height || new_plane_state->src_y > fb_height - new_plane_state->src_h) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] invalid source coordinates " - "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n", - plane->base.id, plane->name, - new_plane_state->src_w >> 16, - ((new_plane_state->src_w & 0xffff) * 15625) >> 10, - new_plane_state->src_h >> 16, - ((new_plane_state->src_h & 0xffff) * 15625) >> 10, - new_plane_state->src_x >> 16, - ((new_plane_state->src_x & 0xffff) * 15625) >> 10, - new_plane_state->src_y >> 16, - ((new_plane_state->src_y & 0xffff) * 15625) >> 10, - fb->width, fb->height); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] invalid source coordinates " + "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n", + plane->base.id, plane->name, + new_plane_state->src_w >> 16, + ((new_plane_state->src_w & 0xffff) * 15625) >> 10, + new_plane_state->src_h >> 16, + ((new_plane_state->src_h & 0xffff) * 15625) >> 10, + new_plane_state->src_x >> 16, + ((new_plane_state->src_x & 0xffff) * 15625) >> 10, + new_plane_state->src_y >> 16, + ((new_plane_state->src_y & 0xffff) * 15625) >> 10, + fb->width, fb->height); return -ENOSPC; } @@ -671,9 +682,10 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, clips->y1 < 0 || clips->x2 > fb_width || clips->y2 > fb_height) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] invalid damage clip %d %d %d %d\n", - plane->base.id, plane->name, clips->x1, - clips->y1, clips->x2, clips->y2); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] invalid damage clip %d %d %d %d\n", + plane->base.id, plane->name, clips->x1, + clips->y1, clips->x2, clips->y2); return -EINVAL; } clips++; @@ -681,8 +693,9 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, } if (plane_switching_crtc(old_plane_state, new_plane_state)) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] switching CRTC directly\n", - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] switching CRTC directly\n", + plane->base.id, plane->name); return -EINVAL; } @@ -846,8 +859,9 @@ drm_atomic_get_private_obj_state(struct drm_atomic_state *state, state->num_private_objs = num_objs; - DRM_DEBUG_ATOMIC("Added new private object %p state %p to %p\n", - obj, obj_state, state); + drm_dbg_atomic(state->dev, + "Added new private object %p state %p to %p\n", + obj, obj_state, state); return obj_state; } @@ -1027,7 +1041,7 @@ drm_atomic_get_connector_state(struct drm_atomic_state *state, state->connectors[index].ptr = connector; connector_state->state = state; - DRM_DEBUG_ATOMIC("Added [CONNECTOR:%d:%s] %p state to %p\n", + drm_dbg_atomic(connector->dev, "Added [CONNECTOR:%d:%s] %p state to %p\n", connector->base.id, connector->name, connector_state, state); @@ -1160,8 +1174,9 @@ drm_atomic_add_encoder_bridges(struct drm_atomic_state *state, if (!encoder) return 0; - DRM_DEBUG_ATOMIC("Adding all bridges for [encoder:%d:%s] to %p\n", - encoder->base.id, encoder->name, state); + drm_dbg_atomic(encoder->dev, + "Adding all bridges for [encoder:%d:%s] to %p\n", + encoder->base.id, encoder->name, state); drm_for_each_bridge_in_chain(encoder, bridge) { /* Skip bridges that don't implement the atomic state hooks. */ @@ -1213,8 +1228,9 @@ drm_atomic_add_affected_connectors(struct drm_atomic_state *state, if (ret) return ret; - DRM_DEBUG_ATOMIC("Adding all current connectors for [CRTC:%d:%s] to %p\n", - crtc->base.id, crtc->name, state); + drm_dbg_atomic(crtc->dev, + "Adding all current connectors for [CRTC:%d:%s] to %p\n", + crtc->base.id, crtc->name, state); /* * Changed connectors are already in @state, so only need to look @@ -1267,8 +1283,9 @@ drm_atomic_add_affected_planes(struct drm_atomic_state *state, WARN_ON(!drm_atomic_get_new_crtc_state(state, crtc)); - DRM_DEBUG_ATOMIC("Adding all current planes for [CRTC:%d:%s] to %p\n", - crtc->base.id, crtc->name, state); + drm_dbg_atomic(crtc->dev, + "Adding all current planes for [CRTC:%d:%s] to %p\n", + crtc->base.id, crtc->name, state); drm_for_each_plane_mask(plane, state->dev, old_crtc_state->plane_mask) { struct drm_plane_state *plane_state = @@ -1308,7 +1325,7 @@ int drm_atomic_check_only(struct drm_atomic_state *state) unsigned int affected_crtc = 0; int i, ret = 0; - DRM_DEBUG_ATOMIC("checking %p\n", state); + drm_dbg_atomic(dev, "checking %p\n", state); for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) requested_crtc |= drm_crtc_mask(crtc); @@ -1316,8 +1333,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { ret = drm_atomic_plane_check(old_plane_state, new_plane_state); if (ret) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic core check failed\n", - plane->base.id, plane->name); + drm_dbg_atomic(dev, "[PLANE:%d:%s] atomic core check failed\n", + plane->base.id, plane->name); return ret; } } @@ -1325,8 +1342,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { ret = drm_atomic_crtc_check(old_crtc_state, new_crtc_state); if (ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic core check failed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] atomic core check failed\n", + crtc->base.id, crtc->name); return ret; } } @@ -1334,8 +1351,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) for_each_new_connector_in_state(state, conn, conn_state, i) { ret = drm_atomic_connector_check(conn, conn_state); if (ret) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] atomic core check failed\n", - conn->base.id, conn->name); + drm_dbg_atomic(dev, "[CONNECTOR:%d:%s] atomic core check failed\n", + conn->base.id, conn->name); return ret; } } @@ -1344,8 +1361,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) ret = config->funcs->atomic_check(state->dev, state); if (ret) { - DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", - state, ret); + drm_dbg_atomic(dev, "atomic driver check for %p failed: %d\n", + state, ret); return ret; } } @@ -1353,8 +1370,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] requires full modeset\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] requires full modeset\n", + crtc->base.id, crtc->name); return -EINVAL; } } @@ -1374,8 +1391,9 @@ int drm_atomic_check_only(struct drm_atomic_state *state) * so compositors know what's going on. */ if (affected_crtc != requested_crtc) { - DRM_DEBUG_ATOMIC("driver added CRTC to commit: requested 0x%x, affected 0x%0x\n", - requested_crtc, affected_crtc); + drm_dbg_atomic(dev, + "driver added CRTC to commit: requested 0x%x, affected 0x%0x\n", + requested_crtc, affected_crtc); WARN(!state->allow_modeset, "adding CRTC not allowed without modesets: requested 0x%x, affected 0x%0x\n", requested_crtc, affected_crtc); } @@ -1407,7 +1425,7 @@ int drm_atomic_commit(struct drm_atomic_state *state) if (ret) return ret; - DRM_DEBUG_ATOMIC("committing %p\n", state); + drm_dbg_atomic(state->dev, "committing %p\n", state); return config->funcs->atomic_commit(state->dev, state, false); } @@ -1436,7 +1454,7 @@ int drm_atomic_nonblocking_commit(struct drm_atomic_state *state) if (ret) return ret; - DRM_DEBUG_ATOMIC("committing %p nonblocking\n", state); + drm_dbg_atomic(state->dev, "committing %p nonblocking\n", state); return config->funcs->atomic_commit(state->dev, state, true); } @@ -1633,11 +1651,11 @@ void drm_atomic_print_new_state(const struct drm_atomic_state *state, int i; if (!p) { - DRM_ERROR("invalid drm printer\n"); + drm_err(state->dev, "invalid drm printer\n"); return; } - DRM_DEBUG_ATOMIC("checking %p\n", state); + drm_dbg_atomic(state->dev, "checking %p\n", state); for_each_new_plane_in_state(state, plane, plane_state, i) drm_atomic_plane_print_state(p, plane_state); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 2c0c6ec92820..aef2fbd676e5 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -132,9 +132,10 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state, if (new_encoder) { if (encoder_mask & drm_encoder_mask(new_encoder)) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] on [CONNECTOR:%d:%s] already assigned\n", - new_encoder->base.id, new_encoder->name, - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[ENCODER:%d:%s] on [CONNECTOR:%d:%s] already assigned\n", + new_encoder->base.id, new_encoder->name, + connector->base.id, connector->name); return -EINVAL; } @@ -169,11 +170,12 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state, continue; if (!disable_conflicting_encoders) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s] by [CONNECTOR:%d:%s]\n", - encoder->base.id, encoder->name, - connector->state->crtc->base.id, - connector->state->crtc->name, - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[ENCODER:%d:%s] in use on [CRTC:%d:%s] by [CONNECTOR:%d:%s]\n", + encoder->base.id, encoder->name, + connector->state->crtc->base.id, + connector->state->crtc->name, + connector->base.id, connector->name); ret = -EINVAL; goto out; } @@ -184,10 +186,11 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state, goto out; } - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], disabling [CONNECTOR:%d:%s]\n", - encoder->base.id, encoder->name, - new_conn_state->crtc->base.id, new_conn_state->crtc->name, - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[ENCODER:%d:%s] in use on [CRTC:%d:%s], disabling [CONNECTOR:%d:%s]\n", + encoder->base.id, encoder->name, + new_conn_state->crtc->base.id, new_conn_state->crtc->name, + connector->base.id, connector->name); crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); @@ -268,9 +271,10 @@ steal_encoder(struct drm_atomic_state *state, encoder_crtc = old_connector_state->crtc; - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n", - encoder->base.id, encoder->name, - encoder_crtc->base.id, encoder_crtc->name); + drm_dbg_atomic(encoder->dev, + "[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n", + encoder->base.id, encoder->name, + encoder_crtc->base.id, encoder_crtc->name); set_best_encoder(state, new_connector_state, NULL); @@ -291,9 +295,8 @@ update_connector_routing(struct drm_atomic_state *state, struct drm_encoder *new_encoder; struct drm_crtc_state *crtc_state; - DRM_DEBUG_ATOMIC("Updating routing for [CONNECTOR:%d:%s]\n", - connector->base.id, - connector->name); + drm_dbg_atomic(connector->dev, "Updating routing for [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); if (old_connector_state->crtc != new_connector_state->crtc) { if (old_connector_state->crtc) { @@ -308,9 +311,8 @@ update_connector_routing(struct drm_atomic_state *state, } if (!new_connector_state->crtc) { - DRM_DEBUG_ATOMIC("Disabling [CONNECTOR:%d:%s]\n", - connector->base.id, - connector->name); + drm_dbg_atomic(connector->dev, "Disabling [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); set_best_encoder(state, new_connector_state, NULL); @@ -339,8 +341,9 @@ update_connector_routing(struct drm_atomic_state *state, */ if (!state->duplicated && drm_connector_is_unregistered(connector) && crtc_state->active) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", - connector->base.id, connector->name); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] is not registered\n", + connector->base.id, connector->name); return -EINVAL; } @@ -354,31 +357,33 @@ update_connector_routing(struct drm_atomic_state *state, new_encoder = drm_connector_get_single_encoder(connector); if (!new_encoder) { - DRM_DEBUG_ATOMIC("No suitable encoder found for [CONNECTOR:%d:%s]\n", - connector->base.id, - connector->name); + drm_dbg_atomic(connector->dev, + "No suitable encoder found for [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); return -EINVAL; } if (!drm_encoder_crtc_ok(new_encoder, new_connector_state->crtc)) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] incompatible with [CRTC:%d:%s]\n", - new_encoder->base.id, - new_encoder->name, - new_connector_state->crtc->base.id, - new_connector_state->crtc->name); + drm_dbg_atomic(connector->dev, + "[ENCODER:%d:%s] incompatible with [CRTC:%d:%s]\n", + new_encoder->base.id, + new_encoder->name, + new_connector_state->crtc->base.id, + new_connector_state->crtc->name); return -EINVAL; } if (new_encoder == new_connector_state->best_encoder) { set_best_encoder(state, new_connector_state, new_encoder); - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d:%s]\n", - connector->base.id, - connector->name, - new_encoder->base.id, - new_encoder->name, - new_connector_state->crtc->base.id, - new_connector_state->crtc->name); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d:%s]\n", + connector->base.id, + connector->name, + new_encoder->base.id, + new_encoder->name, + new_connector_state->crtc->base.id, + new_connector_state->crtc->name); return 0; } @@ -389,13 +394,14 @@ update_connector_routing(struct drm_atomic_state *state, crtc_state->connectors_changed = true; - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", - connector->base.id, - connector->name, - new_encoder->base.id, - new_encoder->name, - new_connector_state->crtc->base.id, - new_connector_state->crtc->name); + drm_dbg_atomic(connector->dev, + "[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", + connector->base.id, + connector->name, + new_encoder->base.id, + new_encoder->name, + new_connector_state->crtc->base.id, + new_connector_state->crtc->name); return 0; } @@ -443,7 +449,7 @@ mode_fixup(struct drm_atomic_state *state) new_crtc_state, new_conn_state); if (ret) { - DRM_DEBUG_ATOMIC("Bridge atomic check failed\n"); + drm_dbg_atomic(encoder->dev, "Bridge atomic check failed\n"); return ret; } @@ -451,16 +457,18 @@ mode_fixup(struct drm_atomic_state *state) ret = funcs->atomic_check(encoder, new_crtc_state, new_conn_state); if (ret) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] check failed\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(encoder->dev, + "[ENCODER:%d:%s] check failed\n", + encoder->base.id, encoder->name); return ret; } } else if (funcs && funcs->mode_fixup) { ret = funcs->mode_fixup(encoder, &new_crtc_state->mode, &new_crtc_state->adjusted_mode); if (!ret) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] fixup failed\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(encoder->dev, + "[ENCODER:%d:%s] fixup failed\n", + encoder->base.id, encoder->name); return -EINVAL; } } @@ -483,8 +491,8 @@ mode_fixup(struct drm_atomic_state *state) ret = funcs->mode_fixup(crtc, &new_crtc_state->mode, &new_crtc_state->adjusted_mode); if (!ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] fixup failed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] fixup failed\n", + crtc->base.id, crtc->name); return -EINVAL; } } @@ -502,8 +510,9 @@ static enum drm_mode_status mode_valid_path(struct drm_connector *connector, ret = drm_encoder_mode_valid(encoder, mode); if (ret != MODE_OK) { - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] mode_valid() failed\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(encoder->dev, + "[ENCODER:%d:%s] mode_valid() failed\n", + encoder->base.id, encoder->name); return ret; } @@ -511,14 +520,14 @@ static enum drm_mode_status mode_valid_path(struct drm_connector *connector, ret = drm_bridge_chain_mode_valid(bridge, &connector->display_info, mode); if (ret != MODE_OK) { - DRM_DEBUG_ATOMIC("[BRIDGE] mode_valid() failed\n"); + drm_dbg_atomic(encoder->dev, "[BRIDGE] mode_valid() failed\n"); return ret; } ret = drm_crtc_mode_valid(crtc, mode); if (ret != MODE_OK) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode_valid() failed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(encoder->dev, "[CRTC:%d:%s] mode_valid() failed\n", + crtc->base.id, crtc->name); return ret; } @@ -619,14 +628,14 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); if (!drm_mode_equal(&old_crtc_state->mode, &new_crtc_state->mode)) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] mode changed\n", + crtc->base.id, crtc->name); new_crtc_state->mode_changed = true; } if (old_crtc_state->enable != new_crtc_state->enable) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enable changed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] enable changed\n", + crtc->base.id, crtc->name); /* * For clarity this assignment is done here, but @@ -641,14 +650,14 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, } if (old_crtc_state->active != new_crtc_state->active) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active changed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] active changed\n", + crtc->base.id, crtc->name); new_crtc_state->active_changed = true; } if (new_crtc_state->enable != has_connectors) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled/connectors mismatch\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "[CRTC:%d:%s] enabled/connectors mismatch\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -708,10 +717,11 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] needs all connectors, enable: %c, active: %c\n", - crtc->base.id, crtc->name, - new_crtc_state->enable ? 'y' : 'n', - new_crtc_state->active ? 'y' : 'n'); + drm_dbg_atomic(dev, + "[CRTC:%d:%s] needs all connectors, enable: %c, active: %c\n", + crtc->base.id, crtc->name, + new_crtc_state->enable ? 'y' : 'n', + new_crtc_state->active ? 'y' : 'n'); ret = drm_atomic_add_affected_connectors(state, crtc); if (ret != 0) @@ -818,7 +828,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, } if (!crtc_state->enable && !can_update_disabled) { - DRM_DEBUG_KMS("Cannot update plane of a disabled CRTC.\n"); + drm_dbg_kms(plane_state->crtc->dev, + "Cannot update plane of a disabled CRTC.\n"); return -EINVAL; } @@ -828,7 +839,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); if (hscale < 0 || vscale < 0) { - DRM_DEBUG_KMS("Invalid scaling of plane\n"); + drm_dbg_kms(plane_state->crtc->dev, + "Invalid scaling of plane\n"); drm_rect_debug_print("src: ", &plane_state->src, true); drm_rect_debug_print("dst: ", &plane_state->dst, false); return -ERANGE; @@ -852,7 +864,8 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, return 0; if (!can_position && !drm_rect_equals(dst, &clip)) { - DRM_DEBUG_KMS("Plane must cover entire CRTC\n"); + drm_dbg_kms(plane_state->crtc->dev, + "Plane must cover entire CRTC\n"); drm_rect_debug_print("dst: ", dst, false); drm_rect_debug_print("clip: ", &clip, false); return -EINVAL; @@ -904,8 +917,9 @@ drm_atomic_helper_check_planes(struct drm_device *dev, ret = funcs->atomic_check(plane, state); if (ret) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n", - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] atomic driver check failed\n", + plane->base.id, plane->name); return ret; } } @@ -920,8 +934,9 @@ drm_atomic_helper_check_planes(struct drm_device *dev, ret = funcs->atomic_check(crtc, state); if (ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] atomic driver check failed\n", + crtc->base.id, crtc->name); return ret; } } @@ -1049,8 +1064,8 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs = encoder->helper_private; - DRM_DEBUG_ATOMIC("disabling [ENCODER:%d:%s]\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(dev, "disabling [ENCODER:%d:%s]\n", + encoder->base.id, encoder->name); /* * Each encoder has at most one connector (since we always steal @@ -1087,8 +1102,8 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs = crtc->helper_private; - DRM_DEBUG_ATOMIC("disabling [CRTC:%d:%s]\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "disabling [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); /* Right function depends upon target state. */ @@ -1229,8 +1244,8 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state) funcs = crtc->helper_private; if (new_crtc_state->enable && funcs->mode_set_nofb) { - DRM_DEBUG_ATOMIC("modeset on [CRTC:%d:%s]\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "modeset on [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); funcs->mode_set_nofb(crtc); } @@ -1254,8 +1269,8 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state) if (!new_crtc_state->mode_changed) continue; - DRM_DEBUG_ATOMIC("modeset on [ENCODER:%d:%s]\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(dev, "modeset on [ENCODER:%d:%s]\n", + encoder->base.id, encoder->name); /* * Each encoder has at most one connector (since we always steal @@ -1357,8 +1372,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, funcs = crtc->helper_private; if (new_crtc_state->enable) { - DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(dev, "enabling [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); if (funcs->atomic_enable) funcs->atomic_enable(crtc, old_state); else if (funcs->commit) @@ -1381,8 +1396,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, encoder = new_conn_state->best_encoder; funcs = encoder->helper_private; - DRM_DEBUG_ATOMIC("enabling [ENCODER:%d:%s]\n", - encoder->base.id, encoder->name); + drm_dbg_atomic(dev, "enabling [ENCODER:%d:%s]\n", + encoder->base.id, encoder->name); /* * Each encoder has at most one connector (since we always steal @@ -1551,8 +1566,8 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ); if (ret == 0) - DRM_ERROR("[CRTC:%d:%s] flip_done timed out\n", - crtc->base.id, crtc->name); + drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n", + crtc->base.id, crtc->name); } if (old_state->fake_commit) @@ -1739,8 +1754,9 @@ int drm_atomic_helper_async_check(struct drm_device *dev, */ if (old_plane_state->commit && !try_wait_for_completion(&old_plane_state->commit->hw_done)) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] inflight previous commit preventing async commit\n", - plane->base.id, plane->name); + drm_dbg_atomic(dev, + "[PLANE:%d:%s] inflight previous commit preventing async commit\n", + plane->base.id, plane->name); return -EBUSY; } @@ -1962,8 +1978,9 @@ static int stall_checks(struct drm_crtc *crtc, bool nonblock) */ if (!completed && nonblock) { spin_unlock(&crtc->commit_lock); - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] busy with a previous commit\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] busy with a previous commit\n", + crtc->base.id, crtc->name); return -EBUSY; } @@ -1985,8 +2002,8 @@ static int stall_checks(struct drm_crtc *crtc, bool nonblock) ret = wait_for_completion_interruptible_timeout(&stall_commit->cleanup_done, 10*HZ); if (ret == 0) - DRM_ERROR("[CRTC:%d:%s] cleanup_done timed out\n", - crtc->base.id, crtc->name); + drm_err(crtc->dev, "[CRTC:%d:%s] cleanup_done timed out\n", + crtc->base.id, crtc->name); drm_crtc_commit_put(stall_commit); @@ -2150,8 +2167,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, */ if (nonblock && old_conn_state->commit && !try_wait_for_completion(&old_conn_state->commit->flip_done)) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] busy with a previous commit\n", - conn->base.id, conn->name); + drm_dbg_atomic(conn->dev, + "[CONNECTOR:%d:%s] busy with a previous commit\n", + conn->base.id, conn->name); return -EBUSY; } @@ -2171,8 +2189,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, */ if (nonblock && old_plane_state->commit && !try_wait_for_completion(&old_plane_state->commit->flip_done)) { - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] busy with a previous commit\n", - plane->base.id, plane->name); + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] busy with a previous commit\n", + plane->base.id, plane->name); return -EBUSY; } @@ -2218,22 +2237,25 @@ void drm_atomic_helper_wait_for_dependencies(struct drm_atomic_state *old_state) for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) { ret = drm_crtc_commit_wait(old_crtc_state->commit); if (ret) - DRM_ERROR("[CRTC:%d:%s] commit wait timed out\n", - crtc->base.id, crtc->name); + drm_err(crtc->dev, + "[CRTC:%d:%s] commit wait timed out\n", + crtc->base.id, crtc->name); } for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { ret = drm_crtc_commit_wait(old_conn_state->commit); if (ret) - DRM_ERROR("[CONNECTOR:%d:%s] commit wait timed out\n", - conn->base.id, conn->name); + drm_err(conn->dev, + "[CONNECTOR:%d:%s] commit wait timed out\n", + conn->base.id, conn->name); } for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { ret = drm_crtc_commit_wait(old_plane_state->commit); if (ret) - DRM_ERROR("[PLANE:%d:%s] commit wait timed out\n", - plane->base.id, plane->name); + drm_err(plane->dev, + "[PLANE:%d:%s] commit wait timed out\n", + plane->base.id, plane->name); } } EXPORT_SYMBOL(drm_atomic_helper_wait_for_dependencies); @@ -3120,7 +3142,9 @@ void drm_atomic_helper_shutdown(struct drm_device *dev) ret = drm_atomic_helper_disable_all(dev, &ctx); if (ret) - DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret); + drm_err(dev, + "Disabling all crtc's during unload failed with %i\n", + ret); DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); } @@ -3380,8 +3404,9 @@ static int page_flip_common(struct drm_atomic_state *state, /* Make sure we don't accidentally do a full modeset. */ state->allow_modeset = false; if (!crtc_state->active) { - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] disabled, rejecting legacy flip\n", - crtc->base.id, crtc->name); + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] disabled, rejecting legacy flip\n", + crtc->base.id, crtc->name); return -EINVAL; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index cdd31fc78bfc..9781722519c3 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -773,7 +773,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, state->scaling_mode = val; } else if (property == config->content_protection_property) { if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) { - DRM_DEBUG_KMS("only drivers can set CP Enabled\n"); + drm_dbg_kms(dev, "only drivers can set CP Enabled\n"); return -EINVAL; } state->content_protection = val; diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 60a6b21474b1..6e433d465f41 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -106,7 +106,7 @@ int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) auth->magic = file_priv->magic; mutex_unlock(&dev->master_mutex); - DRM_DEBUG("%u\n", auth->magic); + drm_dbg_core(dev, "%u\n", auth->magic); return ret < 0 ? ret : 0; } @@ -117,7 +117,7 @@ int drm_authmagic(struct drm_device *dev, void *data, struct drm_auth *auth = data; struct drm_file *file; - DRM_DEBUG("%u\n", auth->magic); + drm_dbg_core(dev, "%u\n", auth->magic); mutex_lock(&dev->master_mutex); file = idr_find(&file_priv->master->magic_map, auth->magic); @@ -274,7 +274,9 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, } if (file_priv->master->lessor != NULL) { - DRM_DEBUG_LEASE("Attempt to set lessee %d as master\n", file_priv->master->lessee_id); + drm_dbg_lease(dev, + "Attempt to set lessee %d as master\n", + file_priv->master->lessee_id); ret = -EINVAL; goto out_unlock; } @@ -315,7 +317,9 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data, } if (file_priv->master->lessor != NULL) { - DRM_DEBUG_LEASE("Attempt to drop lessee %d as master\n", file_priv->master->lessee_id); + drm_dbg_lease(dev, + "Attempt to drop lessee %d as master\n", + file_priv->master->lessee_id); ret = -EINVAL; goto out_unlock; } diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index dbe3e830096e..0f28dd2bdd72 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -409,6 +409,61 @@ void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch, } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888_toio); +static void drm_fb_xrgb8888_to_xrgb2101010_line(u32 *dbuf, const u32 *sbuf, + unsigned int pixels) +{ + unsigned int x; + u32 val32; + + for (x = 0; x < pixels; x++) { + val32 = ((sbuf[x] & 0x000000FF) << 2) | + ((sbuf[x] & 0x0000FF00) << 4) | + ((sbuf[x] & 0x00FF0000) << 6); + *dbuf++ = val32 | ((val32 >> 8) & 0x00300C03); + } +} + +/** + * drm_fb_xrgb8888_to_xrgb2101010_toio - Convert XRGB8888 to XRGB2101010 clip + * buffer + * @dst: XRGB2101010 destination buffer (iomem) + * @dst_pitch: Number of bytes between two consecutive scanlines within dst + * @vaddr: XRGB8888 source buffer + * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy + * + * Drivers can use this function for XRGB2101010 devices that don't natively + * support XRGB8888. + */ +void drm_fb_xrgb8888_to_xrgb2101010_toio(void __iomem *dst, + unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, + const struct drm_rect *clip) +{ + size_t linepixels = clip->x2 - clip->x1; + size_t dst_len = linepixels * sizeof(u32); + unsigned int y, lines = clip->y2 - clip->y1; + void *dbuf; + + if (!dst_pitch) + dst_pitch = dst_len; + + dbuf = kmalloc(dst_len, GFP_KERNEL); + if (!dbuf) + return; + + vaddr += clip_offset(clip, fb->pitches[0], sizeof(u32)); + for (y = 0; y < lines; y++) { + drm_fb_xrgb8888_to_xrgb2101010_line(dbuf, vaddr, linepixels); + memcpy_toio(dst, dbuf, dst_len); + vaddr += fb->pitches[0]; + dst += dst_pitch; + } + + kfree(dbuf); +} +EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010_toio); + /** * drm_fb_xrgb8888_to_gray8 - Convert XRGB8888 to grayscale * @dst: 8-bit grayscale destination buffer @@ -500,6 +555,10 @@ int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_for fb_format = DRM_FORMAT_XRGB8888; if (dst_format == DRM_FORMAT_ARGB8888) dst_format = DRM_FORMAT_XRGB8888; + if (fb_format == DRM_FORMAT_ARGB2101010) + fb_format = DRM_FORMAT_XRGB2101010; + if (dst_format == DRM_FORMAT_ARGB2101010) + dst_format = DRM_FORMAT_XRGB2101010; if (dst_format == fb_format) { drm_fb_memcpy_toio(dst, dst_pitch, vmap, fb, clip); @@ -515,6 +574,11 @@ int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_for drm_fb_xrgb8888_to_rgb888_toio(dst, dst_pitch, vmap, fb, clip); return 0; } + } else if (dst_format == DRM_FORMAT_XRGB2101010) { + if (fb_format == DRM_FORMAT_XRGB8888) { + drm_fb_xrgb8888_to_xrgb2101010_toio(dst, dst_pitch, vmap, fb, clip); + return 0; + } } return -EINVAL; diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 25837b1d6639..07741b678798 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -269,6 +269,9 @@ const struct drm_format_info *__drm_format_info(u32 format) .num_planes = 3, .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0, .vsub = 0, .is_yuv = true }, + { .format = DRM_FORMAT_P030, .depth = 0, .num_planes = 2, + .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, }; unsigned int i; diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 09e2cb80de08..cefd0cbf9deb 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -32,14 +32,18 @@ * The DRM GEM/CMA helpers use this allocator as a means to provide buffer * objects that are physically contiguous in memory. This is useful for * display drivers that are unable to map scattered buffers via an IOMMU. + * + * For GEM callback helpers in struct &drm_gem_object functions, see likewise + * named functions with an _object_ infix (e.g., drm_gem_cma_object_vmap() wraps + * drm_gem_cma_vmap()). These helpers perform the necessary type conversion. */ static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = { - .free = drm_gem_cma_free_object, - .print_info = drm_gem_cma_print_info, - .get_sg_table = drm_gem_cma_get_sg_table, - .vmap = drm_gem_cma_vmap, - .mmap = drm_gem_cma_mmap, + .free = drm_gem_cma_object_free, + .print_info = drm_gem_cma_object_print_info, + .get_sg_table = drm_gem_cma_object_get_sg_table, + .vmap = drm_gem_cma_object_vmap, + .mmap = drm_gem_cma_object_mmap, .vm_ops = &drm_gem_cma_vm_ops, }; @@ -63,18 +67,21 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size, bool private) struct drm_gem_object *gem_obj; int ret = 0; - if (drm->driver->gem_create_object) + if (drm->driver->gem_create_object) { gem_obj = drm->driver->gem_create_object(drm, size); - else - gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL); - if (!gem_obj) - return ERR_PTR(-ENOMEM); + if (IS_ERR(gem_obj)) + return ERR_CAST(gem_obj); + cma_obj = to_drm_gem_cma_obj(gem_obj); + } else { + cma_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL); + if (!cma_obj) + return ERR_PTR(-ENOMEM); + gem_obj = &cma_obj->base; + } if (!gem_obj->funcs) gem_obj->funcs = &drm_gem_cma_default_funcs; - cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base); - if (private) { drm_gem_private_object_init(drm, gem_obj, size); @@ -192,18 +199,16 @@ drm_gem_cma_create_with_handle(struct drm_file *file_priv, } /** - * drm_gem_cma_free_object - free resources associated with a CMA GEM object - * @gem_obj: GEM object to free + * drm_gem_cma_free - free resources associated with a CMA GEM object + * @cma_obj: CMA GEM object to free * * This function frees the backing memory of the CMA GEM object, cleans up the * GEM object state and frees the memory used to store the object itself. * If the buffer is imported and the virtual address is set, it is released. - * Drivers using the CMA helpers should set this as their - * &drm_gem_object_funcs.free callback. */ -void drm_gem_cma_free_object(struct drm_gem_object *gem_obj) +void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj) { - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(gem_obj); + struct drm_gem_object *gem_obj = &cma_obj->base; struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(cma_obj->vaddr); if (gem_obj->import_attach) { @@ -224,7 +229,7 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj) kfree(cma_obj); } -EXPORT_SYMBOL_GPL(drm_gem_cma_free_object); +EXPORT_SYMBOL_GPL(drm_gem_cma_free); /** * drm_gem_cma_dumb_create_internal - create a dumb buffer object @@ -371,18 +376,15 @@ EXPORT_SYMBOL_GPL(drm_gem_cma_get_unmapped_area); /** * drm_gem_cma_print_info() - Print &drm_gem_cma_object info for debugfs + * @cma_obj: CMA GEM object * @p: DRM printer * @indent: Tab indentation level - * @obj: GEM object * - * This function can be used as the &drm_driver->gem_print_info callback. - * It prints paddr and vaddr for use in e.g. debugfs output. + * This function prints paddr and vaddr for use in e.g. debugfs output. */ -void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_gem_object *obj) +void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj, + struct drm_printer *p, unsigned int indent) { - const struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - drm_printf_indent(p, indent, "paddr=%pad\n", &cma_obj->paddr); drm_printf_indent(p, indent, "vaddr=%p\n", cma_obj->vaddr); } @@ -391,18 +393,17 @@ EXPORT_SYMBOL(drm_gem_cma_print_info); /** * drm_gem_cma_get_sg_table - provide a scatter/gather table of pinned * pages for a CMA GEM object - * @obj: GEM object + * @cma_obj: CMA GEM object * - * This function exports a scatter/gather table by - * calling the standard DMA mapping API. Drivers using the CMA helpers should - * set this as their &drm_gem_object_funcs.get_sg_table callback. + * This function exports a scatter/gather table by calling the standard + * DMA mapping API. * * Returns: * A pointer to the scatter/gather table of pinned pages or NULL on failure. */ -struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj) +struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj) { - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + struct drm_gem_object *obj = &cma_obj->base; struct sg_table *sgt; int ret; @@ -468,23 +469,19 @@ EXPORT_SYMBOL_GPL(drm_gem_cma_prime_import_sg_table); /** * drm_gem_cma_vmap - map a CMA GEM object into the kernel's virtual * address space - * @obj: GEM object + * @cma_obj: CMA GEM object * @map: Returns the kernel virtual address of the CMA GEM object's backing * store. * - * This function maps a buffer into the kernel's - * virtual address space. Since the CMA buffers are already mapped into the - * kernel virtual address space this simply returns the cached virtual - * address. Drivers using the CMA helpers should set this as their DRM - * driver's &drm_gem_object_funcs.vmap callback. + * This function maps a buffer into the kernel's virtual address space. + * Since the CMA buffers are already mapped into the kernel virtual address + * space this simply returns the cached virtual address. * * Returns: * 0 on success, or a negative error code otherwise. */ -int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map) { - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); - dma_buf_map_set_vaddr(map, cma_obj->vaddr); return 0; @@ -493,20 +490,19 @@ EXPORT_SYMBOL_GPL(drm_gem_cma_vmap); /** * drm_gem_cma_mmap - memory-map an exported CMA GEM object - * @obj: GEM object + * @cma_obj: CMA GEM object * @vma: VMA for the area to be mapped * * This function maps a buffer into a userspace process's address space. * In addition to the usual GEM VMA setup it immediately faults in the entire - * object instead of using on-demand faulting. Drivers that use the CMA - * helpers should set this as their &drm_gem_object_funcs.mmap callback. + * object instead of using on-demand faulting. * * Returns: * 0 on success or a negative error code on failure. */ -int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma) { - struct drm_gem_cma_object *cma_obj; + struct drm_gem_object *obj = &cma_obj->base; int ret; /* @@ -517,8 +513,6 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); vma->vm_flags &= ~VM_PFNMAP; - cma_obj = to_drm_gem_cma_obj(obj); - if (cma_obj->map_noncoherent) { vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 0eeda1012364..621924116eb4 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -10,6 +10,7 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/module.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -56,14 +57,17 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) size = PAGE_ALIGN(size); - if (dev->driver->gem_create_object) + if (dev->driver->gem_create_object) { obj = dev->driver->gem_create_object(dev, size); - else - obj = kzalloc(sizeof(*shmem), GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - shmem = to_drm_gem_shmem_obj(obj); + if (IS_ERR(obj)) + return ERR_CAST(obj); + shmem = to_drm_gem_shmem_obj(obj); + } else { + shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); + if (!shmem) + return ERR_PTR(-ENOMEM); + obj = &shmem->base; + } if (!obj->funcs) obj->funcs = &drm_gem_shmem_funcs; diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index bfa386b98134..3f00192215d1 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -197,8 +197,8 @@ struct drm_gem_vram_object *drm_gem_vram_create(struct drm_device *dev, if (dev->driver->gem_create_object) { gem = dev->driver->gem_create_object(dev, size); - if (!gem) - return ERR_PTR(-ENOMEM); + if (IS_ERR(gem)) + return ERR_CAST(gem); gbo = drm_gem_vram_of_gem(gem); } else { gbo = kzalloc(sizeof(*gbo), GFP_KERNEL); diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index c50fa6f0709f..60afa1865559 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -32,16 +32,16 @@ * Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ -#include <linux/export.h> #include <linux/hash.h> #include <linux/mm.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/vmalloc.h> -#include <drm/drm_hashtab.h> #include <drm/drm_print.h> +#include "drm_legacy.h" + int drm_ht_create(struct drm_open_hash *ht, unsigned int order) { unsigned int size = 1 << order; @@ -58,7 +58,6 @@ int drm_ht_create(struct drm_open_hash *ht, unsigned int order) } return 0; } -EXPORT_SYMBOL(drm_ht_create); void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) { @@ -135,7 +134,6 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) } return 0; } -EXPORT_SYMBOL(drm_ht_insert_item); /* * Just insert an item and return any "bits" bit key that hasn't been @@ -164,7 +162,6 @@ int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *it } return 0; } -EXPORT_SYMBOL(drm_ht_just_insert_please); int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item) @@ -178,7 +175,6 @@ int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, *item = hlist_entry(list, struct drm_hash_item, head); return 0; } -EXPORT_SYMBOL(drm_ht_find_item); int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key) { @@ -197,7 +193,6 @@ int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item) hlist_del_init_rcu(&item->head); return 0; } -EXPORT_SYMBOL(drm_ht_remove_item); void drm_ht_remove(struct drm_open_hash *ht) { @@ -206,4 +201,3 @@ void drm_ht_remove(struct drm_open_hash *ht) ht->table = NULL; } } -EXPORT_SYMBOL(drm_ht_remove); diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h index c9206840c87f..70c9dba114a6 100644 --- a/drivers/gpu/drm/drm_legacy.h +++ b/drivers/gpu/drm/drm_legacy.h @@ -35,9 +35,47 @@ #include <drm/drm_legacy.h> struct agp_memory; +struct drm_buf_desc; struct drm_device; struct drm_file; -struct drm_buf_desc; +struct drm_hash_item; +struct drm_open_hash; + +/* + * Hash-table Support + */ + +#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) + +/* drm_hashtab.c */ +#if IS_ENABLED(CONFIG_DRM_LEGACY) +int drm_ht_create(struct drm_open_hash *ht, unsigned int order); +int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); +int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, + unsigned long seed, int bits, int shift, + unsigned long add); +int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); + +void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); +int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); +int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); +void drm_ht_remove(struct drm_open_hash *ht); +#endif + +/* + * RCU-safe interface + * + * The user of this API needs to make sure that two or more instances of the + * hash table manipulation functions are never run simultaneously. + * The lookup function drm_ht_find_item_rcu may, however, run simultaneously + * with any of the manipulation functions as long as it's called from within + * an RCU read-locked section. + */ +#define drm_ht_insert_item_rcu drm_ht_insert_item +#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please +#define drm_ht_remove_key_rcu drm_ht_remove_key +#define drm_ht_remove_item_rcu drm_ht_remove_item +#define drm_ht_find_item_rcu drm_ht_find_item /* * Generic DRM Contexts diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index b75403f3251a..ded8968b3e8a 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -15,9 +15,10 @@ #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> -#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_file.h> #include <drm/drm_format_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_gem.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_mipi_dbi.h> #include <drm/drm_modes.h> @@ -200,13 +201,19 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb, struct drm_rect *clip, bool swap) { struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(gem); - void *src = cma_obj->vaddr; + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; + void *src; int ret; ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return ret; + src = data[0].vaddr; /* TODO: Use mapping abstraction properly */ + + ret = drm_gem_fb_vmap(fb, map, data); + if (ret) + goto out_drm_gem_fb_end_cpu_access; switch (fb->format->format) { case DRM_FORMAT_RGB565: @@ -221,9 +228,11 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb, default: drm_err_once(fb->dev, "Format is not supported: %p4cc\n", &fb->format->format); - return -EINVAL; + ret = -EINVAL; } + drm_gem_fb_vunmap(fb, map); +out_drm_gem_fb_end_cpu_access: drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); return ret; @@ -249,8 +258,8 @@ static void mipi_dbi_set_window_address(struct mipi_dbi_dev *dbidev, static void mipi_dbi_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect) { - struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); - struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(gem); + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; unsigned int width = rect->x2 - rect->x1; @@ -266,6 +275,10 @@ static void mipi_dbi_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect) if (!drm_dev_enter(fb->dev, &idx)) return; + ret = drm_gem_fb_vmap(fb, map, data); + if (ret) + goto err_drm_dev_exit; + full = width == fb->width && height == fb->height; DRM_DEBUG_KMS("Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); @@ -277,7 +290,7 @@ static void mipi_dbi_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect) if (ret) goto err_msg; } else { - tr = cma_obj->vaddr; + tr = data[0].vaddr; /* TODO: Use mapping abstraction properly */ } mipi_dbi_set_window_address(dbidev, rect->x1, rect->x2 - 1, rect->y1, @@ -289,6 +302,9 @@ err_msg: if (ret) drm_err_once(fb->dev, "Failed to update display %d\n", ret); + drm_gem_fb_vunmap(fb, map); + +err_drm_dev_exit: drm_dev_exit(idx); } @@ -1117,8 +1133,8 @@ int mipi_dbi_spi_init(struct spi_device *spi, struct mipi_dbi *dbi, /* * Even though it's not the SPI device that does DMA (the master does), - * the dma mask is necessary for the dma_alloc_wc() in - * drm_gem_cma_create(). The dma_addr returned will be a physical + * the dma mask is necessary for the dma_alloc_wc() in the GEM code + * (e.g., drm_gem_cma_create()). The dma_addr returned will be a physical * address which might be different from the bus address, but this is * not a problem since the address will not be used. * The virtual address is used in the transfer and the SPI core diff --git a/drivers/gpu/drm/drm_nomodeset.c b/drivers/gpu/drm/drm_nomodeset.c new file mode 100644 index 000000000000..f3978d5bd3a1 --- /dev/null +++ b/drivers/gpu/drm/drm_nomodeset.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/module.h> +#include <linux/types.h> + +static bool drm_nomodeset; + +bool drm_firmware_drivers_only(void) +{ + return drm_nomodeset; +} +EXPORT_SYMBOL(drm_firmware_drivers_only); + +static int __init disable_modeset(char *str) +{ + drm_nomodeset = true; + + pr_warn("Booted with the nomodeset parameter. Only the system framebuffer will be available\n"); + + return 1; +} + +/* Disable kernel modesetting */ +__setup("nomodeset", disable_modeset); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index a9359878f4ed..042bb80383c9 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -262,6 +262,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + .matches = { + /* Non exact match to match all versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c9a9d74f338c..c313a5b4549c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -404,8 +404,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (*fence) { ret = dma_fence_chain_find_seqno(fence, point); - if (!ret) + if (!ret) { + /* If the requested seqno is already signaled + * drm_syncobj_find_fence may return a NULL + * fence. To make sure the recipient gets + * signalled, use a new fence instead. + */ + if (!*fence) + *fence = dma_fence_get_stub(); + goto out; + } dma_fence_put(*fence); } else { ret = -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 7dcc6392792d..0b756ecb1bc2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -589,6 +589,7 @@ static int compare_str(struct device *dev, void *data) static int etnaviv_pdev_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct device_node *first_node = NULL; struct component_match *match = NULL; if (!dev->platform_data) { @@ -598,6 +599,9 @@ static int etnaviv_pdev_probe(struct platform_device *pdev) if (!of_device_is_available(core_node)) continue; + if (!first_node) + first_node = core_node; + drm_of_component_match_add(&pdev->dev, &match, compare_of, core_node); } @@ -609,6 +613,32 @@ static int etnaviv_pdev_probe(struct platform_device *pdev) component_match_add(dev, &match, compare_str, names[i]); } + /* + * PTA and MTLB can have 40 bit base addresses, but + * unfortunately, an entry in the MTLB can only point to a + * 32 bit base address of a STLB. Moreover, to initialize the + * MMU we need a command buffer with a 32 bit address because + * without an MMU there is only an indentity mapping between + * the internal 32 bit addresses and the bus addresses. + * + * To make things easy, we set the dma_coherent_mask to 32 + * bit to make sure we are allocating the command buffers and + * TLBs in the lower 4 GiB address space. + */ + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(40)) || + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) { + dev_dbg(&pdev->dev, "No suitable DMA available\n"); + return -ENODEV; + } + + /* + * Apply the same DMA configuration to the virtual etnaviv + * device as the GPU we found. This assumes that all Vivante + * GPUs in the system share the same DMA constraints. + */ + if (first_node) + of_dma_configure(&pdev->dev, first_node, true); + return component_master_add_with_match(dev, &etnaviv_master_ops, match); } @@ -653,21 +683,12 @@ static int __init etnaviv_init(void) if (!of_device_is_available(np)) continue; - pdev = platform_device_alloc("etnaviv", -1); + pdev = platform_device_alloc("etnaviv", PLATFORM_DEVID_NONE); if (!pdev) { ret = -ENOMEM; of_node_put(np); goto unregister_platform_driver; } - pdev->dev.coherent_dma_mask = DMA_BIT_MASK(40); - pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; - - /* - * Apply the same DMA configuration to the virtual etnaviv - * device as the GPU we found. This assumes that all Vivante - * GPUs in the system share the same DMA constraints. - */ - of_dma_configure(&pdev->dev, np, true); ret = platform_device_add(pdev); if (ret) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index b5e8ce86dbe7..b03c20c14ca1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,6 +469,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } + if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K || + args->nr_bos > SZ_64K || args->nr_pmrs > 128) { + DRM_ERROR("submit arguments out of size limits\n"); + return -EINVAL; + } + /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 242a5fd8b932..ba5fd012a40a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1658,7 +1658,7 @@ etnaviv_gpu_cooling_set_cur_state(struct thermal_cooling_device *cdev, return 0; } -static struct thermal_cooling_device_ops cooling_ops = { +static const struct thermal_cooling_device_ops cooling_ops = { .get_max_state = etnaviv_gpu_cooling_get_max_state, .get_cur_state = etnaviv_gpu_cooling_get_cur_state, .set_cur_state = etnaviv_gpu_cooling_set_cur_state, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 1c75c8ed5bce..85eddd492774 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -130,6 +130,7 @@ struct etnaviv_gpu { /* hang detection */ u32 hangcheck_dma_addr; + u32 hangcheck_fence; void __iomem *mmio; int irq; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 180bb633d5c5..58f593b278c1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -107,8 +107,10 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job */ dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); change = dma_addr - gpu->hangcheck_dma_addr; - if (change < 0 || change > 16) { + if (gpu->completed_fence != gpu->hangcheck_fence || + change < 0 || change > 16) { gpu->hangcheck_dma_addr = dma_addr; + gpu->hangcheck_fence = gpu->completed_fence; goto out_no_timeout; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index d8f1cf4d6b69..9743b6b17447 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -102,16 +102,7 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_RENDER_ALLOW), }; -static const struct file_operations exynos_drm_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = exynos_drm_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .release = drm_release, -}; +DEFINE_DRM_GEM_FOPS(exynos_drm_driver_fops); static const struct drm_driver exynos_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM @@ -124,7 +115,7 @@ static const struct drm_driver exynos_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = exynos_drm_gem_prime_import, .gem_prime_import_sg_table = exynos_drm_gem_prime_import_sg_table, - .gem_prime_mmap = exynos_drm_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, .ioctls = exynos_ioctls, .num_ioctls = ARRAY_SIZE(exynos_ioctls), .fops = &exynos_drm_driver_fops, diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 8d137857818c..32a36572b894 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -13,7 +13,6 @@ #include <linux/gpio/consumer.h> #include <linux/irq.h> #include <linux/of_device.h> -#include <linux/of_gpio.h> #include <linux/of_graph.h> #include <linux/phy/phy.h> #include <linux/regulator/consumer.h> @@ -265,7 +264,7 @@ struct exynos_dsi { struct clk **clks; struct regulator_bulk_data supplies[2]; int irq; - int te_gpio; + struct gpio_desc *te_gpio; u32 pll_clk_rate; u32 burst_clk_rate; @@ -1298,14 +1297,14 @@ static void exynos_dsi_enable_irq(struct exynos_dsi *dsi) { enable_irq(dsi->irq); - if (gpio_is_valid(dsi->te_gpio)) - enable_irq(gpio_to_irq(dsi->te_gpio)); + if (dsi->te_gpio) + enable_irq(gpiod_to_irq(dsi->te_gpio)); } static void exynos_dsi_disable_irq(struct exynos_dsi *dsi) { - if (gpio_is_valid(dsi->te_gpio)) - disable_irq(gpio_to_irq(dsi->te_gpio)); + if (dsi->te_gpio) + disable_irq(gpiod_to_irq(dsi->te_gpio)); disable_irq(dsi->irq); } @@ -1335,42 +1334,31 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi, int ret; int te_gpio_irq; - dsi->te_gpio = of_get_named_gpio(panel->of_node, "te-gpios", 0); - if (dsi->te_gpio == -ENOENT) - return 0; - - if (!gpio_is_valid(dsi->te_gpio)) { - ret = dsi->te_gpio; - dev_err(dsi->dev, "cannot get te-gpios, %d\n", ret); - goto out; - } - - ret = gpio_request(dsi->te_gpio, "te_gpio"); - if (ret) { - dev_err(dsi->dev, "gpio request failed with %d\n", ret); - goto out; + dsi->te_gpio = devm_gpiod_get_optional(dsi->dev, "te", GPIOD_IN); + if (IS_ERR(dsi->te_gpio)) { + dev_err(dsi->dev, "gpio request failed with %ld\n", + PTR_ERR(dsi->te_gpio)); + return PTR_ERR(dsi->te_gpio); } - te_gpio_irq = gpio_to_irq(dsi->te_gpio); + te_gpio_irq = gpiod_to_irq(dsi->te_gpio); ret = request_threaded_irq(te_gpio_irq, exynos_dsi_te_irq_handler, NULL, IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN, "TE", dsi); if (ret) { dev_err(dsi->dev, "request interrupt failed with %d\n", ret); - gpio_free(dsi->te_gpio); - goto out; + gpiod_put(dsi->te_gpio); + return ret; } -out: - return ret; + return 0; } static void exynos_dsi_unregister_te_irq(struct exynos_dsi *dsi) { - if (gpio_is_valid(dsi->te_gpio)) { - free_irq(gpio_to_irq(dsi->te_gpio), dsi); - gpio_free(dsi->te_gpio); - dsi->te_gpio = -ENOENT; + if (dsi->te_gpio) { + free_irq(gpiod_to_irq(dsi->te_gpio), dsi); + gpiod_put(dsi->te_gpio); } } @@ -1745,9 +1733,6 @@ static int exynos_dsi_probe(struct platform_device *pdev) if (!dsi) return -ENOMEM; - /* To be checked as invalid one */ - dsi->te_gpio = -ENOENT; - init_completion(&dsi->completed); spin_lock_init(&dsi->transfer_lock); INIT_LIST_HEAD(&dsi->transfer_list); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 5147f5929be7..02c97b9ca926 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -15,6 +15,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_prime.h> #include <drm/drm_probe_helper.h> #include <drm/exynos_drm.h> @@ -39,25 +40,8 @@ static int exynos_drm_fb_mmap(struct fb_info *info, struct drm_fb_helper *helper = info->par; struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(helper); struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem; - unsigned long vm_size; - int ret; - - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - - vm_size = vma->vm_end - vma->vm_start; - - if (vm_size > exynos_gem->size) - return -EINVAL; - ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie, - exynos_gem->dma_addr, exynos_gem->size, - exynos_gem->dma_attrs); - if (ret < 0) { - DRM_DEV_ERROR(to_dma_dev(helper->dev), "failed to mmap.\n"); - return ret; - } - - return 0; + return drm_gem_prime_mmap(&exynos_gem->base, vma); } static const struct fb_ops exynos_drm_fb_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index ecfd82d0afb7..023f54ee61a8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -782,8 +782,8 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, sc->hratio = (src_w << 14) / (dst_w << hfactor); sc->vratio = (src_h << 14) / (dst_h << vfactor); - sc->up_h = (dst_w >= src_w) ? true : false; - sc->up_v = (dst_h >= src_h) ? true : false; + sc->up_h = (dst_w >= src_w); + sc->up_v = (dst_h >= src_h); DRM_DEV_DEBUG_KMS(ctx->dev, "hratio[%d]vratio[%d]up_h[%d]up_v[%d]\n", sc->hratio, sc->vratio, sc->up_h, sc->up_v); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 0a0c042a3155..3e493f48e0d4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -20,6 +20,8 @@ MODULE_IMPORT_NS(DMA_BUF); +static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap) { struct drm_device *dev = exynos_gem->base.dev; @@ -138,6 +140,7 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = { static const struct drm_gem_object_funcs exynos_drm_gem_object_funcs = { .free = exynos_drm_gem_free_object, .get_sg_table = exynos_drm_gem_prime_get_sg_table, + .mmap = exynos_drm_gem_mmap, .vm_ops = &exynos_drm_gem_vm_ops, }; @@ -357,12 +360,16 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma) +static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); int ret; + if (obj->import_attach) + return dma_buf_mmap(obj->dma_buf, vma, 0); + + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + DRM_DEV_DEBUG_KMS(to_dma_dev(obj->dev), "flags = 0x%x\n", exynos_gem->flags); @@ -388,26 +395,6 @@ err_close_vm: return ret; } -int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_gem_object *obj; - int ret; - - /* set vm_area_struct. */ - ret = drm_gem_mmap(filp, vma); - if (ret < 0) { - DRM_ERROR("failed to mmap.\n"); - return ret; - } - - obj = vma->vm_private_data; - - if (obj->import_attach) - return dma_buf_mmap(obj->dma_buf, vma, 0); - - return exynos_drm_gem_mmap_obj(obj, vma); -} - /* low-level interface prime helpers */ struct drm_gem_object *exynos_drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) @@ -469,15 +456,3 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, exynos_gem->sgt = sgt; return &exynos_gem->base; } - -int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret < 0) - return ret; - - return exynos_drm_gem_mmap_obj(obj, vma); -} diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index a23272fb96fb..79d7e1a87419 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -96,9 +96,6 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -/* set vm_flags and we can change the vm attribute to other one at here. */ -int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); - /* low-level interface prime helpers */ struct drm_gem_object *exynos_drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); @@ -107,7 +104,5 @@ struct drm_gem_object * exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); -int exynos_drm_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma); #endif diff --git a/drivers/gpu/drm/fsl-dcu/Kconfig b/drivers/gpu/drm/fsl-dcu/Kconfig index d7dd8ba90e3a..e95e96c565ba 100644 --- a/drivers/gpu/drm/fsl-dcu/Kconfig +++ b/drivers/gpu/drm/fsl-dcu/Kconfig @@ -3,8 +3,8 @@ config DRM_FSL_DCU tristate "DRM Support for Freescale DCU" depends on DRM && OF && ARM && COMMON_CLK select BACKLIGHT_CLASS_DEVICE + select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_PANEL select REGMAP_MMIO select VIDEOMODE_HELPERS diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig index 290553e2f6b4..b770f7662830 100644 --- a/drivers/gpu/drm/hisilicon/kirin/Kconfig +++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig @@ -4,7 +4,6 @@ config DRM_HISI_KIRIN depends on DRM && OF && ARM64 select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER select DRM_MIPI_DSI help Choose this option if you have a hisilicon Kirin chipsets(hi6220). diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index cd818a629183..00e53de4812b 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -225,12 +225,29 @@ static int hyperv_vmbus_remove(struct hv_device *hdev) { struct drm_device *dev = hv_get_drvdata(hdev); struct hyperv_drm_device *hv = to_hv(dev); + struct pci_dev *pdev; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - vmbus_free_mmio(hv->mem->start, hv->fb_size); + + /* + * Free allocated MMIO memory only on Gen2 VMs. + * On Gen1 VMs, release the PCI device + */ + if (efi_enabled(EFI_BOOT)) { + vmbus_free_mmio(hv->mem->start, hv->fb_size); + } else { + pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, + PCI_DEVICE_ID_HYPERV_VIDEO, NULL); + if (!pdev) { + drm_err(dev, "Unable to find PCI Hyper-V video\n"); + return -ENODEV; + } + pci_release_region(pdev, 0); + pci_dev_put(pdev); + } return 0; } diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 568d711a3537..1b62b9f65196 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -60,7 +60,6 @@ i915-y += i915_driver.o \ # core library code i915-y += \ - dma_resv_utils.o \ i915_memcpy.o \ i915_mm.o \ i915_sw_fence.o \ @@ -154,6 +153,7 @@ gem-y += \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ gem/i915_gem_ttm.o \ + gem/i915_gem_ttm_move.o \ gem/i915_gem_ttm_pm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ @@ -163,6 +163,7 @@ i915-y += \ i915_active.o \ i915_buddy.o \ i915_cmd_parser.o \ + i915_deps.o \ i915_gem_evict.o \ i915_gem_gtt.o \ i915_gem_ww.o \ @@ -173,6 +174,7 @@ i915-y += \ i915_trace_points.o \ i915_ttm_buddy_manager.o \ i915_vma.o \ + i915_vma_snapshot.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support @@ -258,6 +260,7 @@ i915-y += \ display/intel_crt.o \ display/intel_ddi.o \ display/intel_ddi_buf_trans.o \ + display/intel_display_trace.o \ display/intel_dp.o \ display/intel_dp_aux.o \ display/intel_dp_aux_backlight.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index dc41868d01ef..f37677df6ebf 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -9,6 +9,7 @@ #include "intel_audio.h" #include "intel_backlight.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index f5b4dd5b4275..06e00b1eaa7c 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -8,6 +8,7 @@ #include "g4x_hdmi.h" #include "intel_audio.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dpio_phy.h" diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 2194f74101ae..85950ff67609 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -13,6 +13,7 @@ #include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" +#include "intel_fbc.h" #include "intel_sprite.h" #include "i9xx_plane.h" @@ -120,6 +121,15 @@ static bool i9xx_plane_has_fbc(struct drm_i915_private *dev_priv, return i9xx_plane == PLANE_A; } +static struct intel_fbc *i9xx_plane_fbc(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) +{ + if (i9xx_plane_has_fbc(dev_priv, i9xx_plane)) + return dev_priv->fbc; + else + return NULL; +} + static bool i9xx_plane_has_windowing(struct intel_plane *plane) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -807,10 +817,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) plane->id = PLANE_PRIMARY; plane->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, plane->id); - if (i9xx_plane_has_fbc(dev_priv, plane->i9xx_plane)) - plane->fbc = &dev_priv->fbc; - if (plane->fbc) - plane->fbc->possible_framebuffer_bits |= plane->frontbuffer_bit; + intel_fbc_add_plane(i9xx_plane_fbc(dev_priv, plane->i9xx_plane), plane); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { formats = vlv_primary_formats; diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index b4e7ac51aa31..a62550711e98 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -139,6 +139,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, new_conn_state->base.picture_aspect_ratio != old_conn_state->base.picture_aspect_ratio || new_conn_state->base.content_type != old_conn_state->base.content_type || new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode || + new_conn_state->base.privacy_screen_sw_state != old_conn_state->base.privacy_screen_sw_state || !drm_connector_atomic_hdr_metadata_equal(old_state, new_state)) crtc_state->mode_changed = true; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index e3a0bfb7be84..c2c512cd8ec0 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -35,15 +35,16 @@ #include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> -#include "i915_trace.h" +#include "gt/intel_rps.h" + #include "intel_atomic_plane.h" #include "intel_cdclk.h" +#include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_pm.h" #include "intel_sprite.h" -#include "gt/intel_rps.h" static void intel_plane_state_reset(struct intel_plane_state *plane_state, struct intel_plane *plane) @@ -395,7 +396,7 @@ int intel_plane_atomic_check(struct intel_atomic_state *state, const struct intel_plane_state *old_plane_state = intel_atomic_get_old_plane_state(state, plane); const struct intel_plane_state *new_master_plane_state; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(i915, plane->pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(i915, plane->pipe); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); struct intel_crtc_state *new_crtc_state = @@ -818,7 +819,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, * maximum clocks following a vblank miss (see do_rps_boost()). */ if (!state->rps_interactive) { - intel_rps_mark_interactive(&dev_priv->gt.rps, true); + intel_rps_mark_interactive(&to_gt(dev_priv)->rps, true); state->rps_interactive = true; } @@ -852,7 +853,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, return; if (state->rps_interactive) { - intel_rps_mark_interactive(&dev_priv->gt.rps, false); + intel_rps_mark_interactive(&to_gt(dev_priv)->rps, false); state->rps_interactive = false; } diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 03c3111ebdf0..3bdca0fe2cee 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -31,6 +31,7 @@ #include "intel_atomic.h" #include "intel_audio.h" #include "intel_cdclk.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_lpe_audio.h" @@ -1019,7 +1020,7 @@ static void glk_force_audio_cdclk(struct drm_i915_private *dev_priv, struct intel_crtc *crtc; int ret; - crtc = intel_get_first_crtc(dev_priv); + crtc = intel_first_crtc(dev_priv); if (!crtc) return; diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 2b1423a43437..9d989c9f5da4 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1555,12 +1555,24 @@ static const u8 gen9bc_tgp_ddc_pin_map[] = { [DDC_BUS_DDI_D] = GMBUS_PIN_10_TC2_ICP, }; +static const u8 adlp_ddc_pin_map[] = { + [ICL_DDC_BUS_DDI_A] = GMBUS_PIN_1_BXT, + [ICL_DDC_BUS_DDI_B] = GMBUS_PIN_2_BXT, + [ADLP_DDC_BUS_PORT_TC1] = GMBUS_PIN_9_TC1_ICP, + [ADLP_DDC_BUS_PORT_TC2] = GMBUS_PIN_10_TC2_ICP, + [ADLP_DDC_BUS_PORT_TC3] = GMBUS_PIN_11_TC3_ICP, + [ADLP_DDC_BUS_PORT_TC4] = GMBUS_PIN_12_TC4_ICP, +}; + static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) { const u8 *ddc_pin_map; int n_entries; - if (IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_P(i915)) { + ddc_pin_map = adlp_ddc_pin_map; + n_entries = ARRAY_SIZE(adlp_ddc_pin_map); + } else if (IS_ALDERLAKE_S(i915)) { ddc_pin_map = adls_ddc_pin_map; n_entries = ARRAY_SIZE(adls_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index abec394f6869..2da4aacc956b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -634,7 +634,7 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv, for_each_pipe(dev_priv, pipe) data_rate += bw_state->data_rate[pipe]; - if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active()) + if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv)) data_rate = data_rate * 105 / 100; return data_rate; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 91c19e0a98d7..c30cf8d2b835 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -24,9 +24,11 @@ #include <linux/time.h> #include "intel_atomic.h" +#include "intel_atomic_plane.h" #include "intel_audio.h" #include "intel_bw.h" #include "intel_cdclk.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_pcode.h" @@ -67,7 +69,7 @@ void intel_cdclk_get_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk_funcs->get_cdclk(dev_priv, cdclk_config); } -int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state) +static int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); return dev_priv->cdclk_funcs->bw_calc_min_cdclk(state); @@ -1212,6 +1214,19 @@ static void skl_cdclk_uninit_hw(struct drm_i915_private *dev_priv) skl_set_cdclk(dev_priv, &cdclk_config, INVALID_PIPE); } +static bool has_cdclk_squasher(struct drm_i915_private *i915) +{ + return IS_DG2(i915); +} + +struct intel_cdclk_vals { + u32 cdclk; + u16 refclk; + u16 waveform; + u8 divider; /* CD2X divider * 2 */ + u8 ratio; +}; + static const struct intel_cdclk_vals bxt_cdclk_table[] = { { .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 }, { .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 }, @@ -1313,12 +1328,19 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = { }; static const struct intel_cdclk_vals dg2_cdclk_table[] = { - { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, - { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, - { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, - { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, - { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, - { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + { .refclk = 38400, .cdclk = 163200, .divider = 2, .ratio = 34, .waveform = 0x8888 }, + { .refclk = 38400, .cdclk = 204000, .divider = 2, .ratio = 34, .waveform = 0x9248 }, + { .refclk = 38400, .cdclk = 244800, .divider = 2, .ratio = 34, .waveform = 0xa4a4 }, + { .refclk = 38400, .cdclk = 285600, .divider = 2, .ratio = 34, .waveform = 0xa54a }, + { .refclk = 38400, .cdclk = 326400, .divider = 2, .ratio = 34, .waveform = 0xaaaa }, + { .refclk = 38400, .cdclk = 367200, .divider = 2, .ratio = 34, .waveform = 0xad5a }, + { .refclk = 38400, .cdclk = 408000, .divider = 2, .ratio = 34, .waveform = 0xb6b6 }, + { .refclk = 38400, .cdclk = 448800, .divider = 2, .ratio = 34, .waveform = 0xdbb6 }, + { .refclk = 38400, .cdclk = 489600, .divider = 2, .ratio = 34, .waveform = 0xeeee }, + { .refclk = 38400, .cdclk = 530400, .divider = 2, .ratio = 34, .waveform = 0xf7de }, + { .refclk = 38400, .cdclk = 571200, .divider = 2, .ratio = 34, .waveform = 0xfefe }, + { .refclk = 38400, .cdclk = 612000, .divider = 2, .ratio = 34, .waveform = 0xfffe }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34, .waveform = 0xffff }, {} }; @@ -1454,6 +1476,7 @@ static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, static void bxt_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_config *cdclk_config) { + u32 squash_ctl = 0; u32 divider; int div; @@ -1491,7 +1514,21 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, return; } - cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_config->vco, div); + if (has_cdclk_squasher(dev_priv)) + squash_ctl = intel_de_read(dev_priv, CDCLK_SQUASH_CTL); + + if (squash_ctl & CDCLK_SQUASH_ENABLE) { + u16 waveform; + int size; + + size = REG_FIELD_GET(CDCLK_SQUASH_WINDOW_SIZE_MASK, squash_ctl) + 1; + waveform = REG_FIELD_GET(CDCLK_SQUASH_WAVEFORM_MASK, squash_ctl) >> (16 - size); + + cdclk_config->cdclk = DIV_ROUND_CLOSEST(hweight16(waveform) * + cdclk_config->vco, size * div); + } else { + cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_config->vco, div); + } out: /* @@ -1626,6 +1663,26 @@ static u32 bxt_cdclk_cd2x_div_sel(struct drm_i915_private *dev_priv, } } +static u32 cdclk_squash_waveform(struct drm_i915_private *dev_priv, + int cdclk) +{ + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + if (cdclk == dev_priv->cdclk.hw.bypass) + return 0; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk == cdclk) + return table[i].waveform; + + drm_WARN(&dev_priv->drm, 1, "cdclk %d not valid for refclk %u\n", + cdclk, dev_priv->cdclk.hw.ref); + + return 0xffff; +} + static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) @@ -1633,6 +1690,8 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk = cdclk_config->cdclk; int vco = cdclk_config->vco; u32 val; + u16 waveform; + int clock; int ret; /* Inform power controller of upcoming frequency change. */ @@ -1676,7 +1735,24 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, bxt_de_pll_enable(dev_priv, vco); } - val = bxt_cdclk_cd2x_div_sel(dev_priv, cdclk, vco) | + waveform = cdclk_squash_waveform(dev_priv, cdclk); + + if (waveform) + clock = vco / 2; + else + clock = cdclk; + + if (has_cdclk_squasher(dev_priv)) { + u32 squash_ctl = 0; + + if (waveform) + squash_ctl = CDCLK_SQUASH_ENABLE | + CDCLK_SQUASH_WINDOW_SIZE(0xf) | waveform; + + intel_de_write(dev_priv, CDCLK_SQUASH_CTL, squash_ctl); + } + + val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) | bxt_cdclk_cd2x_pipe(dev_priv, pipe) | skl_cdclk_decimal(cdclk); @@ -1690,7 +1766,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, intel_de_write(dev_priv, CDCLK_CTL, val); if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(intel_crtc_for_pipe(dev_priv, pipe)); if (DISPLAY_VER(dev_priv) >= 11) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, @@ -1728,7 +1804,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; - int cdclk, vco; + int cdclk, clock, vco; intel_update_cdclk(dev_priv); intel_dump_cdclk_config(&dev_priv->cdclk.hw, "Current CDCLK"); @@ -1764,8 +1840,12 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) expected = skl_cdclk_decimal(cdclk); /* Figure out what CD2X divider we should be using for this cdclk */ - expected |= bxt_cdclk_cd2x_div_sel(dev_priv, - dev_priv->cdclk.hw.cdclk, + if (has_cdclk_squasher(dev_priv)) + clock = dev_priv->cdclk.hw.vco / 2; + else + clock = dev_priv->cdclk.hw.cdclk; + + expected |= bxt_cdclk_cd2x_div_sel(dev_priv, clock, dev_priv->cdclk.hw.vco); /* @@ -1881,6 +1961,25 @@ static bool intel_cdclk_can_crawl(struct drm_i915_private *dev_priv, a->ref == b->ref; } +static bool intel_cdclk_can_squash(struct drm_i915_private *dev_priv, + const struct intel_cdclk_config *a, + const struct intel_cdclk_config *b) +{ + /* + * FIXME should store a bit more state in intel_cdclk_config + * to differentiate squasher vs. cd2x divider properly. For + * the moment all platforms with squasher use a fixed cd2x + * divider. + */ + if (!has_cdclk_squasher(dev_priv)) + return false; + + return a->cdclk != b->cdclk && + a->vco != 0 && + a->vco == b->vco && + a->ref == b->ref; +} + /** * intel_cdclk_needs_modeset - Determine if changong between the CDCLK * configurations requires a modeset on all pipes @@ -1918,7 +2017,17 @@ static bool intel_cdclk_can_cd2x_update(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) < 10 && !IS_BROXTON(dev_priv)) return false; + /* + * FIXME should store a bit more state in intel_cdclk_config + * to differentiate squasher vs. cd2x divider properly. For + * the moment all platforms with squasher use a fixed cd2x + * divider. + */ + if (has_cdclk_squasher(dev_priv)) + return false; + return a->cdclk != b->cdclk && + a->vco != 0 && a->vco == b->vco && a->ref == b->ref; } @@ -2529,6 +2638,58 @@ intel_atomic_get_cdclk_state(struct intel_atomic_state *state) return to_intel_cdclk_state(cdclk_state); } +int intel_cdclk_atomic_check(struct intel_atomic_state *state, + bool *need_cdclk_calc) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_cdclk_state *old_cdclk_state; + const struct intel_cdclk_state *new_cdclk_state; + struct intel_plane_state *plane_state; + struct intel_bw_state *new_bw_state; + struct intel_plane *plane; + int min_cdclk = 0; + enum pipe pipe; + int ret; + int i; + + /* + * active_planes bitmask has been updated, and potentially affected + * planes are part of the state. We can now compute the minimum cdclk + * for each plane. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + ret = intel_plane_calc_min_cdclk(state, plane, need_cdclk_calc); + if (ret) + return ret; + } + + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); + + if (new_cdclk_state && + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) + *need_cdclk_calc = true; + + ret = intel_cdclk_bw_calc_min_cdclk(state); + if (ret) + return ret; + + new_bw_state = intel_atomic_get_new_bw_state(state); + + if (!new_cdclk_state || !new_bw_state) + return 0; + + for_each_pipe(i915, pipe) { + min_cdclk = max(new_cdclk_state->min_cdclk[pipe], min_cdclk); + + /* Currently do this change only if we need to increase */ + if (new_bw_state->min_cdclk > min_cdclk) + *need_cdclk_calc = true; + } + + return 0; +} + int intel_cdclk_init(struct drm_i915_private *dev_priv) { struct intel_cdclk_state *cdclk_state; @@ -2592,7 +2753,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) struct intel_crtc_state *crtc_state; pipe = ilog2(new_cdclk_state->active_pipes); - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); if (IS_ERR(crtc_state)) @@ -2602,9 +2763,14 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) pipe = INVALID_PIPE; } - if (intel_cdclk_can_crawl(dev_priv, - &old_cdclk_state->actual, - &new_cdclk_state->actual)) { + if (intel_cdclk_can_squash(dev_priv, + &old_cdclk_state->actual, + &new_cdclk_state->actual)) { + drm_dbg_kms(&dev_priv->drm, + "Can change cdclk via squasher\n"); + } else if (intel_cdclk_can_crawl(dev_priv, + &old_cdclk_state->actual, + &new_cdclk_state->actual)) { drm_dbg_kms(&dev_priv->drm, "Can change cdclk via crawl\n"); } else if (pipe != INVALID_PIPE) { diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 309b3f394e24..fc638522e445 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -16,13 +16,6 @@ struct drm_i915_private; struct intel_atomic_state; struct intel_crtc_state; -struct intel_cdclk_vals { - u32 cdclk; - u16 refclk; - u8 divider; /* CD2X divider * 2 */ - u8 ratio; -}; - struct intel_cdclk_state { struct intel_global_state base; @@ -70,7 +63,8 @@ void intel_dump_cdclk_config(const struct intel_cdclk_config *cdclk_config, int intel_modeset_calc_cdclk(struct intel_atomic_state *state); void intel_cdclk_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_config *cdclk_config); -int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state); +int intel_cdclk_atomic_check(struct intel_atomic_state *state, + bool *need_cdclk_calc); struct intel_cdclk_state * intel_atomic_get_cdclk_state(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 840f13b75492..de3ded1e327a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -808,6 +808,14 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state) } } +static int glk_degamma_lut_size(struct drm_i915_private *i915) +{ + if (DISPLAY_VER(i915) >= 13) + return 131; + else + return 35; +} + static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -827,8 +835,8 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) for (i = 0; i < lut_size; i++) { /* - * First 33 entries represent range from 0 to 1.0 - * 34th and 35th entry will represent extended range + * First lut_size entries represent range from 0 to 1.0 + * 3 additional lut entries will represent extended range * inputs 3.0 and 7.0 respectively, currently clamped * at 1.0. Since the precision is 16bit, the user * value can be directly filled to register. @@ -844,7 +852,7 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) } /* Clamp values > 1.0. */ - while (i++ < 35) + while (i++ < glk_degamma_lut_size(dev_priv)) intel_de_write_fw(dev_priv, PRE_CSC_GAMC_DATA(pipe), 1 << 16); intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe), 0); @@ -1574,6 +1582,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); u32 gamma_mode = 0; if (crtc_state->hw.degamma_lut) @@ -1586,6 +1596,13 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) if (!crtc_state->hw.gamma_lut || crtc_state_is_legacy_gamma(crtc_state)) gamma_mode |= GAMMA_MODE_MODE_8BIT; + /* + * Enable 10bit gamma for D13 + * ToDo: Extend to Logarithmic Gamma once the new UAPI + * is acccepted and implemented by a userspace consumer + */ + else if (DISPLAY_VER(i915) >= 13) + gamma_mode |= GAMMA_MODE_MODE_10BIT; else gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index f0f28572dfdc..6a3893c8ff22 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -321,8 +321,8 @@ static void hsw_enable_crt(struct intel_atomic_state *state, intel_crt_set_dpms(encoder, crtc_state, DRM_MODE_DPMS_ON); - intel_wait_for_vblank(dev_priv, pipe); - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); + intel_crtc_wait_for_next_vblank(crtc); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } @@ -721,7 +721,7 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe) intel_uncore_posting_read(uncore, pipeconf_reg); /* Wait for next Vblank to substitue * border color for Color info */ - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(intel_crtc_for_pipe(dev_priv, pipe)); st00 = intel_uncore_read8(uncore, _VGA_MSR_WRITE); status = ((st00 & (1 << 4)) != 0) ? connector_status_connected : diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 243d5cc29734..16c3ca66d9f0 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -12,8 +12,8 @@ #include <drm/drm_plane_helper.h> #include <drm/drm_vblank_work.h> -#include "i915_trace.h" #include "i915_vgpu.h" +#include "i9xx_plane.h" #include "icl_dsi.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" @@ -21,13 +21,13 @@ #include "intel_crtc.h" #include "intel_cursor.h" #include "intel_display_debugfs.h" +#include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_pipe_crc.h" #include "intel_psr.h" #include "intel_sprite.h" #include "intel_vrr.h" -#include "i9xx_plane.h" #include "skl_universal_plane.h" static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -36,6 +36,38 @@ static void assert_vblank_disabled(struct drm_crtc *crtc) drm_crtc_vblank_put(crtc); } +struct intel_crtc *intel_first_crtc(struct drm_i915_private *i915) +{ + return to_intel_crtc(drm_crtc_from_index(&i915->drm, 0)); +} + +struct intel_crtc *intel_crtc_for_pipe(struct drm_i915_private *i915, + enum pipe pipe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + if (crtc->pipe == pipe) + return crtc; + } + + return NULL; +} + +void intel_crtc_wait_for_next_vblank(struct intel_crtc *crtc) +{ + drm_crtc_wait_one_vblank(&crtc->base); +} + +void intel_wait_for_vblank_if_active(struct drm_i915_private *i915, + enum pipe pipe) +{ + struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe); + + if (crtc->active) + intel_crtc_wait_for_next_vblank(crtc); +} + u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -327,18 +359,6 @@ int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) if (ret) goto fail; - BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) || - dev_priv->pipe_to_crtc_mapping[pipe] != NULL); - dev_priv->pipe_to_crtc_mapping[pipe] = crtc; - - if (DISPLAY_VER(dev_priv) < 9) { - enum i9xx_plane_id i9xx_plane = primary->i9xx_plane; - - BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || - dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL); - dev_priv->plane_to_crtc_mapping[i9xx_plane] = crtc; - } - if (DISPLAY_VER(dev_priv) >= 11) drm_crtc_create_scaling_filter_property(&crtc->base, BIT(DRM_SCALING_FILTER_DEFAULT) | diff --git a/drivers/gpu/drm/i915/display/intel_crtc.h b/drivers/gpu/drm/i915/display/intel_crtc.h index a0039fdb1eb0..73077137fb99 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.h +++ b/drivers/gpu/drm/i915/display/intel_crtc.h @@ -8,6 +8,7 @@ #include <linux/types.h> +enum i9xx_plane_id; enum pipe; struct drm_display_mode; struct drm_i915_private; @@ -28,5 +29,11 @@ void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state); void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state); void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state); void intel_wait_for_vblank_workers(struct intel_atomic_state *state); +struct intel_crtc *intel_first_crtc(struct drm_i915_private *i915); +struct intel_crtc *intel_crtc_for_pipe(struct drm_i915_private *i915, + enum pipe pipe); +void intel_wait_for_vblank_if_active(struct drm_i915_private *i915, + enum pipe pipe); +void intel_crtc_wait_for_next_vblank(struct intel_crtc *crtc); #endif diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f9e7e3d1c7d0..9c9d574f0b8c 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -25,6 +25,7 @@ * */ +#include <drm/drm_privacy_screen_consumer.h> #include <drm/drm_scdc_helper.h> #include "i915_drv.h" @@ -2905,6 +2906,7 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, if (port == PORT_A && DISPLAY_VER(dev_priv) < 9) intel_dp_stop_link_train(intel_dp, crtc_state); + drm_connector_update_privacy_screen(conn_state); intel_edp_backlight_on(crtc_state, conn_state); if (!dig_port->lspcon.active || dig_port->dp.has_hdmi_sink) @@ -3111,6 +3113,7 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_drrs_update(intel_dp, crtc_state); intel_backlight_update(state, encoder, crtc_state, conn_state); + drm_connector_update_privacy_screen(conn_state); } void intel_ddi_update_pipe(struct intel_atomic_state *state, @@ -3922,6 +3925,19 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) return NULL; } + if (dig_port->base.type == INTEL_OUTPUT_EDP) { + struct drm_device *dev = dig_port->base.base.dev; + struct drm_privacy_screen *privacy_screen; + + privacy_screen = drm_privacy_screen_get(dev->dev, NULL); + if (!IS_ERR(privacy_screen)) { + drm_connector_attach_privacy_screen_provider(&connector->base, + privacy_screen); + } else if (PTR_ERR(privacy_screen) != -ENODEV) { + drm_warn(dev, "Error getting privacy-screen\n"); + } + } + return connector; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 78cd8f77b49d..1e689d573512 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -1032,6 +1032,21 @@ bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table) return table == &tgl_combo_phy_trans_edp_hbr2_hobl; } +static bool use_edp_hobl(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + return i915->vbt.edp.hobl && !intel_dp->hobl_failed; +} + +static bool use_edp_low_vswing(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + return i915->vbt.edp.low_vswing; +} + static const struct intel_ddi_buf_trans * intel_get_buf_trans(const struct intel_ddi_buf_trans *trans, int *num_entries) { @@ -1057,14 +1072,12 @@ bdw_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) return intel_get_buf_trans(&bdw_trans_fdi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&bdw_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return intel_get_buf_trans(&bdw_trans_edp, n_entries); else return intel_get_buf_trans(&bdw_trans_dp, n_entries); @@ -1094,12 +1107,10 @@ skl_y_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_y_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_y_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &skl_y_trans_dp, n_entries); @@ -1110,12 +1121,10 @@ skl_u_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_u_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &skl_u_trans_dp, n_entries); @@ -1126,12 +1135,10 @@ skl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &skl_trans_dp, n_entries); @@ -1142,12 +1149,10 @@ kbl_y_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_y_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_y_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &kbl_y_trans_dp, n_entries); @@ -1158,12 +1163,10 @@ kbl_u_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_u_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &kbl_u_trans_dp, n_entries); @@ -1174,12 +1177,10 @@ kbl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&skl_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return _skl_get_buf_trans_dp(encoder, &skl_trans_edp, n_entries); else return _skl_get_buf_trans_dp(encoder, &kbl_trans_dp, n_entries); @@ -1190,12 +1191,10 @@ bxt_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&bxt_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - i915->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return intel_get_buf_trans(&bxt_trans_edp, n_entries); else return intel_get_buf_trans(&bxt_trans_dp, n_entries); @@ -1215,12 +1214,10 @@ icl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (crtc_state->port_clock > 540000) { return intel_get_buf_trans(&icl_combo_phy_trans_dp_hbr2_edp_hbr3, n_entries); - } else if (dev_priv->vbt.edp.low_vswing) { + } else if (use_edp_low_vswing(encoder)) { return intel_get_buf_trans(&icl_combo_phy_trans_edp_hbr2, n_entries); } @@ -1282,12 +1279,10 @@ ehl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&icl_combo_phy_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - dev_priv->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return ehl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else return intel_get_buf_trans(&ehl_combo_phy_trans_dp, n_entries); @@ -1309,12 +1304,10 @@ jsl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return intel_get_buf_trans(&icl_combo_phy_trans_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && - dev_priv->vbt.edp.low_vswing) + use_edp_low_vswing(encoder)) return jsl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else return intel_get_buf_trans(&icl_combo_phy_trans_dp_hbr2_edp_hbr3, n_entries); @@ -1346,16 +1339,13 @@ tgl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (crtc_state->port_clock > 540000) { return intel_get_buf_trans(&icl_combo_phy_trans_dp_hbr2_edp_hbr3, n_entries); - } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + } else if (use_edp_hobl(encoder)) { return intel_get_buf_trans(&tgl_combo_phy_trans_edp_hbr2_hobl, n_entries); - } else if (dev_priv->vbt.edp.low_vswing) { + } else if (use_edp_low_vswing(encoder)) { return intel_get_buf_trans(&icl_combo_phy_trans_edp_hbr2, n_entries); } @@ -1394,16 +1384,13 @@ dg1_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (crtc_state->port_clock > 540000) return intel_get_buf_trans(&icl_combo_phy_trans_dp_hbr2_edp_hbr3, n_entries); - else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) + else if (use_edp_hobl(encoder)) return intel_get_buf_trans(&tgl_combo_phy_trans_edp_hbr2_hobl, n_entries); - else if (dev_priv->vbt.edp.low_vswing) + else if (use_edp_low_vswing(encoder)) return intel_get_buf_trans(&icl_combo_phy_trans_edp_hbr2, n_entries); else @@ -1439,16 +1426,13 @@ rkl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (crtc_state->port_clock > 540000) { return intel_get_buf_trans(&icl_combo_phy_trans_dp_hbr2_edp_hbr3, n_entries); - } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + } else if (use_edp_hobl(encoder)) { return intel_get_buf_trans(&tgl_combo_phy_trans_edp_hbr2_hobl, n_entries); - } else if (dev_priv->vbt.edp.low_vswing) { + } else if (use_edp_low_vswing(encoder)) { return intel_get_buf_trans(&icl_combo_phy_trans_edp_hbr2, n_entries); } @@ -1485,14 +1469,11 @@ adls_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (crtc_state->port_clock > 540000) return intel_get_buf_trans(&adls_combo_phy_trans_edp_hbr3, n_entries); - else if (i915->vbt.edp.hobl && !intel_dp->hobl_failed) + else if (use_edp_hobl(encoder)) return intel_get_buf_trans(&tgl_combo_phy_trans_edp_hbr2_hobl, n_entries); - else if (i915->vbt.edp.low_vswing) + else if (use_edp_low_vswing(encoder)) return intel_get_buf_trans(&adls_combo_phy_trans_edp_hbr2, n_entries); else return adls_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); @@ -1527,16 +1508,13 @@ adlp_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (crtc_state->port_clock > 540000) { return intel_get_buf_trans(&adlp_combo_phy_trans_edp_hbr3, n_entries); - } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + } else if (use_edp_hobl(encoder)) { return intel_get_buf_trans(&tgl_combo_phy_trans_edp_hbr2_hobl, n_entries); - } else if (dev_priv->vbt.edp.low_vswing) { + } else if (use_edp_low_vswing(encoder)) { return intel_get_buf_trans(&adlp_combo_phy_trans_edp_up_to_hbr2, n_entries); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b2d51cd79d6c..bf7ce684dd8e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/dma-resv.h> #include <linux/slab.h> +#include <linux/vga_switcheroo.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -41,6 +42,7 @@ #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_privacy_screen_consumer.h> #include <drm/drm_probe_helper.h> #include <drm/drm_rect.h> @@ -697,7 +699,7 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest stride limits of them all, * if in case pipe A is disabled, use the first pipe from pipe_mask. */ - crtc = intel_get_first_crtc(dev_priv); + crtc = intel_first_crtc(dev_priv); if (!crtc) return 0; @@ -775,7 +777,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, */ if (HAS_GMCH(dev_priv) && intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); /* * Gen2 reports pipe underruns whenever all planes are disabled. @@ -785,7 +787,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); intel_plane_disable_arm(plane, crtc_state); - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); } unsigned int @@ -841,7 +843,7 @@ __intel_display_resume(struct drm_device *dev, static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) { return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display && - intel_has_gpu_reset(&dev_priv->gt)); + intel_has_gpu_reset(to_gt(dev_priv))); } void intel_display_prepare_reset(struct drm_i915_private *dev_priv) @@ -860,14 +862,14 @@ void intel_display_prepare_reset(struct drm_i915_private *dev_priv) return; /* We have a modeset vs reset deadlock, defensively unbreak it. */ - set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags); + set_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags); smp_mb__after_atomic(); - wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET); + wake_up_bit(&to_gt(dev_priv)->reset.flags, I915_RESET_MODESET); if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) { drm_dbg_kms(&dev_priv->drm, "Modeset potentially stuck, unbreaking through wedging\n"); - intel_gt_set_wedged(&dev_priv->gt); + intel_gt_set_wedged(to_gt(dev_priv)); } /* @@ -918,7 +920,7 @@ void intel_display_finish_reset(struct drm_i915_private *dev_priv) return; /* reset doesn't touch the display */ - if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) + if (!test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags)) return; state = fetch_and_zero(&dev_priv->modeset_restore_state); @@ -956,7 +958,7 @@ unlock: drm_modeset_acquire_fini(ctx); mutex_unlock(&dev->mode_config.mutex); - clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags); + clear_bit_unlock(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags); } static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state) @@ -991,6 +993,10 @@ static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state) else if (DISPLAY_VER(dev_priv) >= 13) tmp |= UNDERRUN_RECOVERY_DISABLE_ADLP; + /* Wa_14010547955:dg2 */ + if (IS_DG2_DISPLAY_STEP(dev_priv, STEP_B0, STEP_FOREVER)) + tmp |= DG2_RENDER_CCSTAG_4_3_EN; + intel_de_write(dev_priv, PIPE_CHICKEN(pipe), tmp); } @@ -1011,7 +1017,7 @@ bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv) if (cleanup_done) continue; - drm_crtc_wait_one_vblank(crtc); + intel_crtc_wait_for_next_vblank(to_intel_crtc(crtc)); return true; } @@ -1158,7 +1164,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) } /* We need to wait for a vblank before we can disable the plane. */ - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); } static void intel_crtc_dpms_overlay_disable(struct intel_crtc *crtc) @@ -1293,7 +1299,7 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - return crtc_state->uapi.async_flip && intel_vtd_active() && + return crtc_state->uapi.async_flip && intel_vtd_active(i915) && (DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915)); } @@ -1389,7 +1395,6 @@ static void intel_crtc_disable_flip_done(struct intel_atomic_state *state, static void intel_crtc_async_flip_disable_wa(struct intel_atomic_state *state, struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); const struct intel_crtc_state *new_crtc_state = @@ -1415,7 +1420,7 @@ static void intel_crtc_async_flip_disable_wa(struct intel_atomic_state *state, } if (need_vbl_wait) - intel_wait_for_vblank(i915, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); } static void intel_pre_plane_update(struct intel_atomic_state *state, @@ -1434,7 +1439,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, hsw_disable_ips(old_crtc_state); if (intel_fbc_pre_update(state, crtc)) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); if (!needs_async_flip_vtd_wa(old_crtc_state) && needs_async_flip_vtd_wa(new_crtc_state)) @@ -1466,7 +1471,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, */ if (HAS_GMCH(dev_priv) && old_crtc_state->hw.active && new_crtc_state->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); /* * IVB workaround: must disable low power watermarks for at least @@ -1477,7 +1482,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, */ if (old_crtc_state->hw.active && new_crtc_state->disable_lp_wm && ilk_disable_lp_wm(dev_priv)) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); /* * If we're doing a modeset we don't need to do any @@ -1893,8 +1898,8 @@ static void ilk_crtc_enable(struct intel_atomic_state *state, * in case there are more corner cases we don't know about. */ if (new_crtc_state->has_pch_encoder) { - intel_wait_for_vblank(dev_priv, pipe); - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); + intel_crtc_wait_for_next_vblank(crtc); } intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); @@ -2094,7 +2099,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, intel_encoders_enable(state, crtc); if (psl_clkgate_wa) { - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); } @@ -2102,8 +2107,12 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, * to change the workaround. */ hsw_workaround_pipe = new_crtc_state->hsw_workaround_pipe; if (IS_HASWELL(dev_priv) && hsw_workaround_pipe != INVALID_PIPE) { - intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); - intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); + struct intel_crtc *wa_crtc; + + wa_crtc = intel_crtc_for_pipe(dev_priv, hsw_workaround_pipe); + + intel_crtc_wait_for_next_vblank(wa_crtc); + intel_crtc_wait_for_next_vblank(wa_crtc); } } @@ -2529,7 +2538,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state, /* prevents spurious underruns */ if (DISPLAY_VER(dev_priv) == 2) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); } static void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state) @@ -2560,7 +2569,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state *state, * wait for planes to fully turn off before disabling the pipe. */ if (DISPLAY_VER(dev_priv) == 2) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); intel_encoders_disable(state, crtc); @@ -4645,7 +4654,8 @@ found: drm_atomic_state_put(state); /* let the connector get through one full cycle before testing */ - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); + return true; fail: @@ -4830,7 +4840,7 @@ intel_encoder_current_mode(struct intel_encoder *encoder) if (!encoder->get_hw_state(encoder, &pipe)) return NULL; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) @@ -5159,13 +5169,13 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) if (icl_is_hdr_plane(dev_priv, plane->id)) { if (linked->id == PLANE_SPRITE5) - plane_state->cus_ctl |= PLANE_CUS_PLANE_7; + plane_state->cus_ctl |= PLANE_CUS_Y_PLANE_7_ICL; else if (linked->id == PLANE_SPRITE4) - plane_state->cus_ctl |= PLANE_CUS_PLANE_6; + plane_state->cus_ctl |= PLANE_CUS_Y_PLANE_6_ICL; else if (linked->id == PLANE_SPRITE3) - plane_state->cus_ctl |= PLANE_CUS_PLANE_5_RKL; + plane_state->cus_ctl |= PLANE_CUS_Y_PLANE_5_RKL; else if (linked->id == PLANE_SPRITE2) - plane_state->cus_ctl |= PLANE_CUS_PLANE_4_RKL; + plane_state->cus_ctl |= PLANE_CUS_Y_PLANE_4_RKL; else MISSING_CASE(linked->id); } @@ -7546,59 +7556,6 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state) return 0; } -static int intel_atomic_check_cdclk(struct intel_atomic_state *state, - bool *need_cdclk_calc) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - const struct intel_cdclk_state *old_cdclk_state; - const struct intel_cdclk_state *new_cdclk_state; - struct intel_plane_state *plane_state; - struct intel_bw_state *new_bw_state; - struct intel_plane *plane; - int min_cdclk = 0; - enum pipe pipe; - int ret; - int i; - /* - * active_planes bitmask has been updated, and potentially - * affected planes are part of the state. We can now - * compute the minimum cdclk for each plane. - */ - for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - ret = intel_plane_calc_min_cdclk(state, plane, need_cdclk_calc); - if (ret) - return ret; - } - - old_cdclk_state = intel_atomic_get_old_cdclk_state(state); - new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - - if (new_cdclk_state && - old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) - *need_cdclk_calc = true; - - ret = intel_cdclk_bw_calc_min_cdclk(state); - if (ret) - return ret; - - new_bw_state = intel_atomic_get_new_bw_state(state); - - if (!new_cdclk_state || !new_bw_state) - return 0; - - for_each_pipe(dev_priv, pipe) { - min_cdclk = max(new_cdclk_state->min_cdclk[pipe], min_cdclk); - - /* - * Currently do this change only if we need to increase - */ - if (new_bw_state->min_cdclk > min_cdclk) - *need_cdclk_calc = true; - } - - return 0; -} - static int intel_atomic_check_crtcs(struct intel_atomic_state *state) { struct intel_crtc_state *crtc_state; @@ -8039,7 +7996,6 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - intel_fbc_choose_crtc(dev_priv, state); ret = intel_compute_global_watermarks(state); if (ret) goto fail; @@ -8048,7 +8004,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = intel_atomic_check_cdclk(state, &any_ms); + ret = intel_cdclk_atomic_check(state, &any_ms); if (ret) goto fail; @@ -8071,6 +8027,10 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + ret = intel_fbc_atomic_check(state); + if (ret) + goto fail; + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (new_crtc_state->uapi.async_flip) { @@ -8462,7 +8422,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb, &old_crtc_state->wm.skl.ddb) && (update_pipes | modeset_pipes)) - intel_wait_for_vblank(dev_priv, pipe); + intel_crtc_wait_for_next_vblank(crtc); } } @@ -8553,19 +8513,19 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat for (;;) { prepare_to_wait(&intel_state->commit_ready.wait, &wait_fence, TASK_UNINTERRUPTIBLE); - prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + prepare_to_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags, I915_RESET_MODESET), &wait_reset, TASK_UNINTERRUPTIBLE); if (i915_sw_fence_done(&intel_state->commit_ready) || - test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) + test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags)) break; schedule(); } finish_wait(&intel_state->commit_ready.wait, &wait_fence); - finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + finish_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags, I915_RESET_MODESET), &wait_reset); } @@ -8815,7 +8775,7 @@ static void intel_atomic_commit_work(struct work_struct *work) intel_atomic_commit_tail(state); } -static int __i915_sw_fence_call +static int intel_atomic_commit_ready(struct i915_sw_fence *fence, enum i915_sw_fence_notify notify) { @@ -8962,8 +8922,8 @@ static void intel_plane_possible_crtcs_init(struct drm_i915_private *dev_priv) struct intel_plane *plane; for_each_intel_plane(&dev_priv->drm, plane) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, - plane->pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, + plane->pipe); plane->base.possible_crtcs = drm_crtc_mask(&crtc->base); } @@ -9956,7 +9916,7 @@ int intel_modeset_init(struct drm_i915_private *i915) void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); /* 640x480@60Hz, ~25175 kHz */ struct dpll clock = { .m1 = 18, @@ -10029,7 +9989,7 @@ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); drm_dbg_kms(&dev_priv->drm, "disabling pipe %c due to force quirk\n", pipe_name(pipe)); @@ -10081,7 +10041,7 @@ intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv) "[PLANE:%d:%s] attached to the wrong pipe, disabling plane\n", plane->base.base.id, plane->base.name); - plane_crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + plane_crtc = intel_crtc_for_pipe(dev_priv, pipe); intel_plane_disable_noatomic(plane_crtc, plane); } } @@ -10334,7 +10294,7 @@ static void readout_plane_state(struct drm_i915_private *dev_priv) visible = plane->get_hw_state(plane, &pipe); - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); crtc_state = to_intel_crtc_state(crtc->base.state); intel_set_plane_visible(crtc_state, plane_state, visible); @@ -10401,7 +10361,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); crtc_state = to_intel_crtc_state(crtc->base.state); encoder->base.crtc = &crtc->base; @@ -10854,6 +10814,27 @@ void intel_modeset_driver_remove_nogem(struct drm_i915_private *i915) intel_bios_driver_remove(i915); } +bool intel_modeset_probe_defer(struct pci_dev *pdev) +{ + struct drm_privacy_screen *privacy_screen; + + /* + * apple-gmux is needed on dual GPU MacBook Pro + * to probe the panel if we're the inactive GPU. + */ + if (vga_switcheroo_client_probe_defer(pdev)) + return true; + + /* If the LCD panel has a privacy-screen, wait for it */ + privacy_screen = drm_privacy_screen_get(&pdev->dev, NULL); + if (IS_ERR(privacy_screen) && PTR_ERR(privacy_screen) == -EPROBE_DEFER) + return true; + + drm_privacy_screen_put(privacy_screen); + + return false; +} + void intel_display_driver_register(struct drm_i915_private *i915) { if (!HAS_DISPLAY(i915)) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 38c15ec30ee7..b61b75248ded 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -57,6 +57,7 @@ struct intel_plane; struct intel_plane_state; struct intel_remapped_info; struct intel_rotation_info; +struct pci_dev; enum i915_gpio { GPIOA, @@ -346,9 +347,33 @@ enum phy_fia { FIA3, }; +enum hpd_pin { + HPD_NONE = 0, + HPD_TV = HPD_NONE, /* TV is known to be unreliable */ + HPD_CRT, + HPD_SDVO_B, + HPD_SDVO_C, + HPD_PORT_A, + HPD_PORT_B, + HPD_PORT_C, + HPD_PORT_D, + HPD_PORT_E, + HPD_PORT_TC1, + HPD_PORT_TC2, + HPD_PORT_TC3, + HPD_PORT_TC4, + HPD_PORT_TC5, + HPD_PORT_TC6, + + HPD_NUM_PINS +}; + +#define for_each_hpd_pin(__pin) \ + for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) + #define for_each_pipe(__dev_priv, __p) \ for ((__p) = 0; (__p) < I915_MAX_PIPES; (__p)++) \ - for_each_if(INTEL_INFO(__dev_priv)->pipe_mask & BIT(__p)) + for_each_if(INTEL_INFO(__dev_priv)->display.pipe_mask & BIT(__p)) #define for_each_pipe_masked(__dev_priv, __p, __mask) \ for_each_pipe(__dev_priv, __p) \ @@ -356,7 +381,7 @@ enum phy_fia { #define for_each_cpu_transcoder(__dev_priv, __t) \ for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ - for_each_if (INTEL_INFO(__dev_priv)->cpu_transcoder_mask & BIT(__t)) + for_each_if (INTEL_INFO(__dev_priv)->display.cpu_transcoder_mask & BIT(__t)) #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ for_each_cpu_transcoder(__dev_priv, __t) \ @@ -616,6 +641,7 @@ void intel_display_driver_register(struct drm_i915_private *i915); void intel_display_driver_unregister(struct drm_i915_private *i915); /* modesetting */ +bool intel_modeset_probe_defer(struct pci_dev *pdev); void intel_modeset_init_hw(struct drm_i915_private *i915); int intel_modeset_init_noirq(struct drm_i915_private *i915); int intel_modeset_init_nogem(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index acf70ae66a29..572445299b04 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -40,52 +40,6 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) return 0; } -static int i915_fbc_status(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_fbc *fbc = &dev_priv->fbc; - intel_wakeref_t wakeref; - - if (!HAS_FBC(dev_priv)) - return -ENODEV; - - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&fbc->lock); - - if (intel_fbc_is_active(fbc)) { - seq_puts(m, "FBC enabled\n"); - seq_printf(m, "Compressing: %s\n", - yesno(intel_fbc_is_compressing(fbc))); - } else { - seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); - } - - mutex_unlock(&fbc->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - - return 0; -} - -static int i915_fbc_false_color_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->fbc.false_color; - - return 0; -} - -static int i915_fbc_false_color_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - - return intel_fbc_set_false_color(&dev_priv->fbc, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops, - i915_fbc_false_color_get, i915_fbc_false_color_set, - "%llu\n"); - static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2044,9 +1998,7 @@ i915_fifo_underrun_reset_write(struct file *filp, return ret; } - ret = intel_fbc_reset_underrun(&dev_priv->fbc); - if (ret) - return ret; + intel_fbc_reset_underrun(dev_priv); return cnt; } @@ -2060,7 +2012,6 @@ static const struct file_operations i915_fifo_underrun_reset_ops = { static const struct drm_info_list intel_display_debugfs_list[] = { {"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0}, - {"i915_fbc_status", i915_fbc_status, 0}, {"i915_ips_status", i915_ips_status, 0}, {"i915_sr_status", i915_sr_status, 0}, {"i915_opregion", i915_opregion, 0}, @@ -2085,7 +2036,6 @@ static const struct { {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, {"i915_cur_wm_latency", &i915_cur_wm_latency_fops}, - {"i915_fbc_false_color", &i915_fbc_false_color_fops}, {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, @@ -2112,6 +2062,8 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) drm_debugfs_create_files(intel_display_debugfs_list, ARRAY_SIZE(intel_display_debugfs_list), minor->debugfs_root, minor); + + intel_fbc_debugfs_register(i915); } static int i915_panel_show(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 229b4c127c6c..05babdcf5f2e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -5370,7 +5370,7 @@ static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - unsigned long abox_regs = INTEL_INFO(dev_priv)->abox_mask; + unsigned long abox_regs = INTEL_INFO(dev_priv)->display.abox_mask; u32 mask, val, i; if (IS_ALDERLAKE_P(dev_priv)) @@ -5830,7 +5830,7 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) enum intel_dram_type type = dev_priv->dram_info.type; u8 num_channels = dev_priv->dram_info.num_channels; const struct buddy_page_mask *table; - unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; + unsigned long abox_mask = INTEL_INFO(dev_priv)->display.abox_mask; int config, i; /* BW_BUDDY registers are not used on dgpu's beyond DG1 */ diff --git a/drivers/gpu/drm/i915/display/intel_display_trace.c b/drivers/gpu/drm/i915/display/intel_display_trace.c new file mode 100644 index 000000000000..737979ada869 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_trace.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "intel_display_trace.h" +#endif diff --git a/drivers/gpu/drm/i915/display/intel_display_trace.h b/drivers/gpu/drm/i915/display/intel_display_trace.h new file mode 100644 index 000000000000..4043e1276383 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_trace.h @@ -0,0 +1,587 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2021 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 + +#if !defined(__INTEL_DISPLAY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __INTEL_DISPLAY_TRACE_H__ + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include "i915_drv.h" +#include "intel_crtc.h" +#include "intel_display_types.h" + +TRACE_EVENT(intel_pipe_enable, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(enum pipe, pipe) + ), + TP_fast_assign( + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); + } + __entry->pipe = crtc->pipe; + ), + + TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + +TRACE_EVENT(intel_pipe_disable, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(enum pipe, pipe) + ), + + TP_fast_assign( + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); + } + __entry->pipe = crtc->pipe; + ), + + TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + +TRACE_EVENT(intel_pipe_crc, + TP_PROTO(struct intel_crtc *crtc, const u32 *crcs), + TP_ARGS(crtc, crcs), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __array(u32, crcs, 5) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->crcs, crcs, sizeof(__entry->crcs)); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u crc=%08x %08x %08x %08x %08x", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline, + __entry->crcs[0], __entry->crcs[1], __entry->crcs[2], + __entry->crcs[3], __entry->crcs[4]) +); + +TRACE_EVENT(intel_cpu_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), + TP_ARGS(dev_priv, pipe), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); + __entry->pipe = pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_pch_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pch_transcoder), + TP_ARGS(dev_priv, pch_transcoder), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + enum pipe pipe = pch_transcoder; + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); + __entry->pipe = pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pch transcoder %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_memory_cxsr, + TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), + TP_ARGS(dev_priv, old, new), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(bool, old) + __field(bool, new) + ), + + TP_fast_assign( + struct intel_crtc *crtc; + for_each_intel_crtc(&dev_priv->drm, crtc) { + __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc); + __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc); + } + __entry->old = old; + __entry->new = new; + ), + + TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + onoff(__entry->old), onoff(__entry->new), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + +TRACE_EVENT(g4x_wm, + TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u16, primary) + __field(u16, sprite) + __field(u16, cursor) + __field(u16, sr_plane) + __field(u16, sr_cursor) + __field(u16, sr_fbc) + __field(u16, hpll_plane) + __field(u16, hpll_cursor) + __field(u16, hpll_fbc) + __field(bool, cxsr) + __field(bool, hpll) + __field(bool, fbc) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + __entry->sr_fbc = wm->sr.fbc; + __entry->hpll_plane = wm->hpll.plane; + __entry->hpll_cursor = wm->hpll.cursor; + __entry->hpll_fbc = wm->hpll.fbc; + __entry->cxsr = wm->cxsr; + __entry->hpll = wm->hpll_en; + __entry->fbc = wm->fbc_en; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline, + __entry->primary, __entry->sprite, __entry->cursor, + yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc, + yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc, + yesno(__entry->fbc)) +); + +TRACE_EVENT(vlv_wm, + TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, level) + __field(u32, cxsr) + __field(u32, primary) + __field(u32, sprite0) + __field(u32, sprite1) + __field(u32, cursor) + __field(u32, sr_plane) + __field(u32, sr_cursor) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->level = wm->level; + __entry->cxsr = wm->cxsr; + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->level, __entry->cxsr, + __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, + __entry->sr_plane, __entry->sr_cursor) +); + +TRACE_EVENT(vlv_fifo_size, + TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), + TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, sprite0_start) + __field(u32, sprite1_start) + __field(u32, fifo_size) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->sprite0_start = sprite0_start; + __entry->sprite1_start = sprite1_start; + __entry->fifo_size = fifo_size; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->sprite0_start, + __entry->sprite1_start, __entry->fifo_size) +); + +TRACE_EVENT(intel_plane_update_noarm, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + __string(name, plane->name) + ), + + TP_fast_assign( + __assign_str(name, plane->name); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __get_str(name), + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_plane_update_arm, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + __string(name, plane->name) + ), + + TP_fast_assign( + __assign_str(name, plane->name); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __get_str(name), + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_plane_disable_arm, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __string(name, plane->name) + ), + + TP_fast_assign( + __assign_str(name, plane->name); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __get_str(name), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_fbc_activate, + TP_PROTO(struct intel_plane *plane), + TP_ARGS(plane), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev), + plane->pipe); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_fbc_deactivate, + TP_PROTO(struct intel_plane *plane), + TP_ARGS(plane), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev), + plane->pipe); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_fbc_nuke, + TP_PROTO(struct intel_plane *plane), + TP_ARGS(plane), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev), + plane->pipe); + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_crtc_vblank_work_start, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline) +); + +TRACE_EVENT(intel_crtc_vblank_work_end, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline) +); + +TRACE_EVENT(intel_pipe_update_start, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, min) + __field(u32, max) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->min = crtc->debug.min_vbl; + __entry->max = crtc->debug.max_vbl; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->min, __entry->max) +); + +TRACE_EVENT(intel_pipe_update_vblank_evaded, + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, min) + __field(u32, max) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->debug.start_vbl_count; + __entry->scanline = crtc->debug.scanline_start; + __entry->min = crtc->debug.min_vbl; + __entry->max = crtc->debug.max_vbl; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->min, __entry->max) +); + +TRACE_EVENT(intel_pipe_update_end, + TP_PROTO(struct intel_crtc *crtc, u32 frame, int scanline_end), + TP_ARGS(crtc, frame, scanline_end), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = frame; + __entry->scanline = scanline_end; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline) +); + +TRACE_EVENT(intel_frontbuffer_invalidate, + TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), + TP_ARGS(frontbuffer_bits, origin), + + TP_STRUCT__entry( + __field(unsigned int, frontbuffer_bits) + __field(unsigned int, origin) + ), + + TP_fast_assign( + __entry->frontbuffer_bits = frontbuffer_bits; + __entry->origin = origin; + ), + + TP_printk("frontbuffer_bits=0x%08x, origin=%u", + __entry->frontbuffer_bits, __entry->origin) +); + +TRACE_EVENT(intel_frontbuffer_flush, + TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), + TP_ARGS(frontbuffer_bits, origin), + + TP_STRUCT__entry( + __field(unsigned int, frontbuffer_bits) + __field(unsigned int, origin) + ), + + TP_fast_assign( + __entry->frontbuffer_bits = frontbuffer_bits; + __entry->origin = origin; + ), + + TP_printk("frontbuffer_bits=0x%08x, origin=%u", + __entry->frontbuffer_bits, __entry->origin) +); + +#endif /* __INTEL_DISPLAY_TRACE_H__ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915/display +#define TRACE_INCLUDE_FILE intel_display_trace +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ea1e8a6e10b0..c9c6efadf8b4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -36,6 +36,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_dp_dual_mode_helper.h> #include <drm/drm_dp_mst_helper.h> +#include <drm/drm_dsc.h> #include <drm/drm_encoder.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> @@ -46,12 +47,19 @@ #include <drm/i915_mei_hdcp_interface.h> #include <media/cec-notifier.h> -#include "i915_drv.h" +#include "i915_vma.h" +#include "i915_vma_types.h" +#include "intel_bios.h" +#include "intel_display.h" +#include "intel_display_power.h" +#include "intel_dpll_mgr.h" +#include "intel_pm_types.h" struct drm_printer; struct __intel_global_objs_state; struct intel_ddi_buf_trans; struct intel_fbc; +struct intel_connector; /* * Display related stuff @@ -687,6 +695,8 @@ struct intel_plane_state { /* Clear Color Value */ u64 ccval; + + const char *no_fbc_reason; }; struct intel_initial_plane_config { @@ -1117,8 +1127,6 @@ struct intel_crtc_state { bool crc_enabled; - bool enable_fbc; - bool double_wide; int pbn; @@ -1653,6 +1661,9 @@ struct intel_dp { struct intel_dp_pcon_frl frl; struct intel_psr psr; + + /* When we last wrote the OUI for eDP */ + unsigned long last_oui_write; }; enum lspcon_vendor { @@ -1770,35 +1781,6 @@ vlv_pipe_to_channel(enum pipe pipe) } } -static inline bool intel_pipe_valid(struct drm_i915_private *i915, enum pipe pipe) -{ - return (pipe >= 0 && - pipe < ARRAY_SIZE(i915->pipe_to_crtc_mapping) && - INTEL_INFO(i915)->pipe_mask & BIT(pipe) && - i915->pipe_to_crtc_mapping[pipe]); -} - -static inline struct intel_crtc * -intel_get_first_crtc(struct drm_i915_private *dev_priv) -{ - return to_intel_crtc(drm_crtc_from_index(&dev_priv->drm, 0)); -} - -static inline struct intel_crtc * -intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) -{ - /* pipe_to_crtc_mapping may have hole on any of 3 display pipe system */ - drm_WARN_ON(&dev_priv->drm, - !(INTEL_INFO(dev_priv)->pipe_mask & BIT(pipe))); - return dev_priv->pipe_to_crtc_mapping[pipe]; -} - -static inline struct intel_crtc * -intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum i9xx_plane_id plane) -{ - return dev_priv->plane_to_crtc_mapping[plane]; -} - struct intel_load_detect_pipe { struct drm_atomic_state *restore_state; }; @@ -1908,11 +1890,7 @@ dp_to_lspcon(struct intel_dp *intel_dp) return &dp_to_dig_port(intel_dp)->lspcon; } -static inline struct drm_i915_private * -dp_to_i915(struct intel_dp *intel_dp) -{ - return to_i915(dp_to_dig_port(intel_dp)->base.base.dev); -} +#define dp_to_i915(__intel_dp) to_i915(dp_to_dig_port(__intel_dp)->base.base.dev) #define CAN_PSR(intel_dp) ((intel_dp)->psr.sink_support && \ (intel_dp)->psr.source_support) @@ -2016,33 +1994,6 @@ intel_crtc_needs_modeset(const struct intel_crtc_state *crtc_state) return drm_atomic_crtc_needs_modeset(&crtc_state->uapi); } -static inline void -intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) -{ - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - drm_crtc_wait_one_vblank(&crtc->base); -} - -static inline void -intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pipe) -{ - const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - if (crtc->active) - intel_wait_for_vblank(dev_priv, pipe); -} - -static inline bool intel_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) -{ - return DISPLAY_VER(i915) >= 13 && modifier != DRM_FORMAT_MOD_LINEAR; -} - -static inline bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) -{ - return fb && intel_modifier_uses_dpt(to_i915(fb->dev), fb->modifier); -} - static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state) { return i915_ggtt_offset(plane_state->ggtt_vma); diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 2dc9d632969d..a69b28d65a9b 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -45,8 +45,10 @@ #define GEN12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE -#define ADLP_DMC_PATH DMC_PATH(adlp, 2, 12) -#define ADLP_DMC_VERSION_REQUIRED DMC_VERSION(2, 12) +#define GEN13_DMC_MAX_FW_SIZE 0x20000 + +#define ADLP_DMC_PATH DMC_PATH(adlp, 2, 14) +#define ADLP_DMC_VERSION_REQUIRED DMC_VERSION(2, 14) MODULE_FIRMWARE(ADLP_DMC_PATH); #define ADLS_DMC_PATH DMC_PATH(adls, 2, 01) @@ -596,7 +598,7 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, continue; offset = readcount + dmc->dmc_info[id].dmc_offset * 4; - if (fw->size - offset < 0) { + if (offset > fw->size) { drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); continue; } @@ -682,7 +684,7 @@ void intel_dmc_ucode_init(struct drm_i915_private *dev_priv) if (IS_ALDERLAKE_P(dev_priv)) { dmc->fw_path = ADLP_DMC_PATH; dmc->required_version = ADLP_DMC_VERSION_REQUIRED; - dmc->max_fw_size = GEN12_DMC_MAX_FW_SIZE; + dmc->max_fw_size = GEN13_DMC_MAX_FW_SIZE; } else if (IS_ALDERLAKE_S(dev_priv)) { dmc->fw_path = ADLS_DMC_PATH; dmc->required_version = ADLS_DMC_VERSION_REQUIRED; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0a424bf69396..b5e2508db1cf 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -29,6 +29,7 @@ #include <linux/i2c.h> #include <linux/notifier.h> #include <linux/slab.h> +#include <linux/timekeeping.h> #include <linux/types.h> #include <asm/byteorder.h> @@ -46,6 +47,7 @@ #include "intel_audio.h" #include "intel_backlight.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_de.h" #include "intel_display_types.h" @@ -2010,6 +2012,16 @@ intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful) if (drm_dp_dpcd_write(&intel_dp->aux, DP_SOURCE_OUI, oui, sizeof(oui)) < 0) drm_err(&i915->drm, "Failed to write source OUI\n"); + + intel_dp->last_oui_write = jiffies; +} + +void intel_dp_wait_source_oui(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + drm_dbg_kms(&i915->drm, "Performing OUI wait\n"); + wait_remaining_ms_from_jiffies(intel_dp->last_oui_write, 30); } /* If the device supports it, try to set the power state appropriately */ @@ -3894,7 +3906,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder, to_intel_crtc_state(crtc->base.state); /* Keep underrun reporting disabled until things are stable */ - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); if (crtc_state->has_pch_encoder) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index ce229026dc91..b64145a3869a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -119,4 +119,6 @@ void intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_dp_phy_test(struct intel_encoder *encoder); +void intel_dp_wait_source_oui(struct intel_dp *intel_dp); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 8b9c925c4c16..97cf3cac0105 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -34,8 +34,10 @@ * for some reason. */ +#include "i915_drv.h" #include "intel_backlight.h" #include "intel_display_types.h" +#include "intel_dp.h" #include "intel_dp_aux_backlight.h" /* TODO: @@ -106,6 +108,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) int ret; u8 tcon_cap[4]; + intel_dp_wait_source_oui(intel_dp); + ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap)); if (ret != sizeof(tcon_cap)) return false; @@ -204,6 +208,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, int ret; u8 old_ctrl, ctrl; + intel_dp_wait_source_oui(intel_dp); + ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); if (ret != 1) { drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index e264467de8ed..9451f336f28f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -21,11 +21,11 @@ * IN THE SOFTWARE. */ +#include "i915_drv.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" - static void intel_dp_reset_lttpr_common_caps(struct intel_dp *intel_dp) { memset(intel_dp->lttpr_common_caps, 0, sizeof(intel_dp->lttpr_common_caps)); diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 04a7af8340ca..1ce0c171f4fb 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -1823,7 +1823,7 @@ void chv_enable_pll(const struct intel_crtc_state *crtc_state) int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, const struct dpll *dpll) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); struct intel_crtc_state *crtc_state; crtc_state = intel_crtc_state_alloc(crtc); diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 963ca7155b06..8f674745e7e0 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -264,7 +264,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm = &dpt->vm; - vm->gt = &i915->gt; + vm->gt = to_gt(i915); vm->i915 = i915; vm->dma = i915->drm.dev; vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; @@ -279,8 +279,6 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma = dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->vma_ops.set_pages = ggtt_set_pages; - vm->vma_ops.clear_pages = clear_pages; vm->pte_encode = gen8_ggtt_pte_encode; diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c index 6b0301ba046e..a50422e03a7e 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.c +++ b/drivers/gpu/drm/i915/display/intel_dsi.c @@ -4,6 +4,8 @@ */ #include <drm/drm_mipi_dsi.h> + +#include "i915_drv.h" #include "intel_dsi.h" #include "intel_panel.h" diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index c4a743d0913f..23cfe2e5ce2a 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -6,6 +6,7 @@ #include <drm/drm_framebuffer.h> #include <drm/drm_modeset_helper.h> +#include "i915_drv.h" #include "intel_display.h" #include "intel_display_types.h" #include "intel_dpt.h" @@ -658,6 +659,16 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) } } +static bool intel_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) +{ + return DISPLAY_VER(i915) >= 13 && modifier != DRM_FORMAT_MOD_LINEAR; +} + +bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) +{ + return fb && intel_modifier_uses_dpt(to_i915(fb->dev), fb->modifier); +} + unsigned int intel_cursor_alignment(const struct drm_i915_private *i915) { if (IS_I830(i915)) diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index b54997175d6d..ba9df8986c1e 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -90,4 +90,6 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, const struct drm_mode_fb_cmd2 *user_mode_cmd); +bool intel_fb_uses_dpt(const struct drm_framebuffer *fb); + #endif /* __INTEL_FB_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 3b20f69e0240..31c15e5fca95 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -7,13 +7,13 @@ * DOC: display pinning helpers */ -#include "intel_display_types.h" -#include "intel_fb_pin.h" -#include "intel_fb.h" +#include "gem/i915_gem_object.h" +#include "i915_drv.h" +#include "intel_display_types.h" #include "intel_dpt.h" - -#include "gem/i915_gem_object.h" +#include "intel_fb.h" +#include "intel_fb_pin.h" static struct i915_vma * intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index d0c34bc3af6c..8be01b93015f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -41,9 +41,10 @@ #include <drm/drm_fourcc.h> #include "i915_drv.h" -#include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_cdclk.h" #include "intel_de.h" +#include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -58,19 +59,53 @@ struct intel_fbc_funcs { void (*set_false_color)(struct intel_fbc *fbc, bool enable); }; -/* - * For SKL+, the plane source size used by the hardware is based on the value we - * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value - * we wrote to PIPESRC. - */ -static void intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache *cache, - int *width, int *height) -{ - if (width) - *width = cache->plane.src_w; - if (height) - *height = cache->plane.src_h; -} +struct intel_fbc_state { + struct intel_plane *plane; + unsigned int cfb_stride; + unsigned int cfb_size; + unsigned int fence_y_offset; + u16 override_cfb_stride; + u16 interval; + s8 fence_id; +}; + +struct intel_fbc { + struct drm_i915_private *i915; + const struct intel_fbc_funcs *funcs; + + /* + * This is always the inner lock when overlapping with + * struct_mutex and it's the outer lock when overlapping + * with stolen_lock. + */ + struct mutex lock; + unsigned int possible_framebuffer_bits; + unsigned int busy_bits; + + struct drm_mm_node compressed_fb; + struct drm_mm_node compressed_llb; + + u8 limit; + + bool false_color; + + bool active; + bool activated; + bool flip_pending; + + bool underrun_detected; + struct work_struct underrun_work; + + /* + * This structure contains everything that's relevant to program the + * hardware registers. When we want to figure out if we need to disable + * and re-enable FBC for a new configuration we just check if there's + * something different in the struct. The genx_fbc_activate functions + * are supposed to read from it in order to program the registers. + */ + struct intel_fbc_state state; + const char *no_fbc_reason; +}; /* plane stride in pixels */ static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane_state) @@ -86,25 +121,25 @@ static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane } /* plane stride based cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int _intel_fbc_cfb_stride(const struct intel_fbc_state_cache *cache) +static unsigned int _intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) { unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ - return cache->fb.stride * cpp; + return intel_fbc_plane_stride(plane_state) * cpp; } /* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int skl_fbc_min_cfb_stride(struct intel_fbc *fbc, - const struct intel_fbc_state_cache *cache) +static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = fbc->i915; + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); unsigned int limit = 4; /* 1:4 compression limit is the worst case */ unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ + unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; unsigned int height = 4; /* FBC segment is 4 lines */ unsigned int stride; /* minimum segment stride we can use */ - stride = cache->plane.src_w * cpp * height / limit; + stride = width * cpp * height / limit; /* * Wa_16011863758: icl+ @@ -124,11 +159,10 @@ static unsigned int skl_fbc_min_cfb_stride(struct intel_fbc *fbc, } /* properly aligned cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int intel_fbc_cfb_stride(struct intel_fbc *fbc, - const struct intel_fbc_state_cache *cache) +static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = fbc->i915; - unsigned int stride = _intel_fbc_cfb_stride(cache); + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + unsigned int stride = _intel_fbc_cfb_stride(plane_state); /* * At least some of the platforms require each 4 line segment to @@ -136,33 +170,53 @@ static unsigned int intel_fbc_cfb_stride(struct intel_fbc *fbc, * that regardless of the compression limit we choose later. */ if (DISPLAY_VER(i915) >= 9) - return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(fbc, cache)); + return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(plane_state)); else return stride; } -static unsigned int intel_fbc_cfb_size(struct intel_fbc *fbc, - const struct intel_fbc_state_cache *cache) +static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = fbc->i915; - int lines = cache->plane.src_h; + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + int lines = drm_rect_height(&plane_state->uapi.src) >> 16; if (DISPLAY_VER(i915) == 7) lines = min(lines, 2048); else if (DISPLAY_VER(i915) >= 8) lines = min(lines, 2560); - return lines * intel_fbc_cfb_stride(fbc, cache); + return lines * intel_fbc_cfb_stride(plane_state); +} + +static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + unsigned int stride_aligned = intel_fbc_cfb_stride(plane_state); + unsigned int stride = _intel_fbc_cfb_stride(plane_state); + const struct drm_framebuffer *fb = plane_state->hw.fb; + + /* + * Override stride in 64 byte units per 4 line segment. + * + * Gen9 hw miscalculates cfb stride for linear as + * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so + * we always need to use the override there. + */ + if (stride != stride_aligned || + (DISPLAY_VER(i915) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) + return stride_aligned * 4 / 64; + + return 0; } static u32 i8xx_fbc_ctl(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; unsigned int cfb_stride; u32 fbc_ctl; - cfb_stride = params->cfb_stride / fbc->limit; + cfb_stride = fbc_state->cfb_stride / fbc->limit; /* FBC_CTL wants 32B or 64B units */ if (DISPLAY_VER(i915) == 2) @@ -171,27 +225,27 @@ static u32 i8xx_fbc_ctl(struct intel_fbc *fbc) cfb_stride = (cfb_stride / 64) - 1; fbc_ctl = FBC_CTL_PERIODIC | - FBC_CTL_INTERVAL(params->interval) | + FBC_CTL_INTERVAL(fbc_state->interval) | FBC_CTL_STRIDE(cfb_stride); if (IS_I945GM(i915)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ - if (params->fence_id >= 0) - fbc_ctl |= FBC_CTL_FENCENO(params->fence_id); + if (fbc_state->fence_id >= 0) + fbc_ctl |= FBC_CTL_FENCENO(fbc_state->fence_id); return fbc_ctl; } static u32 i965_fbc_ctl2(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; u32 fbc_ctl2; fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | - FBC_CTL_PLANE(params->crtc.i9xx_plane); + FBC_CTL_PLANE(fbc_state->plane->i9xx_plane); - if (params->fence_id >= 0) + if (fbc_state->fence_id >= 0) fbc_ctl2 |= FBC_CTL_CPU_FENCE_EN; return fbc_ctl2; @@ -220,7 +274,7 @@ static void i8xx_fbc_deactivate(struct intel_fbc *fbc) static void i8xx_fbc_activate(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; int i; @@ -232,7 +286,7 @@ static void i8xx_fbc_activate(struct intel_fbc *fbc) intel_de_write(i915, FBC_CONTROL2, i965_fbc_ctl2(fbc)); intel_de_write(i915, FBC_FENCE_OFF, - params->fence_y_offset); + fbc_state->fence_y_offset); } intel_de_write(i915, FBC_CONTROL, @@ -252,8 +306,8 @@ static bool i8xx_fbc_is_compressing(struct intel_fbc *fbc) static void i8xx_fbc_nuke(struct intel_fbc *fbc) { - struct intel_fbc_reg_params *params = &fbc->params; - enum i9xx_plane_id i9xx_plane = params->crtc.i9xx_plane; + struct intel_fbc_state *fbc_state = &fbc->state; + enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; struct drm_i915_private *dev_priv = fbc->i915; spin_lock_irq(&dev_priv->uncore.lock); @@ -288,8 +342,8 @@ static const struct intel_fbc_funcs i8xx_fbc_funcs = { static void i965_fbc_nuke(struct intel_fbc *fbc) { - struct intel_fbc_reg_params *params = &fbc->params; - enum i9xx_plane_id i9xx_plane = params->crtc.i9xx_plane; + struct intel_fbc_state *fbc_state = &fbc->state; + enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; struct drm_i915_private *dev_priv = fbc->i915; spin_lock_irq(&dev_priv->uncore.lock); @@ -324,21 +378,21 @@ static u32 g4x_dpfc_ctl_limit(struct intel_fbc *fbc) static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc) | - DPFC_CTL_PLANE_G4X(params->crtc.i9xx_plane); + DPFC_CTL_PLANE_G4X(fbc_state->plane->i9xx_plane); if (IS_G4X(i915)) dpfc_ctl |= DPFC_CTL_SR_EN; - if (params->fence_id >= 0) { + if (fbc_state->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN_G4X; if (DISPLAY_VER(i915) < 6) - dpfc_ctl |= DPFC_CTL_FENCENO(params->fence_id); + dpfc_ctl |= DPFC_CTL_FENCENO(fbc_state->fence_id); } return dpfc_ctl; @@ -346,11 +400,11 @@ static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) static void g4x_fbc_activate(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; intel_de_write(i915, DPFC_FENCE_YOFF, - params->fence_y_offset); + fbc_state->fence_y_offset); intel_de_write(i915, DPFC_CONTROL, DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); @@ -397,11 +451,11 @@ static const struct intel_fbc_funcs g4x_fbc_funcs = { static void ilk_fbc_activate(struct intel_fbc *fbc) { - struct intel_fbc_reg_params *params = &fbc->params; + struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; intel_de_write(i915, ILK_DPFC_FENCE_YOFF, - params->fence_y_offset); + fbc_state->fence_y_offset); intel_de_write(i915, ILK_DPFC_CONTROL, DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); @@ -448,15 +502,15 @@ static const struct intel_fbc_funcs ilk_fbc_funcs = { static void snb_fbc_program_fence(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; u32 ctl = 0; - if (params->fence_id >= 0) - ctl = SNB_DPFC_FENCE_EN | SNB_DPFC_FENCENO(params->fence_id); + if (fbc_state->fence_id >= 0) + ctl = SNB_DPFC_FENCE_EN | SNB_DPFC_FENCENO(fbc_state->fence_id); intel_de_write(i915, SNB_DPFC_CTL_SA, ctl); - intel_de_write(i915, SNB_DPFC_CPU_FENCE_OFFSET, params->fence_y_offset); + intel_de_write(i915, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset); } static void snb_fbc_activate(struct intel_fbc *fbc) @@ -485,27 +539,27 @@ static const struct intel_fbc_funcs snb_fbc_funcs = { static void glk_fbc_program_cfb_stride(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; u32 val = 0; - if (params->override_cfb_stride) + if (fbc_state->override_cfb_stride) val |= FBC_STRIDE_OVERRIDE | - FBC_STRIDE(params->override_cfb_stride / fbc->limit); + FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); intel_de_write(i915, GLK_FBC_STRIDE, val); } static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; u32 val = 0; /* Display WA #0529: skl, kbl, bxt. */ - if (params->override_cfb_stride) + if (fbc_state->override_cfb_stride) val |= CHICKEN_FBC_STRIDE_OVERRIDE | - CHICKEN_FBC_STRIDE(params->override_cfb_stride / fbc->limit); + CHICKEN_FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); intel_de_rmw(i915, CHICKEN_MISC_4, CHICKEN_FBC_STRIDE_OVERRIDE | @@ -514,16 +568,16 @@ static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) { - const struct intel_fbc_reg_params *params = &fbc->params; + const struct intel_fbc_state *fbc_state = &fbc->state; struct drm_i915_private *i915 = fbc->i915; u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc); if (IS_IVYBRIDGE(i915)) - dpfc_ctl |= DPFC_CTL_PLANE_IVB(params->crtc.i9xx_plane); + dpfc_ctl |= DPFC_CTL_PLANE_IVB(fbc_state->plane->i9xx_plane); - if (params->fence_id >= 0) + if (fbc_state->fence_id >= 0) dpfc_ctl |= DPFC_CTL_FENCE_EN_IVB; if (fbc->false_color) @@ -577,7 +631,7 @@ static bool intel_fbc_hw_is_active(struct intel_fbc *fbc) static void intel_fbc_hw_activate(struct intel_fbc *fbc) { - trace_intel_fbc_activate(fbc->crtc); + trace_intel_fbc_activate(fbc->state.plane); fbc->active = true; fbc->activated = true; @@ -587,59 +641,31 @@ static void intel_fbc_hw_activate(struct intel_fbc *fbc) static void intel_fbc_hw_deactivate(struct intel_fbc *fbc) { - trace_intel_fbc_deactivate(fbc->crtc); + trace_intel_fbc_deactivate(fbc->state.plane); fbc->active = false; fbc->funcs->deactivate(fbc); } -bool intel_fbc_is_compressing(struct intel_fbc *fbc) +static bool intel_fbc_is_compressing(struct intel_fbc *fbc) { return fbc->funcs->is_compressing(fbc); } static void intel_fbc_nuke(struct intel_fbc *fbc) { - trace_intel_fbc_nuke(fbc->crtc); + trace_intel_fbc_nuke(fbc->state.plane); fbc->funcs->nuke(fbc); } -int intel_fbc_set_false_color(struct intel_fbc *fbc, bool enable) -{ - if (!fbc->funcs || !fbc->funcs->set_false_color) - return -ENODEV; - - mutex_lock(&fbc->lock); - - fbc->false_color = enable; - - fbc->funcs->set_false_color(fbc, enable); - - mutex_unlock(&fbc->lock); - - return 0; -} - -/** - * intel_fbc_is_active - Is FBC active? - * @fbc: The FBC instance - * - * This function is used to verify the current state of FBC. - * - * FIXME: This should be tracked in the plane config eventually - * instead of queried at runtime for most callers. - */ -bool intel_fbc_is_active(struct intel_fbc *fbc) -{ - return fbc->active; -} - static void intel_fbc_activate(struct intel_fbc *fbc) { intel_fbc_hw_activate(fbc); intel_fbc_nuke(fbc); + + fbc->no_fbc_reason = NULL; } static void intel_fbc_deactivate(struct intel_fbc *fbc, const char *reason) @@ -679,9 +705,9 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private *i915) return min(end, intel_fbc_cfb_base_max(i915)); } -static int intel_fbc_min_limit(int fb_cpp) +static int intel_fbc_min_limit(const struct intel_plane_state *plane_state) { - return fb_cpp == 2 ? 2 : 1; + return plane_state->hw.fb->format->cpp[0] == 2 ? 2 : 1; } static int intel_fbc_max_limit(struct drm_i915_private *i915) @@ -784,19 +810,25 @@ static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) void intel_fbc_cleanup(struct drm_i915_private *i915) { - struct intel_fbc *fbc = &i915->fbc; + struct intel_fbc *fbc = i915->fbc; - if (!HAS_FBC(i915)) + if (!fbc) return; mutex_lock(&fbc->lock); __intel_fbc_cleanup_cfb(fbc); mutex_unlock(&fbc->lock); + + kfree(fbc); } -static bool stride_is_valid(struct drm_i915_private *i915, - u64 modifier, unsigned int stride) +static bool stride_is_valid(const struct intel_plane_state *plane_state) { + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int stride = intel_fbc_plane_stride(plane_state) * + fb->format->cpp[0]; + /* This should have been caught earlier. */ if (drm_WARN_ON_ONCE(&i915->drm, (stride & (64 - 1)) != 0)) return false; @@ -813,7 +845,7 @@ static bool stride_is_valid(struct drm_i915_private *i915, /* Display WA #1105: skl,bxt,kbl,cfl,glk */ if ((DISPLAY_VER(i915) == 9 || IS_GEMINILAKE(i915)) && - modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) + fb->modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) return false; if (stride > 16384) @@ -822,10 +854,12 @@ static bool stride_is_valid(struct drm_i915_private *i915, return true; } -static bool pixel_format_is_valid(struct drm_i915_private *i915, - u32 pixel_format) +static bool pixel_format_is_valid(const struct intel_plane_state *plane_state) { - switch (pixel_format) { + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + + switch (fb->format->format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: return true; @@ -843,10 +877,13 @@ static bool pixel_format_is_valid(struct drm_i915_private *i915, } } -static bool rotation_is_valid(struct drm_i915_private *i915, - u32 pixel_format, unsigned int rotation) +static bool rotation_is_valid(const struct intel_plane_state *plane_state) { - if (DISPLAY_VER(i915) >= 9 && pixel_format == DRM_FORMAT_RGB565 && + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + unsigned int rotation = plane_state->hw.rotation; + + if (DISPLAY_VER(i915) >= 9 && fb->format->format == DRM_FORMAT_RGB565 && drm_rotation_90_or_270(rotation)) return false; else if (DISPLAY_VER(i915) <= 4 && !IS_G4X(i915) && @@ -862,10 +899,9 @@ static bool rotation_is_valid(struct drm_i915_private *i915, * the X and Y offset registers. That's why we include the src x/y offsets * instead of just looking at the plane size. */ -static bool intel_fbc_hw_tracking_covers_screen(struct intel_fbc *fbc, - struct intel_crtc *crtc) +static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = fbc->i915; + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); unsigned int effective_w, effective_h, max_w, max_h; if (DISPLAY_VER(i915) >= 10) { @@ -882,18 +918,20 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_fbc *fbc, max_h = 1536; } - intel_fbc_get_plane_source_size(&fbc->state_cache, &effective_w, - &effective_h); - effective_w += fbc->state_cache.plane.adjusted_x; - effective_h += fbc->state_cache.plane.adjusted_y; + effective_w = plane_state->view.color_plane[0].x + + (drm_rect_width(&plane_state->uapi.src) >> 16); + effective_h = plane_state->view.color_plane[0].y + + (drm_rect_height(&plane_state->uapi.src) >> 16); return effective_w <= max_w && effective_h <= max_h; } -static bool tiling_is_valid(struct drm_i915_private *i915, - u64 modifier) +static bool tiling_is_valid(const struct intel_plane_state *plane_state) { - switch (modifier) { + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + const struct drm_framebuffer *fb = plane_state->hw.fb; + + switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: @@ -905,208 +943,163 @@ static bool tiling_is_valid(struct drm_i915_private *i915, } } -static void intel_fbc_update_state_cache(struct intel_crtc *crtc, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static void intel_fbc_update_state(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &i915->fbc; - struct intel_fbc_state_cache *cache = &fbc->state_cache; - struct drm_framebuffer *fb = plane_state->hw.fb; - - cache->plane.visible = plane_state->uapi.visible; - if (!cache->plane.visible) - return; - - cache->crtc.mode_flags = crtc_state->hw.adjusted_mode.flags; - if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; - - cache->plane.rotation = plane_state->hw.rotation; - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - cache->plane.src_w = drm_rect_width(&plane_state->uapi.src) >> 16; - cache->plane.src_h = drm_rect_height(&plane_state->uapi.src) >> 16; - cache->plane.adjusted_x = plane_state->view.color_plane[0].x; - cache->plane.adjusted_y = plane_state->view.color_plane[0].y; + struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *plane_state = + intel_atomic_get_new_plane_state(state, plane); + struct intel_fbc *fbc = plane->fbc; + struct intel_fbc_state *fbc_state = &fbc->state; - cache->plane.pixel_blend_mode = plane_state->hw.pixel_blend_mode; + WARN_ON(plane_state->no_fbc_reason); - cache->fb.format = fb->format; - cache->fb.modifier = fb->modifier; - cache->fb.stride = intel_fbc_plane_stride(plane_state); + fbc_state->plane = plane; /* FBC1 compression interval: arbitrary choice of 1 second */ - cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode); + fbc_state->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode); - cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); + fbc_state->fence_y_offset = intel_plane_fence_y_offset(plane_state); drm_WARN_ON(&i915->drm, plane_state->flags & PLANE_HAS_FENCE && !plane_state->ggtt_vma->fence); if (plane_state->flags & PLANE_HAS_FENCE && plane_state->ggtt_vma->fence) - cache->fence_id = plane_state->ggtt_vma->fence->id; + fbc_state->fence_id = plane_state->ggtt_vma->fence->id; else - cache->fence_id = -1; + fbc_state->fence_id = -1; - cache->psr2_active = crtc_state->has_psr2; + fbc_state->cfb_stride = intel_fbc_cfb_stride(plane_state); + fbc_state->cfb_size = intel_fbc_cfb_size(plane_state); + fbc_state->override_cfb_stride = intel_fbc_override_cfb_stride(plane_state); } -static bool intel_fbc_cfb_size_changed(struct intel_fbc *fbc) +static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state) { - return intel_fbc_cfb_size(fbc, &fbc->state_cache) > - fbc->compressed_fb.size * fbc->limit; -} + struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); -static u16 intel_fbc_override_cfb_stride(struct intel_fbc *fbc, - const struct intel_fbc_state_cache *cache) -{ - unsigned int stride = _intel_fbc_cfb_stride(cache); - unsigned int stride_aligned = intel_fbc_cfb_stride(fbc, cache); - - /* - * Override stride in 64 byte units per 4 line segment. + /* The use of a CPU fence is one of two ways to detect writes by the + * CPU to the scanout and trigger updates to the FBC. * - * Gen9 hw miscalculates cfb stride for linear as - * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so - * we always need to use the override there. + * The other method is by software tracking (see + * intel_fbc_invalidate/flush()), it will manually notify FBC and nuke + * the current compressed buffer and recompress it. + * + * Note that is possible for a tiled surface to be unmappable (and + * so have no fence associated with it) due to aperture constraints + * at the time of pinning. + * + * FIXME with 90/270 degree rotation we should use the fence on + * the normal GTT view (the rotated view doesn't even have a + * fence). Would need changes to the FBC fence Y offset as well. + * For now this will effectively disable FBC with 90/270 degree + * rotation. */ - if (stride != stride_aligned || - (DISPLAY_VER(fbc->i915) == 9 && - cache->fb.modifier == DRM_FORMAT_MOD_LINEAR)) - return stride_aligned * 4 / 64; - - return 0; + return DISPLAY_VER(i915) >= 9 || + (plane_state->flags & PLANE_HAS_FENCE && + plane_state->ggtt_vma->fence); } -static bool intel_fbc_can_enable(struct intel_fbc *fbc) +static bool intel_fbc_is_cfb_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = fbc->i915; - - if (intel_vgpu_active(i915)) { - fbc->no_fbc_reason = "VGPU is active"; - return false; - } - - if (!i915->params.enable_fbc) { - fbc->no_fbc_reason = "disabled per module param or by default"; - return false; - } + struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct intel_fbc *fbc = plane->fbc; - if (fbc->underrun_detected) { - fbc->no_fbc_reason = "underrun detected"; - return false; - } + return intel_fbc_min_limit(plane_state) <= fbc->limit && + intel_fbc_cfb_size(plane_state) <= fbc->compressed_fb.size * fbc->limit; +} - return true; +static bool intel_fbc_is_ok(const struct intel_plane_state *plane_state) +{ + return !plane_state->no_fbc_reason && + intel_fbc_is_fence_ok(plane_state) && + intel_fbc_is_cfb_ok(plane_state); } -static bool intel_fbc_can_activate(struct intel_crtc *crtc) +static int intel_fbc_check_plane(struct intel_atomic_state *state, + struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &i915->fbc; - struct intel_fbc_state_cache *cache = &fbc->state_cache; + struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_plane_state *plane_state = + intel_atomic_get_new_plane_state(state, plane); + const struct drm_framebuffer *fb = plane_state->hw.fb; + struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc); + const struct intel_crtc_state *crtc_state; + struct intel_fbc *fbc = plane->fbc; - if (!intel_fbc_can_enable(fbc)) - return false; + if (!fbc) + return 0; - if (!cache->plane.visible) { - fbc->no_fbc_reason = "primary plane not visible"; - return false; + if (intel_vgpu_active(i915)) { + plane_state->no_fbc_reason = "VGPU active"; + return 0; } - /* We don't need to use a state cache here since this information is - * global for all CRTC. - */ - if (fbc->underrun_detected) { - fbc->no_fbc_reason = "underrun detected"; - return false; + if (!i915->params.enable_fbc) { + plane_state->no_fbc_reason = "disabled per module param or by default"; + return 0; } - if (cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) { - fbc->no_fbc_reason = "incompatible mode"; - return false; + if (!plane_state->uapi.visible) { + plane_state->no_fbc_reason = "plane not visible"; + return 0; } - if (!intel_fbc_hw_tracking_covers_screen(fbc, crtc)) { - fbc->no_fbc_reason = "mode too large for compression"; - return false; + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { + plane_state->no_fbc_reason = "interlaced mode not supported"; + return 0; } - /* The use of a CPU fence is one of two ways to detect writes by the - * CPU to the scanout and trigger updates to the FBC. - * - * The other method is by software tracking (see - * intel_fbc_invalidate/flush()), it will manually notify FBC and nuke - * the current compressed buffer and recompress it. - * - * Note that is possible for a tiled surface to be unmappable (and - * so have no fence associated with it) due to aperture constraints - * at the time of pinning. - * - * FIXME with 90/270 degree rotation we should use the fence on - * the normal GTT view (the rotated view doesn't even have a - * fence). Would need changes to the FBC fence Y offset as well. - * For now this will effectively disable FBC with 90/270 degree - * rotation. - */ - if (DISPLAY_VER(i915) < 9 && cache->fence_id < 0) { - fbc->no_fbc_reason = "framebuffer not tiled or fenced"; - return false; + if (crtc_state->double_wide) { + plane_state->no_fbc_reason = "double wide pipe not supported"; + return 0; } - if (!pixel_format_is_valid(i915, cache->fb.format->format)) { - fbc->no_fbc_reason = "pixel format is invalid"; + /* + * Display 12+ is not supporting FBC with PSR2. + * Recommendation is to keep this combination disabled + * Bspec: 50422 HSD: 14010260002 + */ + if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) { + plane_state->no_fbc_reason = "PSR2 enabled"; return false; } - if (!rotation_is_valid(i915, cache->fb.format->format, - cache->plane.rotation)) { - fbc->no_fbc_reason = "rotation unsupported"; - return false; + if (!pixel_format_is_valid(plane_state)) { + plane_state->no_fbc_reason = "pixel format not supported"; + return 0; } - if (!tiling_is_valid(i915, cache->fb.modifier)) { - fbc->no_fbc_reason = "tiling unsupported"; - return false; + if (!tiling_is_valid(plane_state)) { + plane_state->no_fbc_reason = "tiling not supported"; + return 0; } - if (!stride_is_valid(i915, cache->fb.modifier, - cache->fb.stride * cache->fb.format->cpp[0])) { - fbc->no_fbc_reason = "framebuffer stride not supported"; - return false; + if (!rotation_is_valid(plane_state)) { + plane_state->no_fbc_reason = "rotation not supported"; + return 0; } - if (cache->plane.pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE && - cache->fb.format->has_alpha) { - fbc->no_fbc_reason = "per-pixel alpha blending is incompatible with FBC"; - return false; + if (!stride_is_valid(plane_state)) { + plane_state->no_fbc_reason = "stride not supported"; + return 0; } - /* WaFbcExceedCdClockThreshold:hsw,bdw */ - if ((IS_HASWELL(i915) || IS_BROADWELL(i915)) && - cache->crtc.hsw_bdw_pixel_rate >= i915->cdclk.hw.cdclk * 95 / 100) { - fbc->no_fbc_reason = "pixel rate is too big"; + if (plane_state->hw.pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE && + fb->format->has_alpha) { + plane_state->no_fbc_reason = "per-pixel alpha not supported"; return false; } - /* It is possible for the required CFB size change without a - * crtc->disable + crtc->enable since it is possible to change the - * stride without triggering a full modeset. Since we try to - * over-allocate the CFB, there's a chance we may keep FBC enabled even - * if this happens, but if we exceed the current CFB size we'll have to - * disable FBC. Notice that it would be possible to disable FBC, wait - * for a frame, free the stolen node, then try to reenable FBC in case - * we didn't get any invalidate/deactivate calls, but this would require - * a lot of tracking just for a specific case. If we conclude it's an - * important case, we can implement it later. */ - if (intel_fbc_cfb_size_changed(fbc)) { - fbc->no_fbc_reason = "CFB requirements changed"; - return false; + if (!intel_fbc_hw_tracking_covers_screen(plane_state)) { + plane_state->no_fbc_reason = "plane size too big"; + return 0; } /* @@ -1115,146 +1108,139 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * and screen flicker. */ if (DISPLAY_VER(i915) >= 9 && - (fbc->state_cache.plane.adjusted_y & 3)) { - fbc->no_fbc_reason = "plane Y offset is misaligned"; + plane_state->view.color_plane[0].y & 3) { + plane_state->no_fbc_reason = "plane start Y offset misaligned"; return false; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ if (DISPLAY_VER(i915) >= 11 && - (cache->plane.src_h + cache->plane.adjusted_y) % 4) { - fbc->no_fbc_reason = "plane height + offset is non-modulo of 4"; + (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) { + plane_state->no_fbc_reason = "plane end Y offset misaligned"; return false; } - /* - * Display 12+ is not supporting FBC with PSR2. - * Recommendation is to keep this combination disabled - * Bspec: 50422 HSD: 14010260002 - */ - if (fbc->state_cache.psr2_active && DISPLAY_VER(i915) >= 12) { - fbc->no_fbc_reason = "not supported with PSR2"; - return false; - } - - return true; -} - -static void intel_fbc_get_reg_params(struct intel_fbc *fbc, - struct intel_crtc *crtc) -{ - const struct intel_fbc_state_cache *cache = &fbc->state_cache; - struct intel_fbc_reg_params *params = &fbc->params; - - /* Since all our fields are integer types, use memset here so the - * comparison function can rely on memcmp because the padding will be - * zero. */ - memset(params, 0, sizeof(*params)); - - params->fence_id = cache->fence_id; - params->fence_y_offset = cache->fence_y_offset; - - params->interval = cache->interval; + /* WaFbcExceedCdClockThreshold:hsw,bdw */ + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { + const struct intel_cdclk_state *cdclk_state; - params->crtc.pipe = crtc->pipe; - params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; + cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(cdclk_state)) + return PTR_ERR(cdclk_state); - params->fb.format = cache->fb.format; - params->fb.modifier = cache->fb.modifier; - params->fb.stride = cache->fb.stride; + if (crtc_state->pixel_rate >= cdclk_state->logical.cdclk * 95 / 100) { + plane_state->no_fbc_reason = "pixel rate too high"; + return 0; + } + } - params->cfb_stride = intel_fbc_cfb_stride(fbc, cache); - params->cfb_size = intel_fbc_cfb_size(fbc, cache); - params->override_cfb_stride = intel_fbc_override_cfb_stride(fbc, cache); + plane_state->no_fbc_reason = NULL; - params->plane_visible = cache->plane.visible; + return 0; } -static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &i915->fbc; - const struct intel_fbc_state_cache *cache = &fbc->state_cache; - const struct intel_fbc_reg_params *params = &fbc->params; - if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi)) - return false; +static bool intel_fbc_can_flip_nuke(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_plane *plane) +{ + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *old_plane_state = + intel_atomic_get_old_plane_state(state, plane); + const struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, plane); + const struct drm_framebuffer *old_fb = old_plane_state->hw.fb; + const struct drm_framebuffer *new_fb = new_plane_state->hw.fb; - if (!params->plane_visible) + if (drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) return false; - if (!intel_fbc_can_activate(crtc)) + if (!intel_fbc_is_ok(old_plane_state) || + !intel_fbc_is_ok(new_plane_state)) return false; - if (params->fb.format != cache->fb.format) + if (old_fb->format->format != new_fb->format->format) return false; - if (params->fb.modifier != cache->fb.modifier) + if (old_fb->modifier != new_fb->modifier) return false; - if (params->fb.stride != cache->fb.stride) + if (intel_fbc_plane_stride(old_plane_state) != + intel_fbc_plane_stride(new_plane_state)) return false; - if (params->cfb_stride != intel_fbc_cfb_stride(fbc, cache)) + if (intel_fbc_cfb_stride(old_plane_state) != + intel_fbc_cfb_stride(new_plane_state)) return false; - if (params->cfb_size != intel_fbc_cfb_size(fbc, cache)) + if (intel_fbc_cfb_size(old_plane_state) != + intel_fbc_cfb_size(new_plane_state)) return false; - if (params->override_cfb_stride != intel_fbc_override_cfb_stride(fbc, cache)) + if (intel_fbc_override_cfb_stride(old_plane_state) != + intel_fbc_override_cfb_stride(new_plane_state)) return false; return true; } -bool intel_fbc_pre_update(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static bool __intel_fbc_pre_update(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_plane *plane) { - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - const struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - const struct intel_plane_state *plane_state = - intel_atomic_get_new_plane_state(state, plane); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_fbc *fbc = plane->fbc; - const char *reason = "update pending"; bool need_vblank_wait = false; - if (!fbc || !plane_state) + fbc->flip_pending = true; + + if (intel_fbc_can_flip_nuke(state, crtc, plane)) return need_vblank_wait; - mutex_lock(&fbc->lock); + intel_fbc_deactivate(fbc, "update pending"); + + /* + * Display WA #1198: glk+ + * Need an extra vblank wait between FBC disable and most plane + * updates. Bspec says this is only needed for plane disable, but + * that is not true. Touching most plane registers will cause the + * corruption to appear. Also SKL/derivatives do not seem to be + * affected. + * + * TODO: could optimize this a bit by sampling the frame + * counter when we disable FBC (if it was already done earlier) + * and skipping the extra vblank wait before the plane update + * if at least one frame has already passed. + */ + if (fbc->activated && DISPLAY_VER(i915) >= 10) + need_vblank_wait = true; + fbc->activated = false; - if (fbc->crtc != crtc) - goto unlock; + return need_vblank_wait; +} - intel_fbc_update_state_cache(crtc, crtc_state, plane_state); - fbc->flip_pending = true; +bool intel_fbc_pre_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_plane_state *plane_state; + bool need_vblank_wait = false; + struct intel_plane *plane; + int i; + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + struct intel_fbc *fbc = plane->fbc; - if (!intel_fbc_can_flip_nuke(crtc_state)) { - intel_fbc_deactivate(fbc, reason); - - /* - * Display WA #1198: glk+ - * Need an extra vblank wait between FBC disable and most plane - * updates. Bspec says this is only needed for plane disable, but - * that is not true. Touching most plane registers will cause the - * corruption to appear. Also SKL/derivatives do not seem to be - * affected. - * - * TODO: could optimize this a bit by sampling the frame - * counter when we disable FBC (if it was already done earlier) - * and skipping the extra vblank wait before the plane update - * if at least one frame has already passed. - */ - if (fbc->activated && - DISPLAY_VER(i915) >= 10) - need_vblank_wait = true; - fbc->activated = false; + if (!fbc || plane->pipe != crtc->pipe) + continue; + + mutex_lock(&fbc->lock); + + if (fbc->state.plane == plane) + need_vblank_wait |= __intel_fbc_pre_update(state, crtc, plane); + + mutex_unlock(&fbc->lock); } -unlock: - mutex_unlock(&fbc->lock); return need_vblank_wait; } @@ -1262,44 +1248,25 @@ unlock: static void __intel_fbc_disable(struct intel_fbc *fbc) { struct drm_i915_private *i915 = fbc->i915; - struct intel_crtc *crtc = fbc->crtc; + struct intel_plane *plane = fbc->state.plane; drm_WARN_ON(&i915->drm, !mutex_is_locked(&fbc->lock)); - drm_WARN_ON(&i915->drm, !fbc->crtc); drm_WARN_ON(&i915->drm, fbc->active); - drm_dbg_kms(&i915->drm, "Disabling FBC on pipe %c\n", - pipe_name(crtc->pipe)); + drm_dbg_kms(&i915->drm, "Disabling FBC on [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); __intel_fbc_cleanup_cfb(fbc); - fbc->crtc = NULL; + fbc->state.plane = NULL; } -static void __intel_fbc_post_update(struct intel_crtc *crtc) +static void __intel_fbc_post_update(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct intel_fbc *fbc = &i915->fbc; + struct drm_i915_private *i915 = fbc->i915; drm_WARN_ON(&i915->drm, !mutex_is_locked(&fbc->lock)); - if (fbc->crtc != crtc) - return; - - fbc->flip_pending = false; - - if (!i915->params.enable_fbc) { - intel_fbc_deactivate(fbc, "disabled at runtime per module param"); - __intel_fbc_disable(fbc); - - return; - } - - intel_fbc_get_reg_params(fbc, crtc); - - if (!intel_fbc_can_activate(crtc)) - return; - if (!fbc->busy_bits) intel_fbc_activate(fbc); else @@ -1309,23 +1276,31 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) void intel_fbc_post_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - const struct intel_plane_state *plane_state = - intel_atomic_get_new_plane_state(state, plane); - struct intel_fbc *fbc = plane->fbc; + const struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i; - if (!fbc || !plane_state) - return; + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + struct intel_fbc *fbc = plane->fbc; - mutex_lock(&fbc->lock); - __intel_fbc_post_update(crtc); - mutex_unlock(&fbc->lock); + if (!fbc || plane->pipe != crtc->pipe) + continue; + + mutex_lock(&fbc->lock); + + if (fbc->state.plane == plane) { + fbc->flip_pending = false; + __intel_fbc_post_update(fbc); + } + + mutex_unlock(&fbc->lock); + } } static unsigned int intel_fbc_get_frontbuffer_bit(struct intel_fbc *fbc) { - if (fbc->crtc) - return to_intel_plane(fbc->crtc->base.primary)->frontbuffer_bit; + if (fbc->state.plane) + return fbc->state.plane->frontbuffer_bit; else return fbc->possible_framebuffer_bits; } @@ -1334,9 +1309,9 @@ void intel_fbc_invalidate(struct drm_i915_private *i915, unsigned int frontbuffer_bits, enum fb_op_origin origin) { - struct intel_fbc *fbc = &i915->fbc; + struct intel_fbc *fbc = i915->fbc; - if (!HAS_FBC(i915)) + if (!fbc) return; if (origin == ORIGIN_FLIP || origin == ORIGIN_CURSOR_UPDATE) @@ -1346,7 +1321,7 @@ void intel_fbc_invalidate(struct drm_i915_private *i915, fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits; - if (fbc->crtc && fbc->busy_bits) + if (fbc->state.plane && fbc->busy_bits) intel_fbc_deactivate(fbc, "frontbuffer write"); mutex_unlock(&fbc->lock); @@ -1355,9 +1330,9 @@ void intel_fbc_invalidate(struct drm_i915_private *i915, void intel_fbc_flush(struct drm_i915_private *i915, unsigned int frontbuffer_bits, enum fb_op_origin origin) { - struct intel_fbc *fbc = &i915->fbc; + struct intel_fbc *fbc = i915->fbc; - if (!HAS_FBC(i915)) + if (!fbc) return; mutex_lock(&fbc->lock); @@ -1367,144 +1342,83 @@ void intel_fbc_flush(struct drm_i915_private *i915, if (origin == ORIGIN_FLIP || origin == ORIGIN_CURSOR_UPDATE) goto out; - if (!fbc->busy_bits && fbc->crtc && + if (!fbc->busy_bits && fbc->state.plane && (frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) { if (fbc->active) intel_fbc_nuke(fbc); else if (!fbc->flip_pending) - __intel_fbc_post_update(fbc->crtc); + __intel_fbc_post_update(fbc); } out: mutex_unlock(&fbc->lock); } -/** - * intel_fbc_choose_crtc - select a CRTC to enable FBC on - * @i915: i915 device instance - * @state: the atomic state structure - * - * This function looks at the proposed state for CRTCs and planes, then chooses - * which pipe is going to have FBC by setting intel_crtc_state->enable_fbc to - * true. - * - * Later, intel_fbc_enable is going to look for state->enable_fbc and then maybe - * enable FBC for the chosen CRTC. If it does, it will set i915->fbc.crtc. - */ -void intel_fbc_choose_crtc(struct drm_i915_private *i915, - struct intel_atomic_state *state) +int intel_fbc_atomic_check(struct intel_atomic_state *state) { - struct intel_fbc *fbc = &i915->fbc; - struct intel_plane *plane; struct intel_plane_state *plane_state; - bool crtc_chosen = false; + struct intel_plane *plane; int i; - mutex_lock(&fbc->lock); - - /* Does this atomic commit involve the CRTC currently tied to FBC? */ - if (fbc->crtc && - !intel_atomic_get_new_crtc_state(state, fbc->crtc)) - goto out; - - if (!intel_fbc_can_enable(fbc)) - goto out; - - /* Simply choose the first CRTC that is compatible and has a visible - * plane. We could go for fancier schemes such as checking the plane - * size, but this would just affect the few platforms that don't tie FBC - * to pipe or plane A. */ for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - struct intel_crtc_state *crtc_state; - struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); - - if (plane->fbc != fbc) - continue; - - if (!plane_state->uapi.visible) - continue; + int ret; - crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - - crtc_state->enable_fbc = true; - crtc_chosen = true; - break; + ret = intel_fbc_check_plane(state, plane); + if (ret) + return ret; } - if (!crtc_chosen) - fbc->no_fbc_reason = "no suitable CRTC for FBC"; - -out: - mutex_unlock(&fbc->lock); + return 0; } -/** - * intel_fbc_enable: tries to enable FBC on the CRTC - * @crtc: the CRTC - * @state: corresponding &drm_crtc_state for @crtc - * - * This function checks if the given CRTC was chosen for FBC, then enables it if - * possible. Notice that it doesn't activate FBC. It is valid to call - * intel_fbc_enable multiple times for the same pipe without an - * intel_fbc_disable in the middle, as long as it is deactivated. - */ -static void intel_fbc_enable(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void __intel_fbc_enable(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - const struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); + struct drm_i915_private *i915 = to_i915(state->base.dev); const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); struct intel_fbc *fbc = plane->fbc; - struct intel_fbc_state_cache *cache; - int min_limit; - - if (!fbc || !plane_state) - return; - cache = &fbc->state_cache; - - min_limit = intel_fbc_min_limit(plane_state->hw.fb ? - plane_state->hw.fb->format->cpp[0] : 0); - - mutex_lock(&fbc->lock); + if (fbc->state.plane) { + if (fbc->state.plane != plane) + return; - if (fbc->crtc) { - if (fbc->crtc != crtc) - goto out; - - if (fbc->limit >= min_limit && - !intel_fbc_cfb_size_changed(fbc)) - goto out; + if (intel_fbc_is_ok(plane_state)) + return; __intel_fbc_disable(fbc); } drm_WARN_ON(&i915->drm, fbc->active); - intel_fbc_update_state_cache(crtc, crtc_state, plane_state); + fbc->no_fbc_reason = plane_state->no_fbc_reason; + if (fbc->no_fbc_reason) + return; - /* FIXME crtc_state->enable_fbc lies :( */ - if (!cache->plane.visible) - goto out; + if (!intel_fbc_is_fence_ok(plane_state)) { + fbc->no_fbc_reason = "framebuffer not fenced"; + return; + } + + if (fbc->underrun_detected) { + fbc->no_fbc_reason = "FIFO underrun"; + return; + } - if (intel_fbc_alloc_cfb(fbc, intel_fbc_cfb_size(fbc, cache), min_limit)) { - cache->plane.visible = false; + if (intel_fbc_alloc_cfb(fbc, intel_fbc_cfb_size(plane_state), + intel_fbc_min_limit(plane_state))) { fbc->no_fbc_reason = "not enough stolen memory"; - goto out; + return; } - drm_dbg_kms(&i915->drm, "Enabling FBC on pipe %c\n", - pipe_name(crtc->pipe)); + drm_dbg_kms(&i915->drm, "Enabling FBC on [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); fbc->no_fbc_reason = "FBC enabled but not active yet\n"; - fbc->crtc = crtc; + intel_fbc_update_state(state, crtc, plane); intel_fbc_program_cfb(fbc); -out: - mutex_unlock(&fbc->lock); } /** @@ -1515,38 +1429,48 @@ out: */ void intel_fbc_disable(struct intel_crtc *crtc) { - struct intel_plane *plane = to_intel_plane(crtc->base.primary); - struct intel_fbc *fbc = plane->fbc; + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_plane *plane; - if (!fbc) - return; + for_each_intel_plane(&i915->drm, plane) { + struct intel_fbc *fbc = plane->fbc; - mutex_lock(&fbc->lock); - if (fbc->crtc == crtc) - __intel_fbc_disable(fbc); - mutex_unlock(&fbc->lock); + if (!fbc || plane->pipe != crtc->pipe) + continue; + + mutex_lock(&fbc->lock); + if (fbc->state.plane == plane) + __intel_fbc_disable(fbc); + mutex_unlock(&fbc->lock); + } } -/** - * intel_fbc_update: enable/disable FBC on the CRTC - * @state: atomic state - * @crtc: the CRTC - * - * This function checks if the given CRTC was chosen for FBC, then enables it if - * possible. Notice that it doesn't activate FBC. It is valid to call - * intel_fbc_update multiple times for the same pipe without an - * intel_fbc_disable in the middle. - */ void intel_fbc_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i; - if (crtc_state->update_pipe && !crtc_state->enable_fbc) - intel_fbc_disable(crtc); - else - intel_fbc_enable(state, crtc); + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + struct intel_fbc *fbc = plane->fbc; + + if (!fbc || plane->pipe != crtc->pipe) + continue; + + mutex_lock(&fbc->lock); + + if (crtc_state->update_pipe && plane_state->no_fbc_reason) { + if (fbc->state.plane == plane) + __intel_fbc_disable(fbc); + } else { + __intel_fbc_enable(state, crtc, plane); + } + + mutex_unlock(&fbc->lock); + } } /** @@ -1557,56 +1481,56 @@ void intel_fbc_update(struct intel_atomic_state *state, */ void intel_fbc_global_disable(struct drm_i915_private *i915) { - struct intel_fbc *fbc = &i915->fbc; + struct intel_fbc *fbc = i915->fbc; - if (!HAS_FBC(i915)) + if (!fbc) return; mutex_lock(&fbc->lock); - if (fbc->crtc) { - drm_WARN_ON(&i915->drm, fbc->crtc->active); + if (fbc->state.plane) __intel_fbc_disable(fbc); - } mutex_unlock(&fbc->lock); } static void intel_fbc_underrun_work_fn(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, struct drm_i915_private, fbc.underrun_work); - struct intel_fbc *fbc = &i915->fbc; + struct intel_fbc *fbc = container_of(work, typeof(*fbc), underrun_work); + struct drm_i915_private *i915 = fbc->i915; mutex_lock(&fbc->lock); /* Maybe we were scheduled twice. */ - if (fbc->underrun_detected || !fbc->crtc) + if (fbc->underrun_detected || !fbc->state.plane) goto out; drm_dbg_kms(&i915->drm, "Disabling FBC due to FIFO underrun.\n"); fbc->underrun_detected = true; intel_fbc_deactivate(fbc, "FIFO underrun"); + if (!fbc->flip_pending) + intel_crtc_wait_for_next_vblank(intel_crtc_for_pipe(i915, fbc->state.plane->pipe)); + __intel_fbc_disable(fbc); out: mutex_unlock(&fbc->lock); } /* * intel_fbc_reset_underrun - reset FBC fifo underrun status. - * @fbc: The FBC instance + * @i915: the i915 device * * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we * want to re-enable FBC after an underrun to increase test coverage. */ -int intel_fbc_reset_underrun(struct intel_fbc *fbc) +void intel_fbc_reset_underrun(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = fbc->i915; - int ret; + struct intel_fbc *fbc = i915->fbc; + + if (!fbc) + return; cancel_work_sync(&fbc->underrun_work); - ret = mutex_lock_interruptible(&fbc->lock); - if (ret) - return ret; + mutex_lock(&fbc->lock); if (fbc->underrun_detected) { drm_dbg_kms(&i915->drm, @@ -1616,13 +1540,11 @@ int intel_fbc_reset_underrun(struct intel_fbc *fbc) fbc->underrun_detected = false; mutex_unlock(&fbc->lock); - - return 0; } /** * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun - * @fbc: The FBC instance + * @i915: i915 device * * Without FBC, most underruns are harmless and don't really cause too many * problems, except for an annoying message on dmesg. With FBC, underruns can @@ -1634,9 +1556,11 @@ int intel_fbc_reset_underrun(struct intel_fbc *fbc) * * This function is called from the IRQ handler. */ -void intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) +void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) { - if (!HAS_FBC(fbc->i915)) + struct intel_fbc *fbc = i915->fbc; + + if (!fbc) return; /* There's no guarantee that underrun_detected won't be set to true @@ -1677,7 +1601,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *i915) static bool need_fbc_vtd_wa(struct drm_i915_private *i915) { /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ - if (intel_vtd_active() && + if (intel_vtd_active(i915) && (IS_SKYLAKE(i915) || IS_BROXTON(i915))) { drm_info(&i915->drm, "Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); @@ -1687,6 +1611,43 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *i915) return false; } +void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane) +{ + if (!fbc) + return; + + plane->fbc = fbc; + fbc->possible_framebuffer_bits |= plane->frontbuffer_bit; +} + +static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915) +{ + struct intel_fbc *fbc; + + fbc = kzalloc(sizeof(*fbc), GFP_KERNEL); + if (!fbc) + return NULL; + + fbc->i915 = i915; + INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); + mutex_init(&fbc->lock); + + if (DISPLAY_VER(i915) >= 7) + fbc->funcs = &ivb_fbc_funcs; + else if (DISPLAY_VER(i915) == 6) + fbc->funcs = &snb_fbc_funcs; + else if (DISPLAY_VER(i915) == 5) + fbc->funcs = &ilk_fbc_funcs; + else if (IS_G4X(i915)) + fbc->funcs = &g4x_fbc_funcs; + else if (DISPLAY_VER(i915) == 4) + fbc->funcs = &i965_fbc_funcs; + else + fbc->funcs = &i8xx_fbc_funcs; + + return fbc; +} + /** * intel_fbc_init - Initialize FBC * @i915: the i915 device @@ -1695,12 +1656,7 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *i915) */ void intel_fbc_init(struct drm_i915_private *i915) { - struct intel_fbc *fbc = &i915->fbc; - - fbc->i915 = i915; - INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); - mutex_init(&fbc->lock); - fbc->active = false; + struct intel_fbc *fbc; if (!drm_mm_initialized(&i915->mm.stolen)) mkwrite_device_info(i915)->display.has_fbc = false; @@ -1712,27 +1668,114 @@ void intel_fbc_init(struct drm_i915_private *i915) drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n", i915->params.enable_fbc); - if (!HAS_FBC(i915)) { - fbc->no_fbc_reason = "unsupported by this chipset"; + if (!HAS_FBC(i915)) return; - } - if (DISPLAY_VER(i915) >= 7) - fbc->funcs = &ivb_fbc_funcs; - else if (DISPLAY_VER(i915) == 6) - fbc->funcs = &snb_fbc_funcs; - else if (DISPLAY_VER(i915) == 5) - fbc->funcs = &ilk_fbc_funcs; - else if (IS_G4X(i915)) - fbc->funcs = &g4x_fbc_funcs; - else if (DISPLAY_VER(i915) == 4) - fbc->funcs = &i965_fbc_funcs; - else - fbc->funcs = &i8xx_fbc_funcs; + fbc = intel_fbc_create(i915); + if (!fbc) + return; /* We still don't have any sort of hardware state readout for FBC, so * deactivate it in case the BIOS activated it to make sure software * matches the hardware state. */ if (intel_fbc_hw_is_active(fbc)) intel_fbc_hw_deactivate(fbc); + + i915->fbc = fbc; +} + +static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) +{ + struct intel_fbc *fbc = m->private; + struct drm_i915_private *i915 = fbc->i915; + struct intel_plane *plane; + intel_wakeref_t wakeref; + + drm_modeset_lock_all(&i915->drm); + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + mutex_lock(&fbc->lock); + + if (fbc->active) { + seq_puts(m, "FBC enabled\n"); + seq_printf(m, "Compressing: %s\n", + yesno(intel_fbc_is_compressing(fbc))); + } else { + seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); + } + + for_each_intel_plane(&i915->drm, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (plane->fbc != fbc) + continue; + + seq_printf(m, "%c [PLANE:%d:%s]: %s\n", + fbc->state.plane == plane ? '*' : ' ', + plane->base.base.id, plane->base.name, + plane_state->no_fbc_reason ?: "FBC possible"); + } + + mutex_unlock(&fbc->lock); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + + drm_modeset_unlock_all(&i915->drm); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(intel_fbc_debugfs_status); + +static int intel_fbc_debugfs_false_color_get(void *data, u64 *val) +{ + struct intel_fbc *fbc = data; + + *val = fbc->false_color; + + return 0; +} + +static int intel_fbc_debugfs_false_color_set(void *data, u64 val) +{ + struct intel_fbc *fbc = data; + + mutex_lock(&fbc->lock); + + fbc->false_color = val; + + if (fbc->active) + fbc->funcs->set_false_color(fbc, fbc->false_color); + + mutex_unlock(&fbc->lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(intel_fbc_debugfs_false_color_fops, + intel_fbc_debugfs_false_color_get, + intel_fbc_debugfs_false_color_set, + "%llu\n"); + +static void intel_fbc_debugfs_add(struct intel_fbc *fbc) +{ + struct drm_i915_private *i915 = fbc->i915; + struct drm_minor *minor = i915->drm.primary; + + debugfs_create_file("i915_fbc_status", 0444, + minor->debugfs_root, fbc, + &intel_fbc_debugfs_status_fops); + + if (fbc->funcs->set_false_color) + debugfs_create_file("i915_fbc_false_color", 0644, + minor->debugfs_root, fbc, + &intel_fbc_debugfs_false_color_fops); +} + +void intel_fbc_debugfs_register(struct drm_i915_private *i915) +{ + struct intel_fbc *fbc = i915->fbc; + + if (fbc) + intel_fbc_debugfs_add(fbc); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h index ce48a22c5e9e..07ad0411fcc3 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.h +++ b/drivers/gpu/drm/i915/display/intel_fbc.h @@ -8,19 +8,16 @@ #include <linux/types.h> -#include "intel_frontbuffer.h" - +enum fb_op_origin; struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_fbc; +struct intel_plane; struct intel_plane_state; -void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, - struct intel_atomic_state *state); -bool intel_fbc_is_active(struct intel_fbc *fbc); -bool intel_fbc_is_compressing(struct intel_fbc *fbc); +int intel_fbc_atomic_check(struct intel_atomic_state *state); bool intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_post_update(struct intel_atomic_state *state, @@ -36,8 +33,9 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, enum fb_op_origin origin); void intel_fbc_flush(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits, enum fb_op_origin origin); -void intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc); -int intel_fbc_reset_underrun(struct intel_fbc *fbc); -int intel_fbc_set_false_color(struct intel_fbc *fbc, bool enable); +void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane); +void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915); +void intel_fbc_reset_underrun(struct drm_i915_private *i915); +void intel_fbc_debugfs_register(struct drm_i915_private *i915); #endif /* __INTEL_FBC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 2b5f80f3b4e0..3d6e22923601 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -4,6 +4,7 @@ */ #include "intel_atomic.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_de.h" #include "intel_display_types.h" @@ -157,7 +158,7 @@ static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, if (pipe_config->fdi_lanes <= 2) return 0; - other_crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_C); + other_crtc = intel_crtc_for_pipe(dev_priv, PIPE_C); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) @@ -178,7 +179,7 @@ static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, return -EINVAL; } - other_crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_B); + other_crtc = intel_crtc_for_pipe(dev_priv, PIPE_B); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 28d9eeb7b4f3..d636d21fa9ce 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -26,8 +26,8 @@ */ #include "i915_drv.h" -#include "i915_trace.h" #include "intel_de.h" +#include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_fifo_underrun.h" @@ -61,7 +61,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); if (crtc->cpu_fifo_underrun_disabled) return false; @@ -79,7 +79,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc = intel_crtc_for_pipe(dev_priv, pipe); if (crtc->pch_fifo_underrun_disabled) return false; @@ -279,7 +279,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); bool old; lockdep_assert_held(&dev_priv->irq_lock); @@ -348,7 +348,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, bool enable) { struct intel_crtc *crtc = - intel_get_crtc_for_pipe(dev_priv, pch_transcoder); + intel_crtc_for_pipe(dev_priv, pch_transcoder); unsigned long flags; bool old; @@ -391,7 +391,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); u32 underruns = 0; /* We may be called too early in init, thanks BIOS! */ @@ -434,7 +434,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, drm_err(&dev_priv->drm, "CPU pipe %c FIFO underrun\n", pipe_name(pipe)); } - intel_fbc_handle_fifo_underrun_irq(&dev_priv->fbc); + intel_fbc_handle_fifo_underrun_irq(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 0492446cd04a..791248f812aa 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -55,14 +55,13 @@ * cancelled as soon as busyness is detected. */ -#include "display/intel_dp.h" - #include "i915_drv.h" -#include "i915_trace.h" +#include "intel_display_trace.h" #include "intel_display_types.h" +#include "intel_dp.h" +#include "intel_drrs.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" -#include "intel_drrs.h" #include "intel_psr.h" /** diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h index a88441edc8f9..ff0c37b079aa 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h @@ -28,7 +28,7 @@ #include <linux/kref.h> #include "gem/i915_gem_object_types.h" -#include "i915_active.h" +#include "i915_active_types.h" struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 7e3f5c6ca484..1a376e9a1ff3 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1382,7 +1382,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) if (!HAS_OVERLAY(dev_priv)) return; - engine = dev_priv->gt.engine[RCS0]; + engine = to_gt(dev_priv)->engine[RCS0]; if (!engine || !engine->kernel_context) return; diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index dcd698a02da2..01ce1d72297f 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -3,11 +3,12 @@ * Copyright © 2021 Intel Corporation */ -#include "intel_display_types.h" -#include "intel_plane_initial.h" +#include "i915_drv.h" #include "intel_atomic_plane.h" #include "intel_display.h" +#include "intel_display_types.h" #include "intel_fb.h" +#include "intel_plane_initial.h" static bool intel_reuse_initial_plane_obj(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 3d9c0e13c329..f6526d9ccfdc 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -6,18 +6,19 @@ #ifndef __INTEL_PSR_H__ #define __INTEL_PSR_H__ -#include "intel_frontbuffer.h" +#include <linux/types.h> +enum fb_op_origin; struct drm_connector; struct drm_connector_state; struct drm_i915_private; +struct intel_atomic_state; +struct intel_crtc; struct intel_crtc_state; struct intel_dp; -struct intel_crtc; -struct intel_atomic_state; -struct intel_plane_state; -struct intel_plane; struct intel_encoder; +struct intel_plane; +struct intel_plane_state; void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_pre_plane_update(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 8a52b7a16774..c8488f5ebd04 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -5,6 +5,7 @@ #include <linux/dmi.h> +#include "i915_drv.h" #include "intel_display_types.h" #include "intel_quirks.h" diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 2dc6c3742ba2..76e1188b01d4 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1842,7 +1842,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_write_sdvox(intel_sdvo, temp); for (i = 0; i < 2; i++) - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); success = intel_sdvo_get_trained_inputs(intel_sdvo, &input1, &input2); /* diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index c2251218a39e..09f405e4d363 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -186,6 +186,7 @@ static const struct intel_mpllb_state dg2_dp_uhbr10_100 = { REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_WORD_DIV2_EN, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_DP2_MODE, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SHIM_DIV32_CLK_SEL, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), .mpllb_div2 = REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | @@ -369,6 +370,7 @@ static const struct intel_mpllb_state dg2_dp_uhbr10_38_4 = { REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_WORD_DIV2_EN, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_DP2_MODE, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SHIM_DIV32_CLK_SEL, 1) | REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), .mpllb_div2 = REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 1b99a9501a45..2357a1301f48 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -40,15 +40,15 @@ #include <drm/drm_rect.h> #include "i915_drv.h" -#include "i915_trace.h" #include "i915_vgpu.h" +#include "i9xx_plane.h" #include "intel_atomic_plane.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_frontbuffer.h" #include "intel_sprite.h" -#include "i9xx_plane.h" #include "intel_vrr.h" int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) @@ -431,10 +431,6 @@ vlv_sprite_update_noarm(struct intel_plane *plane, u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); unsigned long irqflags; - /* Sizes are 0 based */ - crtc_w--; - crtc_h--; - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); intel_de_write_fw(dev_priv, SPSTRIDE(pipe, plane_id), @@ -442,7 +438,7 @@ vlv_sprite_update_noarm(struct intel_plane *plane, intel_de_write_fw(dev_priv, SPPOS(pipe, plane_id), (crtc_y << 16) | crtc_x); intel_de_write_fw(dev_priv, SPSIZE(pipe, plane_id), - (crtc_h << 16) | crtc_w); + ((crtc_h - 1) << 16) | (crtc_w - 1)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -866,21 +862,15 @@ ivb_sprite_update_noarm(struct intel_plane *plane, u32 sprscale = 0; unsigned long irqflags; - /* Sizes are 0 based */ - src_w--; - src_h--; - crtc_w--; - crtc_h--; - if (crtc_w != src_w || crtc_h != src_h) - sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; + sprscale = SPRITE_SCALE_ENABLE | ((src_w - 1) << 16) | (src_h - 1); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); intel_de_write_fw(dev_priv, SPRSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); intel_de_write_fw(dev_priv, SPRPOS(pipe), (crtc_y << 16) | crtc_x); - intel_de_write_fw(dev_priv, SPRSIZE(pipe), (crtc_h << 16) | crtc_w); + intel_de_write_fw(dev_priv, SPRSIZE(pipe), ((crtc_h - 1) << 16) | (crtc_w - 1)); if (IS_IVYBRIDGE(dev_priv)) intel_de_write_fw(dev_priv, SPRSCALE(pipe), sprscale); @@ -1208,21 +1198,15 @@ g4x_sprite_update_noarm(struct intel_plane *plane, u32 dvsscale = 0; unsigned long irqflags; - /* Sizes are 0 based */ - src_w--; - src_h--; - crtc_w--; - crtc_h--; - if (crtc_w != src_w || crtc_h != src_h) - dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; + dvsscale = DVS_SCALE_ENABLE | ((src_w - 1) << 16) | (src_h - 1); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); intel_de_write_fw(dev_priv, DVSSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); intel_de_write_fw(dev_priv, DVSPOS(pipe), (crtc_y << 16) | crtc_x); - intel_de_write_fw(dev_priv, DVSSIZE(pipe), (crtc_h << 16) | crtc_w); + intel_de_write_fw(dev_priv, DVSSIZE(pipe), ((crtc_h - 1) << 16) | (crtc_w - 1)); intel_de_write_fw(dev_priv, DVSSCALE(pipe), dvsscale); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -1584,8 +1568,8 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, */ if (!ret && has_dst_key_in_primary_plane(dev_priv)) { struct intel_crtc *crtc = - intel_get_crtc_for_pipe(dev_priv, - to_intel_plane(plane)->pipe); + intel_crtc_for_pipe(dev_priv, + to_intel_plane(plane)->pipe); plane_state = drm_atomic_get_plane_state(state, crtc->base.primary); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 88a398df9621..8a39989b87ad 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -36,6 +36,7 @@ #include "i915_drv.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_hotplug.h" @@ -924,8 +925,7 @@ intel_enable_tv(struct intel_atomic_state *state, struct drm_i915_private *dev_priv = to_i915(dev); /* Prevents vblank waits from timing out in intel_tv_detect_type() */ - intel_wait_for_vblank(dev_priv, - to_intel_crtc(pipe_config->uapi.crtc)->pipe); + intel_crtc_wait_for_next_vblank(to_intel_crtc(pipe_config->uapi.crtc)); intel_de_write(dev_priv, TV_CTL, intel_de_read(dev_priv, TV_CTL) | TV_ENC_ENABLE); @@ -1618,7 +1618,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_write(dev_priv, TV_DAC, tv_dac); intel_de_posting_read(dev_priv, TV_DAC); - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); type = -1; tv_dac = intel_de_read(dev_priv, TV_DAC); @@ -1651,7 +1651,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_posting_read(dev_priv, TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ - intel_wait_for_vblank(dev_priv, crtc->pipe); + intel_crtc_wait_for_next_vblank(crtc); /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index a2108a8f544d..f043d85ba64d 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -330,7 +330,12 @@ enum vbt_gmbus_ddi { ADLS_DDC_BUS_PORT_TC1 = 0x2, ADLS_DDC_BUS_PORT_TC2, ADLS_DDC_BUS_PORT_TC3, - ADLS_DDC_BUS_PORT_TC4 + ADLS_DDC_BUS_PORT_TC4, + ADLP_DDC_BUS_PORT_TC1 = 0x3, + ADLP_DDC_BUS_PORT_TC2, + ADLP_DDC_BUS_PORT_TC3, + ADLP_DDC_BUS_PORT_TC4 + }; #define DP_AUX_A 0x40 diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index bf8d3c7ca2d9..9b05f93ed8bc 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -6,12 +6,14 @@ * Manasi Navare <manasi.d.navare@intel.com> */ #include <linux/limits.h> + #include "i915_drv.h" +#include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" -#include "intel_vdsc.h" #include "intel_qp_tables.h" +#include "intel_vdsc.h" enum ROW_INDEX_BPP { ROW_INDEX_6BPP = 0, @@ -1110,25 +1112,16 @@ static i915_reg_t dss_ctl2_reg(struct intel_crtc *crtc, enum transcoder cpu_tran ICL_PIPE_DSS_CTL2(crtc->pipe) : DSS_CTL2; } -static struct intel_crtc * -_get_crtc_for_pipe(struct drm_i915_private *i915, enum pipe pipe) -{ - if (!intel_pipe_valid(i915, pipe)) - return NULL; - - return intel_get_crtc_for_pipe(i915, pipe); -} - struct intel_crtc * intel_dsc_get_bigjoiner_secondary(const struct intel_crtc *primary_crtc) { - return _get_crtc_for_pipe(to_i915(primary_crtc->base.dev), primary_crtc->pipe + 1); + return intel_crtc_for_pipe(to_i915(primary_crtc->base.dev), primary_crtc->pipe + 1); } static struct intel_crtc * intel_dsc_get_bigjoiner_primary(const struct intel_crtc *secondary_crtc) { - return _get_crtc_for_pipe(to_i915(secondary_crtc->base.dev), secondary_crtc->pipe - 1); + return intel_crtc_for_pipe(to_i915(secondary_crtc->base.dev), secondary_crtc->pipe - 1); } void intel_uncompressed_joiner_enable(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 28890876bdeb..93a385396512 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -13,6 +13,7 @@ #include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" +#include "intel_fbc.h" #include "intel_pm.h" #include "intel_psr.h" #include "intel_sprite.h" @@ -420,9 +421,19 @@ static int icl_plane_min_width(const struct drm_framebuffer *fb, } } -static int icl_plane_max_width(const struct drm_framebuffer *fb, - int color_plane, - unsigned int rotation) +static int icl_hdr_plane_max_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) + return 4096; + else + return 5120; +} + +static int icl_sdr_plane_max_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) { return 5120; } @@ -672,13 +683,13 @@ static u32 skl_plane_ctl_format(u32 pixel_format) case DRM_FORMAT_XYUV8888: return PLANE_CTL_FORMAT_XYUV; case DRM_FORMAT_YUYV: - return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_ORDER_YUYV; case DRM_FORMAT_YVYU: - return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_ORDER_YVYU; case DRM_FORMAT_UYVY: - return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_ORDER_UYVY; case DRM_FORMAT_VYUY: - return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_ORDER_VYUY; case DRM_FORMAT_NV12: return PLANE_CTL_FORMAT_NV12; case DRM_FORMAT_P010: @@ -1022,10 +1033,6 @@ skl_program_plane_noarm(struct intel_plane *plane, u32 src_h = drm_rect_height(&plane_state->uapi.src) >> 16; unsigned long irqflags; - /* Sizes are 0 based */ - src_w--; - src_h--; - /* The scaler will handle the output position */ if (plane_state->scaler_id >= 0) { crtc_x = 0; @@ -1045,7 +1052,14 @@ skl_program_plane_noarm(struct intel_plane *plane, intel_de_write_fw(dev_priv, PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x); intel_de_write_fw(dev_priv, PLANE_SIZE(pipe, plane_id), - (src_h << 16) | src_w); + ((src_h - 1) << 16) | (src_w - 1)); + + if (intel_fb_is_rc_ccs_cc_modifier(fb->modifier)) { + intel_de_write_fw(dev_priv, PLANE_CC_VAL(pipe, plane_id, 0), + lower_32_bits(plane_state->ccval)); + intel_de_write_fw(dev_priv, PLANE_CC_VAL(pipe, plane_id, 1), + upper_32_bits(plane_state->ccval)); + } if (icl_is_hdr_plane(dev_priv, plane_id)) intel_de_write_fw(dev_priv, PLANE_CUS_CTL(pipe, plane_id), @@ -1054,10 +1068,6 @@ skl_program_plane_noarm(struct intel_plane *plane, if (fb->format->is_yuv && icl_is_hdr_plane(dev_priv, plane_id)) icl_program_input_csc(plane, crtc_state, plane_state); - if (intel_fb_is_rc_ccs_cc_modifier(fb->modifier)) - intel_uncore_write64_fw(&dev_priv->uncore, - PLANE_CC_VAL(pipe, plane_id), plane_state->ccval); - skl_write_plane_wm(plane, crtc_state); intel_psr2_program_plane_sel_fetch(plane, crtc_state, plane_state, color_plane); @@ -1727,7 +1737,7 @@ static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - return intel_pxp_key_check(&i915->gt.pxp, obj, false) == 0; + return intel_pxp_key_check(&to_gt(i915)->pxp, obj, false) == 0; } static bool pxp_is_borked(struct drm_i915_gem_object *obj) @@ -1815,6 +1825,15 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, return pipe == PIPE_A && plane_id == PLANE_PRIMARY; } +static struct intel_fbc *skl_plane_fbc(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (skl_plane_has_fbc(dev_priv, pipe, plane_id)) + return dev_priv->fbc; + else + return NULL; +} + static bool skl_plane_has_planar(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2101,14 +2120,14 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->id = plane_id; plane->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, plane_id); - if (skl_plane_has_fbc(dev_priv, pipe, plane_id)) - plane->fbc = &dev_priv->fbc; - if (plane->fbc) - plane->fbc->possible_framebuffer_bits |= plane->frontbuffer_bit; + intel_fbc_add_plane(skl_plane_fbc(dev_priv, pipe, plane_id), plane); if (DISPLAY_VER(dev_priv) >= 11) { plane->min_width = icl_plane_min_width; - plane->max_width = icl_plane_max_width; + if (icl_is_hdr_plane(dev_priv, plane_id)) + plane->max_width = icl_hdr_plane_max_width; + else + plane->max_width = icl_sdr_plane_max_width; plane->max_height = icl_plane_max_height; plane->min_cdclk = icl_plane_min_cdclk; } else if (DISPLAY_VER(dev_priv) >= 10) { diff --git a/drivers/gpu/drm/i915/dma_resv_utils.c b/drivers/gpu/drm/i915/dma_resv_utils.c deleted file mode 100644 index 7df91b7e4ca8..000000000000 --- a/drivers/gpu/drm/i915/dma_resv_utils.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2020 Intel Corporation - */ - -#include <linux/dma-resv.h> - -#include "dma_resv_utils.h" - -void dma_resv_prune(struct dma_resv *resv) -{ - if (dma_resv_trylock(resv)) { - if (dma_resv_test_signaled(resv, true)) - dma_resv_add_excl_fence(resv, NULL); - dma_resv_unlock(resv); - } -} diff --git a/drivers/gpu/drm/i915/dma_resv_utils.h b/drivers/gpu/drm/i915/dma_resv_utils.h deleted file mode 100644 index b9d8fb5f8367..000000000000 --- a/drivers/gpu/drm/i915/dma_resv_utils.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2020 Intel Corporation - */ - -#ifndef DMA_RESV_UTILS_H -#define DMA_RESV_UTILS_H - -struct dma_resv; - -void dma_resv_prune(struct dma_resv *resv); - -#endif /* DMA_RESV_UTILS_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index f0435c6feb68..8a248003dfae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct clflush *clflush; assert_object_held(obj); + if (IS_DGFX(i915)) { + WARN_ON_ONCE(obj->cache_dirty); + return false; + } + /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. @@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), + i915_fence_timeout(i915), I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); + /* + * We must have successfully populated the pages(since we are + * holding a pin on the pages as per the flush worker) to reach + * this point, which must mean we have already done the required + * flush-on-acquire, hence resetting cache_dirty here should be + * safe. + */ + obj->cache_dirty = false; } else if (obj->mm.pages) { __do_clflush(obj); + obj->cache_dirty = false; } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } - obj->cache_dirty = false; return true; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ebd775cb1661..cad3f0b2be9e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -237,7 +237,7 @@ static int proto_context_set_persistence(struct drm_i915_private *i915, * colateral damage, and we should not pretend we can by * exposing the interface. */ - if (!intel_has_reset_engine(&i915->gt)) + if (!intel_has_reset_engine(to_gt(i915))) return -ENODEV; pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); @@ -254,7 +254,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915, if (!protected) { pc->uses_protected_content = false; - } else if (!intel_pxp_is_enabled(&i915->gt.pxp)) { + } else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) { ret = -ENODEV; } else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) || !(pc->user_flags & BIT(UCONTEXT_BANNABLE))) { @@ -268,8 +268,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915, */ pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (!intel_pxp_is_active(&i915->gt.pxp)) - ret = intel_pxp_start(&i915->gt.pxp); + if (!intel_pxp_is_active(&to_gt(i915)->pxp)) + ret = intel_pxp_start(&to_gt(i915)->pxp); } return ret; @@ -571,7 +571,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, intel_engine_mask_t prev_mask; /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) + if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc))) return -ENODEV; if (get_user(slot, &ext->engine_index)) @@ -833,7 +833,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, sseu = &pc->legacy_rcs_sseu; } - ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu); if (ret) return ret; @@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu) free_engines(engines); } -static int __i915_sw_fence_call +static int engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_gem_engines *engines = @@ -1044,7 +1044,7 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, struct intel_sseu rcs_sseu) { - const struct intel_gt *gt = &ctx->i915->gt; + const struct intel_gt *gt = to_gt(ctx->i915); struct intel_engine_cs *engine; struct i915_gem_engines *e, *err; enum intel_engine_id id; @@ -1521,7 +1521,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) * colateral damage, and we should not pretend we can by * exposing the interface. */ - if (!intel_has_reset_engine(&ctx->i915->gt)) + if (!intel_has_reset_engine(to_gt(ctx->i915))) return -ENODEV; i915_gem_context_clear_persistence(ctx); @@ -1559,7 +1559,7 @@ i915_gem_create_context(struct drm_i915_private *i915, } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(&i915->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(i915), 0); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -1742,7 +1742,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - ppgtt = i915_ppgtt_create(&i915->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(i915), 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -2194,7 +2194,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = intel_gt_terminally_wedged(&i915->gt); + ret = intel_gt_terminally_wedged(to_gt(i915)); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 8955d6abcef1..9402d4bf4ffc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -379,7 +379,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data if (ext.flags) return -EINVAL; - if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp)) + if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp)) return -ENODEV; ext_data->flags |= I915_BO_PROTECTED; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index e8a58c997170..1b526039a60d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - /* XXX: consider doing a vmap flush or something */ - if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj)) + /* + * DG1 is special here since it still snoops transactions even with + * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We + * might need to revisit this as we add new discrete platforms. + * + * XXX: Consider doing a vmap flush or something, where possible. + * Currently we just do a heavy handed wbinvd_on_all_cpus() here since + * the underlying sg_table might not even point to struct pages, so we + * can't just call drm_clflush_sg or similar, like we do elsewhere in + * the driver. + */ + if (i915_gem_object_can_bypass_llc(obj) || + (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); sg_page_sizes = i915_sg_dma_sizes(pages->sgl); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index b684a62bf3b0..26532c07d467 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -18,10 +18,32 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (IS_DGFX(i915)) + return false; + return !(obj->cache_level == I915_CACHE_NONE || obj->cache_level == I915_CACHE_WT); } +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (obj->cache_dirty) + return false; + + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + + if (IS_DGFX(i915)) + return false; + + /* Currently in use by HW (display engine)? Keep flushed. */ + return i915_gem_object_is_framebuffer(obj); +} + static void flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4d7da07442f2..e7f548a22970 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -29,6 +29,7 @@ #include "i915_gem_ioctls.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_vma_snapshot.h" struct eb_vma { struct i915_vma *vma; @@ -307,11 +308,15 @@ struct i915_execbuffer { struct eb_fence *fences; unsigned long num_fences; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1]; +#endif }; static int eb_parse(struct i915_execbuffer *eb); static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); +static void eb_capture_release(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { @@ -990,7 +995,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } if (!(ev->flags & EXEC_OBJECT_WRITE)) { - err = dma_resv_reserve_shared(vma->resv, 1); + err = dma_resv_reserve_shared(vma->obj->base.resv, 1); if (err) return err; } @@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) i915_vma_put(vma); } + eb_capture_release(eb); eb_unpin_engine(eb); } @@ -1092,6 +1098,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } +static void reloc_cache_unmap(struct reloc_cache *cache) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) + kunmap_atomic(vaddr); + else + io_mapping_unmap_atomic((void __iomem *)vaddr); +} + +static void reloc_cache_remap(struct reloc_cache *cache, + struct drm_i915_gem_object *obj) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + if (cache->vaddr & KMAP) { + struct page *page = i915_gem_object_get_page(obj, cache->page); + + vaddr = kmap_atomic(page); + cache->vaddr = unmask_flags(cache->vaddr) | + (unsigned long)vaddr; + } else { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + + offset = cache->node.start; + if (!drm_mm_node_allocated(&cache->node)) + offset += cache->page << PAGE_SHIFT; + + cache->vaddr = (unsigned long) + io_mapping_map_atomic_wc(&ggtt->iomap, offset); + } +} + static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; @@ -1356,10 +1403,17 @@ eb_relocate_entry(struct i915_execbuffer *eb, * batchbuffers. */ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && - GRAPHICS_VER(eb->i915) == 6) { + GRAPHICS_VER(eb->i915) == 6 && + !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) { + struct i915_vma *vma = target->vma; + + reloc_cache_unmap(&eb->reloc_cache); + mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, target->vma->obj->cache_level, PIN_GLOBAL, NULL); + mutex_unlock(&vma->vm->mutex); + reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); if (err) return err; } @@ -1880,36 +1934,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb) return NULL; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ +static void eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; - unsigned int i = count; - int err = 0, j; + unsigned int i = count, j; + struct i915_vma_snapshot *vsnap; while (i--) { struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; unsigned int flags = ev->flags; - struct drm_i915_gem_object *obj = vma->obj; - assert_vma_held(vma); + if (!(flags & EXEC_OBJECT_CAPTURE)) + continue; - if (flags & EXEC_OBJECT_CAPTURE) { + vsnap = i915_vma_snapshot_alloc(GFP_KERNEL); + if (!vsnap) + continue; + + i915_vma_snapshot_init(vsnap, vma, "user"); + for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; - for_each_batch_create_order(eb, j) { - if (!eb->requests[j]) - break; + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (!capture) + continue; - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (capture) { - capture->next = - eb->requests[j]->capture_list; - capture->vma = vma; - eb->requests[j]->capture_list = capture; - } - } + capture->next = eb->capture_lists[j]; + capture->vma_snapshot = i915_vma_snapshot_get(vsnap); + eb->capture_lists[j] = capture; + } + i915_vma_snapshot_put(vsnap); + } +} + +/* Commit once we're in the critical path */ +static void eb_capture_commit(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + struct i915_request *rq = eb->requests[j]; + + if (!rq) + break; + + rq->capture_list = eb->capture_lists[j]; + eb->capture_lists[j] = NULL; + } +} + +/* + * Release anything that didn't get committed due to errors. + * The capture_list will otherwise be freed at request retire. + */ +static void eb_capture_release(struct i915_execbuffer *eb) +{ + unsigned int j; + + for_each_batch_create_order(eb, j) { + if (eb->capture_lists[j]) { + i915_request_free_capture_list(eb->capture_lists[j]); + eb->capture_lists[j] = NULL; } + } +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ + memset(eb->capture_lists, 0, sizeof(eb->capture_lists)); +} + +#else + +static void eb_capture_stage(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_commit(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_release(struct i915_execbuffer *eb) +{ +} + +static void eb_capture_list_clear(struct i915_execbuffer *eb) +{ +} + +#endif + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0, j; + + while (i--) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + unsigned int flags = ev->flags; + struct drm_i915_gem_object *obj = vma->obj; + + assert_vma_held(vma); /* * If the GPU is not _reading_ through the CPU cache, we need @@ -1990,6 +2121,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) /* Unconditionally flush any chipset caches (for streaming writes). */ intel_gt_chipset_flush(eb->gt); + eb_capture_commit(eb); + return 0; err_skip: @@ -2164,7 +2297,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = dma_resv_reserve_shared(shadow->resv, 1); + err = dma_resv_reserve_shared(shadow->obj->base.resv, 1); if (err) goto err_trampoline; @@ -2276,9 +2409,9 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } -static int num_vcs_engines(const struct drm_i915_private *i915) +static int num_vcs_engines(struct drm_i915_private *i915) { - return hweight_long(VDBOX_MASK(&i915->gt)); + return hweight_long(VDBOX_MASK(to_gt(i915))); } /* @@ -3114,7 +3247,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, /* Allocate a request for this batch buffer nice and early. */ eb->requests[i] = i915_request_create(eb_find_context(eb, i)); if (IS_ERR(eb->requests[i])) { - out_fence = ERR_PTR(PTR_ERR(eb->requests[i])); + out_fence = ERR_CAST(eb->requests[i]); eb->requests[i] = NULL; return out_fence; } @@ -3132,13 +3265,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, } /* - * Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when - * this request is retired will the batch_obj be moved onto - * the inactive_list and lose its active reference. Hence we do - * not need to explicitly hold another reference here. + * Not really on stack, but we don't want to call + * kfree on the batch_snapshot when we put it, so use the + * _onstack interface. */ - eb->requests[i]->batch = eb->batches[i]->vma; + if (eb->batches[i]->vma) + i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot, + eb->batches[i]->vma, + "batch"); if (eb->batch_pool) { GEM_BUG_ON(intel_context_is_parallel(eb->context)); intel_gt_buffer_pool_mark_active(eb->batch_pool, @@ -3187,6 +3321,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.fences = NULL; eb.num_fences = 0; + eb_capture_list_clear(&eb); + memset(eb.requests, 0, sizeof(struct i915_request *) * ARRAY_SIZE(eb.requests)); eb.composite_fence = NULL; @@ -3273,10 +3409,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, } ww_acquire_done(&eb.ww.ctx); + eb_capture_stage(&eb); out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); if (IS_ERR(out_fence)) { err = PTR_ERR(out_fence); + out_fence = NULL; if (eb.requests[0]) goto err_request; else diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index a57a6b7013c2..c5150a1ee3d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .put_pages = i915_gem_object_put_pages_internal, }; -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) +__i915_gem_object_create_internal(struct drm_i915_private *i915, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; @@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + i915_gem_object_init(obj, ops, &lock_class, 0); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* @@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return obj; } + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 65fc6ff5f59d..aaf970c37aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -17,6 +17,7 @@ #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_gem_mman.h" +#include "i915_mm.h" #include "i915_trace.h" #include "i915_user_extensions.h" #include "i915_gem_ttm.h" @@ -72,7 +73,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & ~(I915_MMAP_WC)) return -EINVAL; - if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) + if (args->flags & I915_MMAP_WC && !pat_enabled()) return -ENODEV; obj = i915_gem_object_lookup(file, args->handle); @@ -645,7 +646,7 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT, NULL); if (err) goto err; @@ -736,7 +737,7 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mmap_type = I915_MMAP_TYPE_FIXED; - else if (boot_cpu_has(X86_FEATURE_PAT)) + else if (pat_enabled()) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; @@ -792,7 +793,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, break; case I915_MMAP_OFFSET_WC: - if (!boot_cpu_has(X86_FEATURE_PAT)) + if (!pat_enabled()) return -ENODEV; type = I915_MMAP_TYPE_WC; break; @@ -802,7 +803,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, break; case I915_MMAP_OFFSET_UC: - if (!boot_cpu_has(X86_FEATURE_PAT)) + if (!pat_enabled()) return -ENODEV; type = I915_MMAP_TYPE_UC; break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1e426a42a36c..d87b508b59b1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -31,6 +31,7 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" +#include "i915_gem_ttm.h" #include "i915_memcpy.h" #include "i915_trace.h" @@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, } /** - * i915_gem_object_fini - Clean up a GEM object initialization + * __i915_gem_object_fini - Clean up a GEM object initialization * @obj: The gem object to cleanup * * This function cleans up gem object fields that are set up by @@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj) } /** - * Mark up the object's coherency levels for a given cache_level + * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels + * for a given cache_level * @obj: #drm_i915_gem_object * @cache_level: cache level */ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + obj->cache_level = cache_level; if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | I915_BO_CACHE_COHERENT_FOR_WRITE); - else if (HAS_LLC(to_i915(obj->base.dev))) + else if (HAS_LLC(i915)) obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ; else obj->cache_coherent = 0; obj->cache_dirty = - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) && + !IS_DGFX(i915); } bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) @@ -257,6 +262,8 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) */ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) { + assert_object_held(obj); + if (!list_empty(&obj->vma.list)) { struct i915_vma *vma; @@ -323,7 +330,16 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, obj->ops->delayed_free(obj); continue; } + + if (!i915_gem_object_trylock(obj, NULL)) { + /* busy, toss it back to the pile */ + if (llist_add(&obj->freed, &i915->mm.free_list)) + queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10)); + continue; + } + __i915_gem_object_pages_fini(obj); + i915_gem_object_unlock(obj); __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ @@ -343,7 +359,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915) static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = - container_of(work, struct drm_i915_private, mm.free_work); + container_of(work, struct drm_i915_private, mm.free_work.work); i915_gem_flush_free_objects(i915); } @@ -364,15 +380,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. - */ - i915_gem_object_make_unshrinkable(obj); - - /* * Since we require blocking on struct_mutex to unbind the freed * object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may @@ -384,7 +391,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) */ if (llist_add(&obj->freed, &i915->mm.free_list)) - queue_work(i915->wq, &i915->mm.free_work); + queue_delayed_work(i915->wq, &i915->mm.free_work, 0); } void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, @@ -456,7 +463,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset * from can't cross a page boundary. The caller must ensure that @obj pages * are pinned and that @obj is synced wrt. any related writes. * - * Returns 0 on success or -ENODEV if the type of @obj's backing store is + * Return: %0 on success or -ENODEV if the type of @obj's backing store is * unsupported. */ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) @@ -709,7 +716,7 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, void i915_gem_init__objects(struct drm_i915_private *i915) { - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work); } void i915_objects_module_exit(void) @@ -732,6 +739,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { .export = i915_gem_prime_export, }; +/** + * i915_gem_object_get_moving_fence - Get the object's moving fence if any + * @obj: The object whose moving fence to get. + * + * A non-signaled moving fence means that there is an async operation + * pending on the object that needs to be waited on before setting up + * any GPU- or CPU PTEs to the object's pages. + * + * Return: A refcounted pointer to the object's moving fence if any, + * NULL otherwise. + */ +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +{ + return dma_fence_get(i915_gem_to_ttm(obj)->moving); +} + +/** + * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any + * @obj: The object whose moving fence to wait for. + * @intr: Whether to wait interruptible. + * + * If the moving fence signaled without an error, it is detached from the + * object and put. + * + * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted, + * negative error code if the async operation represented by the + * moving fence failed. + */ +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr) +{ + struct dma_fence *fence = i915_gem_to_ttm(obj)->moving; + int ret; + + assert_object_held(obj); + if (!fence) + return 0; + + ret = dma_fence_wait(fence, intr); + if (ret) + return ret; + + if (fence->error) + return fence->error; + + i915_gem_to_ttm(obj)->moving = NULL; + dma_fence_put(fence); + return 0; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 59201801cec5..f66d46882ea7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915); struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); /** * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle @@ -211,9 +210,13 @@ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object return __i915_gem_object_lock(obj, ww, true); } -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_trylock(obj->base.resv); + if (!ww) + return dma_resv_trylock(obj->base.resv); + else + return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) @@ -296,6 +299,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST); +} + +static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); @@ -449,7 +458,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) } int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); -void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +int i915_gem_object_truncate(struct drm_i915_gem_object *obj); void i915_gem_object_writeback(struct drm_i915_gem_object *obj); /** @@ -512,11 +521,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +struct dma_fence * +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); + +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, + bool intr); + void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); +bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); @@ -533,25 +549,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); -static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - if (obj->cache_dirty) - return false; - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - - /* Currently in use by HW (display engine)? Keep flushed. */ - return i915_gem_object_is_framebuffer(obj); -} - static inline void __start_cpu_write(struct drm_i915_gem_object *obj) { obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; - if (cpu_write_needs_clflush(obj)) + if (i915_gem_cpu_write_needs_clflush(obj)) obj->cache_dirty = true; } @@ -613,6 +619,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, enum intel_memory_type type); +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment); +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup); +void __shmem_writeback(size_t size, struct address_space *mapping); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index da85169006d4..f9f7e44099fe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -34,9 +34,11 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) -#define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_NO_MMAP BIT(3) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +/* Skip the shrinker management in set_pages/unset_pages */ +#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2) +#define I915_GEM_OBJECT_IS_PROXY BIT(3) +#define I915_GEM_OBJECT_NO_MMAP BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -54,8 +56,11 @@ struct drm_i915_gem_object_ops { int (*get_pages)(struct drm_i915_gem_object *obj); void (*put_pages)(struct drm_i915_gem_object *obj, struct sg_table *pages); - void (*truncate)(struct drm_i915_gem_object *obj); + int (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*shrinker_release_pages)(struct drm_i915_gem_object *obj, + bool no_gpu_wait, + bool should_writeback); int (*pread)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *arg); @@ -486,9 +491,37 @@ struct drm_i915_gem_object { * instead go through the pin/unpin interfaces. */ atomic_t pages_pin_count; + + /** + * @shrink_pin: Prevents the pages from being made visible to + * the shrinker, while the shrink_pin is non-zero. Most users + * should pretty much never have to care about this, outside of + * some special use cases. + * + * By default most objects will start out as visible to the + * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the + * backing pages are attached to the object, like in + * __i915_gem_object_set_pages(). They will then be removed the + * shrinker list once the pages are released. + * + * The @shrink_pin is incremented by calling + * i915_gem_object_make_unshrinkable(), which will also remove + * the object from the shrinker list, if the pin count was zero. + * + * Callers will then typically call + * i915_gem_object_make_shrinkable() or + * i915_gem_object_make_purgeable() to decrement the pin count, + * and make the pages visible again. + */ atomic_t shrink_pin; /** + * @ttm_shrinkable: True when the object is using shmem pages + * underneath. Protected by the object lock. + */ + bool ttm_shrinkable; + + /** * Priority list of potential placements for this object. */ struct intel_memory_region **placements; @@ -512,6 +545,7 @@ struct drm_i915_gem_object { */ struct list_head region_link; + struct i915_refct_sgt *rsgt; struct sg_table *pages; void *mapping; @@ -547,7 +581,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_dma_page; /** - * Element within i915->mm.unbound_list or i915->mm.bound_list, + * Element within i915->mm.shrink_list or i915->mm.purge_list, * locked by i915->mm.obj_lock. */ struct list_head link; @@ -565,7 +599,7 @@ struct drm_i915_gem_object { } mm; struct { - struct sg_table *cached_io_st; + struct i915_refct_sgt *cached_io_rsgt; struct i915_gem_object_page_iter get_io_page; struct drm_i915_gem_object *backup; bool created:1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8eb1c3a6fc9c..89b70f5cde7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, /* Make the pages coherent with the GPU (flushing any swapin). */ if (obj->cache_dirty) { + WARN_ON_ONCE(IS_DGFX(i915)); obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(pages); @@ -68,7 +69,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, shrinkable = false; } - if (shrinkable) { + if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) { struct list_head *list; unsigned long flags; @@ -158,11 +159,13 @@ retry: } /* Immediately discard the backing storage */ -void i915_gem_object_truncate(struct drm_i915_gem_object *obj) +int i915_gem_object_truncate(struct drm_i915_gem_object *obj) { drm_gem_free_mmap_offset(&obj->base); if (obj->ops->truncate) - obj->ops->truncate(obj); + return obj->ops->truncate(obj); + + return 0; } /* Try to discard unwanted pages */ @@ -208,7 +211,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_WILLNEED; - i915_gem_object_make_unshrinkable(obj); + if (!i915_gem_object_has_self_managed_shrink_list(obj)) + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { unmap_object(obj, page_mask_bits(obj->mm.mapping)); @@ -414,8 +418,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } if (!ptr) { - if (GEM_WARN_ON(type == I915_MAP_WC && - !static_cpu_has(X86_FEATURE_PAT))) + err = i915_gem_object_wait_moving_fence(obj, true); + if (err) { + ptr = ERR_PTR(err); + goto err_unpin; + } + + if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled())) ptr = ERR_PTR(-ENODEV); else if (i915_gem_object_has_struct_page(obj)) ptr = i915_gem_object_map_page(obj, type); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 7986612f48fa..ca6faffcc496 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -19,6 +19,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct scatterlist *sg; struct sg_table *st; dma_addr_t dma; @@ -73,7 +74,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) dst += PAGE_SIZE; } - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + intel_gt_chipset_flush(to_gt(i915)); /* We're no longer struct page backed */ obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; @@ -140,6 +141,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); + struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; err = i915_gem_object_wait(obj, @@ -159,7 +161,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, return -EFAULT; drm_clflush_virt_range(vaddr, args->size); - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + intel_gt_chipset_flush(to_gt(i915)); i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 726b40e1fbb0..ac56124760e1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -35,7 +35,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend_prepare(&i915->gt); + intel_gt_suspend_prepare(to_gt(i915)); i915_gem_drain_freed_objects(i915); } @@ -153,7 +153,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - intel_gt_suspend_late(&i915->gt); + intel_gt_suspend_late(to_gt(i915)); spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { @@ -223,7 +223,7 @@ void i915_gem_resume(struct drm_i915_private *i915) * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(&i915->gt); + intel_gt_resume(to_gt(i915)); ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU); GEM_WARN_ON(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index a016ccec36f3..a4350227e9ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -11,7 +11,7 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, struct intel_memory_region *mem) { - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; mutex_lock(&mem->objects.lock); list_add(&obj->mm.region_link, &mem->objects.list); @@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) mutex_lock(&mem->objects.lock); list_del(&obj->mm.region_link); mutex_unlock(&mem->objects.lock); - - intel_memory_region_put(mem); } struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index d77da59fae04..cc9fe258fba7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec) cond_resched(); } -static int shmem_get_pages(struct drm_i915_gem_object *obj) +void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mem = obj->mm.region; - const unsigned long page_count = obj->base.size / PAGE_SIZE; + struct sgt_iter sgt_iter; + struct pagevec pvec; + struct page *page; + + mapping_clear_unevictable(mapping); + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (dirty) + set_page_dirty(page); + + if (backup) + mark_page_accessed(page); + + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); + + sg_free_table(st); +} + +int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment) +{ + const unsigned long page_count = size / PAGE_SIZE; unsigned long i; - struct address_space *mapping; - struct sg_table *st; struct scatterlist *sg; - struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ - unsigned int max_segment = i915_sg_segment_size(); - unsigned int sg_page_sizes; gfp_t noreclaim; int ret; /* - * Assert that the object is not currently in any GPU domain. As it - * wasn't in the GTT, there shouldn't be any way it could have been in - * a GPU cache - */ - GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); - GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); - - /* * If there's no chance of allocating enough pages for the whole * object, bail early. */ - if (obj->base.size > resource_size(&mem->region)) + if (size > resource_size(&mr->region)) return -ENOMEM; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + if (sg_alloc_table(st, page_count, GFP_KERNEL)) return -ENOMEM; -rebuild_st: - if (sg_alloc_table(st, page_count, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - /* * Get the list of pages out of our struct file. They'll be pinned * at this point until we release them. * * Fail silently without starting the shrinker */ - mapping = obj->base.filp->f_mapping; mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, @@ -135,10 +140,9 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) { - sg_page_sizes |= sg->length; + if (i) sg = sg_next(sg); - } + st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -149,14 +153,67 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL); } - if (sg) { /* loop terminated early; short sg table */ - sg_page_sizes |= sg->length; + if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); - } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); + return 0; +err_sg: + sg_mark_end(sg); + if (sg != st->sgl) { + shmem_sg_free_table(st, mapping, false, false); + } else { + mapping_clear_unevictable(mapping); + sg_free_table(st); + } + + /* + * shmemfs first checks if there is enough memory to allocate the page + * and reports ENOSPC should there be insufficient, along with the usual + * ENOMEM for a genuine allocation failure. + * + * We use ENOSPC in our driver to mean that we have run out of aperture + * space and so want to translate the error from shmemfs back to our + * usual understanding of ENOMEM. + */ + if (ret == -ENOSPC) + ret = -ENOMEM; + + return ret; +} + +static int shmem_get_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; + struct address_space *mapping = obj->base.filp->f_mapping; + const unsigned long page_count = obj->base.size / PAGE_SIZE; + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + struct sgt_iter sgt_iter; + struct page *page; + int ret; + + /* + * Assert that the object is not currently in any GPU domain. As it + * wasn't in the GTT, there shouldn't be any way it could have been in + * a GPU cache + */ + GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + +rebuild_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping, + max_segment); + if (ret) + goto err_st; + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) { /* @@ -168,6 +225,7 @@ rebuild_st: for_each_sgt_page(page, sgt_iter, st) put_page(page); sg_free_table(st); + kfree(st); max_segment = PAGE_SIZE; goto rebuild_st; @@ -185,28 +243,12 @@ rebuild_st: if (i915_gem_object_can_bypass_llc(obj)) obj->cache_dirty = true; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); return 0; -err_sg: - sg_mark_end(sg); err_pages: - mapping_clear_unevictable(mapping); - if (sg != st->sgl) { - struct pagevec pvec; - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - } - sg_free_table(st); - kfree(st); - + shmem_sg_free_table(st, mapping, false, false); /* * shmemfs first checks if there is enough memory to allocate the page * and reports ENOSPC should there be insufficient, along with the usual @@ -216,13 +258,16 @@ err_pages: * space and so want to translate the error from shmemfs back to our * usual understanding of ENOMEM. */ +err_st: if (ret == -ENOSPC) ret = -ENOMEM; + kfree(st); + return ret; } -static void +static int shmem_truncate(struct drm_i915_gem_object *obj) { /* @@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj) shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; obj->mm.pages = ERR_PTR(-EFAULT); + + return 0; } -static void -shmem_writeback(struct drm_i915_gem_object *obj) +void __shmem_writeback(size_t size, struct address_space *mapping) { - struct address_space *mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = SWAP_CLUSTER_MAX, @@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj) * instead of invoking writeback so they are aged and paged out * as normal. */ - mapping = obj->base.filp->f_mapping; /* Begin writeback on each dirty page */ - for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + for (i = 0; i < size >> PAGE_SHIFT; i++) { struct page *page; page = find_lock_page(mapping, i); @@ -281,6 +325,12 @@ put: } } +static void +shmem_writeback(struct drm_i915_gem_object *obj) +{ + __shmem_writeback(obj->base.size, obj->base.filp->f_mapping); +} + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) { - struct sgt_iter sgt_iter; - struct pagevec pvec; - struct page *page; - - GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); - mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); - - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) - set_page_dirty(page); - - if (obj->mm.madv == I915_MADV_WILLNEED) - mark_page_accessed(page); - - if (!pagevec_add(&pvec, page)) - check_release_pagevec(&pvec); - } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); - obj->mm.dirty = false; - - sg_free_table(pages); + shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping, + obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED); kfree(pages); + obj->mm.dirty = false; } static void @@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem) return 0; /* Don't error, we can simply fallback to the kernel mnt */ } -static void release_shmem(struct intel_memory_region *mem) +static int release_shmem(struct intel_memory_region *mem) { i915_gemfs_fini(mem->i915); + return 0; } static const struct intel_memory_region_ops shmem_region_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 5ab136ffdeb2..cc927e49d21f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -15,7 +15,6 @@ #include "gt/intel_gt_requests.h" -#include "dma_resv_utils.h" #include "i915_trace.h" static bool swap_available(void) @@ -37,8 +36,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, - unsigned long shrink, bool trylock_vm) +static int drop_pages(struct drm_i915_gem_object *obj, + unsigned long shrink, bool trylock_vm) { unsigned long flags; @@ -56,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, return false; } -static void try_to_writeback(struct drm_i915_gem_object *obj, - unsigned int flags) +static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags) { + if (obj->ops->shrinker_release_pages) + return obj->ops->shrinker_release_pages(obj, + !(flags & I915_SHRINK_ACTIVE), + flags & I915_SHRINK_WRITEBACK); + switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); - return; + return 0; case __I915_MADV_PURGED: - return; + return 0; } if (flags & I915_SHRINK_WRITEBACK) i915_gem_object_writeback(obj); + + return 0; } /** @@ -148,7 +153,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, */ if (shrink & I915_SHRINK_ACTIVE) /* Retire requests to unpin all idle contexts */ - intel_gt_retire_requests(&i915->gt); + intel_gt_retire_requests(to_gt(i915)); /* * As we may completely rewrite the (un)bound list whilst unbinding @@ -209,27 +214,23 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - err = 0; - if (unsafe_drop_pages(obj, shrink, trylock_vm)) { - /* May arrive from get_pages on another bo */ - if (!ww) { - if (!i915_gem_object_trylock(obj)) - goto skip; - } else { - err = i915_gem_object_lock(obj, ww); - if (err) - goto skip; - } - - if (!__i915_gem_object_put_pages(obj)) { - try_to_writeback(obj, shrink); - count += obj->base.size >> PAGE_SHIFT; - } - if (!ww) - i915_gem_object_unlock(obj); + /* May arrive from get_pages on another bo */ + if (!ww) { + if (!i915_gem_object_trylock(obj, NULL)) + goto skip; + } else { + err = i915_gem_object_lock(obj, ww); + if (err) + goto skip; } - dma_resv_prune(obj->base.resv); + if (drop_pages(obj, shrink, trylock_vm) && + !__i915_gem_object_put_pages(obj) && + !try_to_writeback(obj, shrink)) + count += obj->base.size >> PAGE_SHIFT; + + if (!ww) + i915_gem_object_unlock(obj); scanned += obj->base.size >> PAGE_SHIFT; skip: @@ -404,12 +405,18 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr list_for_each_entry_safe(vma, next, &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj = vma->obj; if (!vma->iomap || i915_vma_is_active(vma)) continue; + if (!i915_gem_object_trylock(obj, NULL)) + continue; + if (__i915_vma_unbind(vma) == 0) freed_pages += count; + + i915_gem_object_unlock(obj); } mutex_unlock(&i915->ggtt.vm.mutex); @@ -458,6 +465,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, #define obj_to_i915(obj__) to_i915((obj__)->base.dev) +/** + * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By + * default all object types that support shrinking(see IS_SHRINKABLE), will also + * make the object visible to the shrinker after allocating the system memory + * pages. + * @obj: The GEM object. + * + * This is typically used for special kernel internal objects that can't be + * easily processed by the shrinker, like if they are perma-pinned. + */ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); @@ -482,13 +499,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } -static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, - struct list_head *head) +static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) { struct drm_i915_private *i915 = obj_to_i915(obj); unsigned long flags; - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); if (!i915_gem_object_is_shrinkable(obj)) return; @@ -508,14 +524,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } +/** + * __i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +/** + * __i915_gem_object_make_purgeable - Move the object to the tail of the + * purgeable list. Objects on this list might be swapped out. Used with + * DONTNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); +} + +/** + * i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). + */ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.shrink_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_shrinkable(obj); } +/** + * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable + * list. Used with DONTNEED objects. Unlike with shrinkable objects, the + * shrinker will attempt to discard the backing pages, instead of trying to swap + * them out. + * @obj: The GEM object. + * + * MUST only be called on objects which have backing pages. + * + * MUST be balanced with previous call to i915_gem_object_make_unshrinkable(). + */ void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.purge_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_purgeable(obj); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index ddd37ccb1362..7df50fd6cc7b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) return 0; } - if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) { + if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "DMAR active"); @@ -488,6 +488,9 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) return 0; } + /* Exclude the reserved region from driver use */ + mem->region.end = reserved_base - 1; + /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; @@ -653,7 +656,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (WARN_ON(!i915_gem_object_trylock(obj))) + if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) return -EBUSY; i915_gem_object_init_memory_region(obj, mem); @@ -720,9 +723,10 @@ static int init_stolen_smem(struct intel_memory_region *mem) return i915_gem_init_stolen(mem); } -static void release_stolen_smem(struct intel_memory_region *mem) +static int release_stolen_smem(struct intel_memory_region *mem) { i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { @@ -759,10 +763,11 @@ err_fini: return err; } -static void release_stolen_lmem(struct intel_memory_region *mem) +static int release_stolen_lmem(struct intel_memory_region *mem) { io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); + return 0; } static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { @@ -778,6 +783,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct intel_uncore *uncore = &i915->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_memory_region *mem; + resource_size_t min_page_size; resource_size_t io_start; resource_size_t lmem_size; u64 lmem_base; @@ -789,8 +795,11 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, lmem_size = pci_resource_len(pdev, 2) - lmem_base; io_start = pci_resource_start(pdev, 2) + lmem_base; + min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : + I915_GTT_PAGE_SIZE_4K; + mem = intel_memory_region_create(i915, lmem_base, lmem_size, - I915_GTT_PAGE_SIZE_4K, io_start, + min_page_size, io_start, type, instance, &i915_region_stolen_lmem_ops); if (IS_ERR(mem)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 1929d6cf4150..75501db71041 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -38,12 +38,13 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, { const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = to_i915(dev); struct i915_gem_context *ctx; unsigned long idx; long ret; /* ABI: return -EIO if already wedged */ - ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); + ret = intel_gt_terminally_wedged(to_gt(i915)); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 74a1ffd0d7dd..923cc7ad8d70 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -14,13 +14,9 @@ #include "gem/i915_gem_object.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" - -#include "gt/intel_engine_pm.h" -#include "gt/intel_gt.h" -#include "gt/intel_migrate.h" - #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 @@ -34,7 +30,9 @@ * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. * @dev: The struct device used for dma mapping and unmapping. - * @cached_st: The cached scatter-gather table. + * @cached_rsgt: The cached scatter-gather table. + * @is_shmem: Set if using shmem. + * @filp: The shmem file, if using shmem backend. * * Note that DMA may be going on right up to the point where the page- * vector is unpopulated in delayed destroy. Hence keep the @@ -45,7 +43,10 @@ struct i915_ttm_tt { struct ttm_tt ttm; struct device *dev; - struct sg_table *cached_st; + struct i915_refct_sgt cached_rsgt; + + bool is_shmem; + struct file *filp; }; static const struct ttm_place sys_placement_flags = { @@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err) return err; } -static bool gpu_binds_iomem(struct ttm_resource *mem) -{ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static bool cpu_maps_iomem(struct ttm_resource *mem) -{ - /* Once / if we support GGTT, this is also false for cached ttm_tts */ - return mem->mem_type != TTM_PL_SYSTEM; -} - -static enum i915_cache_level -i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, - struct ttm_tt *ttm) -{ - return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && - ttm->caching == ttm_cached) ? I915_CACHE_LLC : - I915_CACHE_NONE; -} - -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); - static enum ttm_caching i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) { /* - * Objects only allowed in system get cached cpu-mappings. - * Other objects get WC mapping for now. Even if in system. + * Objects only allowed in system get cached cpu-mappings, or when + * evicting lmem-only buffers to system for swapping. Other objects get + * WC mapping for now. Even if in system. */ - if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && - obj->mm.n_placements <= 1) + if (obj->mm.n_placements <= 1) return ttm_cached; return ttm_write_combined; @@ -179,15 +158,103 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, placement->busy_placement = busy; } +static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + const unsigned int max_segment = i915_sg_segment_size(); + const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; + struct file *filp = i915_tt->filp; + struct sgt_iter sgt_iter; + struct sg_table *st; + struct page *page; + unsigned long i; + int err; + + if (!filp) { + struct address_space *mapping; + gfp_t mask; + + filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + + mapping = filp->f_mapping; + mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); + + i915_tt->filp = filp; + } + + st = &i915_tt->cached_rsgt.table; + err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping, + max_segment); + if (err) + return err; + + err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (err) + goto err_free_st; + + i = 0; + for_each_sgt_page(page, sgt_iter, st) + ttm->pages[i++] = page; + + if (ttm->page_flags & TTM_TT_FLAG_SWAPPED) + ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + + return 0; + +err_free_st: + shmem_sg_free_table(st, filp->f_mapping, false, false); + + return err; +} + +static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED; + struct sg_table *st = &i915_tt->cached_rsgt.table; + + shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping, + backup, backup); +} + +static void i915_ttm_tt_release(struct kref *ref) +{ + struct i915_ttm_tt *i915_tt = + container_of(ref, typeof(*i915_tt), cached_rsgt.kref); + struct sg_table *st = &i915_tt->cached_rsgt.table; + + GEM_WARN_ON(st->sgl); + + kfree(i915_tt); +} + +static const struct i915_refct_sgt_ops tt_rsgt_ops = { + .release = i915_ttm_tt_release +}; + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, bo->resource->mem_type); struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + enum ttm_caching caching; struct i915_ttm_tt *i915_tt; int ret; + if (!obj) + return NULL; + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); if (!i915_tt) return NULL; @@ -196,38 +263,66 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_TT_FLAG_ZERO_ALLOC; - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, - i915_ttm_select_tt_caching(obj)); - if (ret) { - kfree(i915_tt); - return NULL; + caching = i915_ttm_select_tt_caching(obj); + if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) { + page_flags |= TTM_TT_FLAG_EXTERNAL | + TTM_TT_FLAG_EXTERNAL_MAPPABLE; + i915_tt->is_shmem = true; } + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching); + if (ret) + goto err_free; + + __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size, + &tt_rsgt_ops); + i915_tt->dev = obj->base.dev->dev; return &i915_tt->ttm; + +err_free: + kfree(i915_tt); + return NULL; +} + +static int i915_ttm_tt_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->is_shmem) + return i915_ttm_tt_shmem_populate(bdev, ttm, ctx); + + return ttm_pool_alloc(&bdev->pool, ttm, ctx); } static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct sg_table *st = &i915_tt->cached_rsgt.table; - if (i915_tt->cached_st) { - dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, - DMA_BIDIRECTIONAL, 0); - sg_free_table(i915_tt->cached_st); - kfree(i915_tt->cached_st); - i915_tt->cached_st = NULL; + if (st->sgl) + dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + + if (i915_tt->is_shmem) { + i915_ttm_tt_shmem_unpopulate(ttm); + } else { + sg_free_table(st); + ttm_pool_free(&bdev->pool, ttm); } - ttm_pool_free(&bdev->pool, ttm); } static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + if (i915_tt->filp) + fput(i915_tt->filp); + ttm_tt_fini(ttm); - kfree(i915_tt); + i915_refct_sgt_put(&i915_tt->cached_rsgt); } static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, @@ -235,6 +330,17 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + if (!obj) + return false; + + /* + * EXTERNAL objects should never be swapped out by TTM, instead we need + * to handle that ourselves. TTM will already skip such objects for us, + * but we would like to avoid grabbing locks for no good reason. + */ + if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) + return false; + /* Will do for now. Our pinned objects are still on TTM's LRU lists */ return i915_gem_object_evictable(obj); } @@ -245,28 +351,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, *placement = i915_sys_placement; } -static int i915_ttm_move_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret; - - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); - if (ret) - return ret; - - ret = __i915_gem_object_put_pages(obj); - if (ret) - return ret; - - return 0; -} - -static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +/** + * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information + * @obj: The GEM object + * This function frees any LMEM-related information that is cached on + * the object. For example the radix tree for fast page lookup and the + * cached refcounted sg-table + */ +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj) { struct radix_tree_iter iter; void __rcu **slot; - if (!obj->ttm.cached_io_st) + if (!obj->ttm.cached_io_rsgt) return; rcu_read_lock(); @@ -274,93 +371,106 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); rcu_read_unlock(); - sg_free_table(obj->ttm.cached_io_st); - kfree(obj->ttm.cached_io_st); - obj->ttm.cached_io_st = NULL; + i915_refct_sgt_put(obj->ttm.cached_io_rsgt); + obj->ttm.cached_io_rsgt = NULL; } -static void -i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +/** + * i915_ttm_purge - Clear an object of its memory + * @obj: The object + * + * This function is called to clear an object of it's memory when it is + * marked as not needed anymore. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; - if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { - obj->write_domain = I915_GEM_DOMAIN_WC; - obj->read_domains = I915_GEM_DOMAIN_WC; - } else { - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - } -} + if (obj->mm.madv == __I915_MADV_PURGED) + return 0; -static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) -{ - struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - unsigned int cache_level; - unsigned int i; + ret = ttm_bo_validate(bo, &place, &ctx); + if (ret) + return ret; - /* - * If object was moved to an allowable region, update the object - * region to consider it migrated. Note that if it's currently not - * in an allowable region, it's evicted and we don't update the - * object region. - */ - if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { - for (i = 0; i < obj->mm.n_placements; ++i) { - struct intel_memory_region *mr = obj->mm.placements[i]; - - if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && - mr != obj->mm.region) { - i915_gem_object_release_memory_region(obj); - i915_gem_object_init_memory_region(obj, mr); - break; - } - } + if (bo->ttm && i915_tt->filp) { + /* + * The below fput(which eventually calls shmem_truncate) might + * be delayed by worker, so when directly called to purge the + * pages(like by the shrinker) we should try to be more + * aggressive and release the pages immediately. + */ + shmem_truncate_range(file_inode(i915_tt->filp), + 0, (loff_t)-1); + fput(fetch_and_zero(&i915_tt->filp)); } - obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); - - obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : - I915_BO_FLAG_STRUCT_PAGE; + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + obj->mm.madv = __I915_MADV_PURGED; - cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, - bo->ttm); - i915_gem_object_set_cache_coherency(obj, cache_level); + return 0; } -static void i915_ttm_purge(struct drm_i915_gem_object *obj) +static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj, + bool no_wait_gpu, + bool should_writeback) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); struct ttm_operation_ctx ctx = { .interruptible = true, - .no_wait_gpu = false, + .no_wait_gpu = no_wait_gpu, }; struct ttm_placement place = {}; int ret; - if (obj->mm.madv == __I915_MADV_PURGED) - return; + if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM) + return 0; + + GEM_BUG_ON(!i915_tt->is_shmem); + + if (!i915_tt->filp) + return 0; + + ret = ttm_bo_wait_ctx(bo, &ctx); + if (ret) + return ret; - /* TTM's purge interface. Note that we might be reentering. */ + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + return i915_ttm_purge(obj); + case __I915_MADV_PURGED: + return 0; + } + + if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) + return 0; + + bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED; ret = ttm_bo_validate(bo, &place, &ctx); - if (!ret) { - obj->write_domain = 0; - obj->read_domains = 0; - i915_ttm_adjust_gem_after_move(obj); - i915_ttm_free_cached_io_st(obj); - obj->mm.madv = __I915_MADV_PURGED; + if (ret) { + bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + return ret; } -} -static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) -{ - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - int ret = i915_ttm_move_notify(bo); + if (should_writeback) + __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping); - GEM_WARN_ON(ret); - GEM_WARN_ON(obj->ttm.cached_io_st); - if (!ret && obj->mm.madv != I915_MADV_WILLNEED) - i915_ttm_purge(obj); + return 0; } static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) @@ -369,232 +479,101 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) if (likely(obj)) { __i915_gem_object_pages_fini(obj); - i915_ttm_free_cached_io_st(obj); + i915_ttm_free_cached_io_rsgt(obj); } } -static struct intel_memory_region * -i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) -{ - struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); - - /* There's some room for optimization here... */ - GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && - ttm_mem_type < I915_PL_LMEM0); - if (ttm_mem_type == I915_PL_SYSTEM) - return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, - 0); - - return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, - ttm_mem_type - I915_PL_LMEM0); -} - -static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) { struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); struct sg_table *st; int ret; - if (i915_tt->cached_st) - return i915_tt->cached_st; - - st = kzalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); + if (i915_tt->cached_rsgt.table.sgl) + return i915_refct_sgt_get(&i915_tt->cached_rsgt); + st = &i915_tt->cached_rsgt.table; ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, i915_sg_segment_size(), GFP_KERNEL); if (ret) { - kfree(st); + st->sgl = NULL; return ERR_PTR(ret); } ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); if (ret) { sg_free_table(st); - kfree(st); return ERR_PTR(ret); } - i915_tt->cached_st = st; - return st; + return i915_refct_sgt_get(&i915_tt->cached_rsgt); } -static struct sg_table * +/** + * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the + * resource memory + * @obj: The GEM object used for sg-table caching + * @res: The struct ttm_resource for which an sg-table is requested. + * + * This function returns a refcounted sg-table representing the memory + * pointed to by @res. If @res is the object's current resource it may also + * cache the sg_table on the object or attempt to access an already cached + * sg-table. The refcounted sg-table needs to be put when no-longer in use. + * + * Return: A valid pointer to a struct i915_refct_sgt or error pointer on + * failure. + */ +struct i915_refct_sgt * i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - if (!gpu_binds_iomem(res)) + if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. */ - GEM_WARN_ON(!cpu_maps_iomem(res)); - return intel_region_ttm_resource_to_st(obj->mm.region, res); -} - -static int i915_ttm_accel_move(struct ttm_buffer_object *bo, - bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st) -{ - struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), - bdev); - struct ttm_resource_manager *src_man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct sg_table *src_st; - struct i915_request *rq; - struct ttm_tt *src_ttm = bo->ttm; - enum i915_cache_level src_level, dst_level; - int ret; - - if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt)) - return -EINVAL; + GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res)); + if (bo->resource == res) { + if (!obj->ttm.cached_io_rsgt) { + struct i915_refct_sgt *rsgt; - dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); - if (clear) { - if (bo->type == ttm_bo_type_kernel) - return -EINVAL; + rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, + res); + if (IS_ERR(rsgt)) + return rsgt; - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - 0, &rq); - - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - } - intel_engine_pm_put(i915->gt.migrate.context->engine); - } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) : - obj->ttm.cached_io_st; - - src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); - intel_engine_pm_get(i915->gt.migrate.context->engine); - ret = intel_context_migrate_copy(i915->gt.migrate.context, - NULL, src_st->sgl, src_level, - gpu_binds_iomem(bo->resource), - dst_st->sgl, dst_level, - gpu_binds_iomem(dst_mem), - &rq); - if (!ret && rq) { - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + obj->ttm.cached_io_rsgt = rsgt; } - intel_engine_pm_put(i915->gt.migrate.context->engine); + return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return ret; -} - -static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, - struct ttm_resource *dst_mem, - struct ttm_tt *dst_ttm, - struct sg_table *dst_st, - bool allow_accel) -{ - int ret = -EINVAL; - - if (allow_accel) - ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st); - if (ret) { - struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; - - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = !cpu_maps_iomem(bo->resource) ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); - } + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); } -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *dst_mem, - struct ttm_place *hop) +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - struct ttm_resource_manager *dst_man = - ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct ttm_tt *ttm = bo->ttm; - struct sg_table *dst_st; - bool clear; int ret; - /* Sync for now. We could do the actual copy async. */ - ret = ttm_bo_wait_ctx(bo, ctx); - if (ret) - return ret; + if (!obj) + return; ret = i915_ttm_move_notify(bo); - if (ret) - return ret; - - if (obj->mm.madv != I915_MADV_WILLNEED) { + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_rsgt); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) i915_ttm_purge(obj); - ttm_resource_free(bo, &dst_mem); - return 0; - } - - /* Populate ttm with pages if needed. Typically system memory. */ - if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, ttm, ctx); - if (ret) - return ret; - } - - dst_st = i915_ttm_resource_get_st(obj, dst_mem); - if (IS_ERR(dst_st)) - return PTR_ERR(dst_st); - - clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true); - - ttm_bo_move_sync_cleanup(bo, dst_mem); - i915_ttm_adjust_domains_after_move(obj); - i915_ttm_free_cached_io_st(obj); - - if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { - obj->ttm.cached_io_st = dst_st; - obj->ttm.get_io_page.sg_pos = dst_st->sgl; - obj->ttm.get_io_page.sg_idx = 0; - } - - i915_ttm_adjust_gem_after_move(obj); - return 0; } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { - if (!cpu_maps_iomem(mem)) + if (!i915_ttm_cpu_maps_iomem(mem)) return 0; mem->bus.caching = ttm_write_combined; @@ -607,19 +586,26 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; struct scatterlist *sg; + unsigned long base; unsigned int ofs; + GEM_BUG_ON(!obj); GEM_WARN_ON(bo->ttm); + base = obj->mm.region->iomap.base - obj->mm.region->region.start; sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } +/* + * All callbacks need to take care not to downcast a struct ttm_buffer_object + * without checking its subclass, since it might be a TTM ghost object. + */ static struct ttm_device_funcs i915_ttm_bo_driver = { .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_populate = i915_ttm_tt_populate, .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, .ttm_tt_destroy = i915_ttm_tt_destroy, .eviction_valuable = i915_ttm_eviction_valuable, @@ -649,7 +635,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - struct sg_table *st; int real_num_busy; int ret; @@ -676,7 +661,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, return i915_ttm_err_to_gem(ret); } - i915_ttm_adjust_lru(obj); if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); if (ret) @@ -687,14 +671,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, } if (!i915_gem_object_has_pages(obj)) { - /* Object either has a page vector or is an iomem object */ - st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; - if (IS_ERR(st)) - return PTR_ERR(st); + struct i915_refct_sgt *rsgt = + i915_ttm_resource_get_st(obj, bo->resource); + + if (IS_ERR(rsgt)) + return PTR_ERR(rsgt); - __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + GEM_BUG_ON(obj->mm.rsgt); + obj->mm.rsgt = rsgt; + __i915_gem_object_set_pages(obj, &rsgt->table, + i915_sg_dma_sizes(rsgt->table.sgl)); } + i915_ttm_adjust_lru(obj); return ret; } @@ -766,12 +755,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, * and shrinkers will move it out if needed. */ - i915_ttm_adjust_lru(obj); + if (obj->mm.rsgt) + i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)); } -static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +/** + * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists. + * @obj: The object + */ +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + bool shrinkable = + bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm); /* * Don't manipulate the TTM LRUs while in TTM bo destruction. @@ -781,10 +779,53 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) return; /* + * We skip managing the shrinker LRU in set_pages() and just manage + * everything here. This does at least solve the issue with having + * temporary shmem mappings(like with evicted lmem) not being visible to + * the shrinker. Only our shmem objects are shrinkable, everything else + * we keep as unshrinkable. + * + * To make sure everything plays nice we keep an extra shrink pin in TTM + * if the underlying pages are not currently shrinkable. Once we release + * our pin, like when the pages are moved to shmem, the pages will then + * be added to the shrinker LRU, assuming the caller isn't also holding + * a pin. + * + * TODO: consider maybe also bumping the shrinker list here when we have + * already unpinned it, which should give us something more like an LRU. + * + * TODO: There is a small window of opportunity for this function to + * get called from eviction after we've dropped the last GEM refcount, + * but before the TTM deleted flag is set on the object. Avoid + * adjusting the shrinker list in such cases, since the object is + * not available to the shrinker anyway due to its zero refcount. + * To fix this properly we should move to a TTM shrinker LRU list for + * these objects. + */ + if (kref_get_unless_zero(&obj->base.refcount)) { + if (shrinkable != obj->mm.ttm_shrinkable) { + if (shrinkable) { + if (obj->mm.madv == I915_MADV_WILLNEED) + __i915_gem_object_make_shrinkable(obj); + else + __i915_gem_object_make_purgeable(obj); + } else { + i915_gem_object_make_unshrinkable(obj); + } + + obj->mm.ttm_shrinkable = shrinkable; + } + i915_gem_object_put(obj); + } + + /* * Put on the correct LRU list depending on the MADV status */ spin_lock(&bo->bdev->lru_lock); - if (obj->mm.madv != I915_MADV_WILLNEED) { + if (shrinkable) { + /* Try to keep shmem_tt from being considered for shrinking. */ + bo->priority = TTM_MAX_BO_PRIORITY - 1; + } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { if (bo->priority < I915_TTM_PRIO_HAS_PAGES) @@ -823,15 +864,39 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) { struct vm_area_struct *area = vmf->vma; - struct drm_i915_gem_object *obj = - i915_ttm_to_gem(area->vm_private_data); + struct ttm_buffer_object *bo = area->vm_private_data; + struct drm_device *dev = bo->base.dev; + struct drm_i915_gem_object *obj; + vm_fault_t ret; + int idx; + + obj = i915_ttm_to_gem(bo); + if (!obj) + return VM_FAULT_SIGBUS; /* Sanity check that we allow writing into this object */ if (unlikely(i915_gem_object_is_readonly(obj) && area->vm_flags & VM_WRITE)) return VM_FAULT_SIGBUS; - return ttm_bo_vm_fault(vmf); + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (drm_dev_enter(dev, &idx)) { + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + i915_ttm_adjust_lru(obj); + + dma_resv_unlock(bo->base.resv); + return ret; } static int @@ -882,13 +947,18 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, .truncate = i915_ttm_purge, + .shrinker_release_pages = i915_ttm_shrinker_release_pages, + .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, .mmap_ops = &vm_ops_ttm, }; @@ -901,6 +971,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) mutex_destroy(&obj->ttm.get_io_page.lock); if (obj->ttm.created) { + /* + * We freely manage the shrinker LRU outide of the mm.pages life + * cycle. As a result when destroying the object we should be + * extra paranoid and ensure we remove it from the LRU, before + * we free the object. + * + * Touching the ttm_shrinkable outside of the object lock here + * should be safe now that the last GEM object ref was dropped. + */ + if (obj->mm.ttm_shrinkable) + i915_gem_object_make_unshrinkable(obj); + i915_ttm_backup_free(obj); /* This releases all gem object bindings to the backend. */ @@ -940,10 +1022,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); /* Don't put on a region list until we're either locked or fully initialized. */ - obj->mm.region = intel_memory_region_get(mem); + obj->mm.region = mem; INIT_LIST_HEAD(&obj->mm.region_link); - i915_gem_object_make_unshrinkable(obj); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : @@ -955,6 +1036,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, GEM_BUG_ON(page_size && obj->mm.n_placements); /* + * Keep an extra shrink pin to prevent the object from being made + * shrinkable too early. If the ttm_tt is ever allocated in shmem, we + * drop the pin. The TTM backend manages the shrinker LRU itself, + * outside of the normal mm.pages life cycle. + */ + i915_gem_object_make_unshrinkable(obj); + + /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the * destructor until obj->ttm.created is true. @@ -980,6 +1069,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, static const struct intel_memory_region_ops ttm_system_region_ops = { .init_object = __i915_gem_ttm_object_init, + .release = intel_region_ttm_fini, }; struct intel_memory_region * @@ -999,50 +1089,3 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915, intel_memory_region_set_name(mr, "system-ttm"); return mr; } - -/** - * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to - * another - * @dst: The destination object - * @src: The source object - * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. - * @intr: Whether to perform waits interruptible: - * - * Note: The caller is responsible for assuring that the underlying - * TTM objects are populated if needed and locked. - * - * Return: Zero on success. Negative error code on error. If @intr == true, - * then it may return -ERESTARTSYS or -EINTR. - */ -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr) -{ - struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); - struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); - struct ttm_operation_ctx ctx = { - .interruptible = intr, - }; - struct sg_table *dst_st; - int ret; - - assert_object_held(dst); - assert_object_held(src); - - /* - * Sync for now. This will change with async moves. - */ - ret = ttm_bo_wait_ctx(dst_bo, &ctx); - if (!ret) - ret = ttm_bo_wait_ctx(src_bo, &ctx); - if (ret) - return ret; - - dst_st = gpu_binds_iomem(dst_bo->resource) ? - dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm); - - __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm, - dst_st, allow_accel); - - return 0; -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 0b7291dd897c..9d698ad00853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -5,6 +5,8 @@ #ifndef _I915_GEM_TTM_H_ #define _I915_GEM_TTM_H_ +#include <drm/ttm/ttm_placement.h> + #include "gem/i915_gem_object_types.h" /** @@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + if (bo->destroy != i915_ttm_bo_destroy) return NULL; return container_of(bo, struct drm_i915_gem_object, __do_not_access); @@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, resource_size_t page_size, unsigned int flags); -int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, - struct drm_i915_gem_object *src, - bool allow_accel, bool intr); - /* Internal I915 TTM declarations and definitions below. */ #define I915_PL_LMEM0 TTM_PL_PRIV @@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, struct ttm_placement *i915_ttm_sys_placement(void); +void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj); + +struct i915_refct_sgt * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res); + +void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +int i915_ttm_purge(struct drm_i915_gem_object *obj); + +/** + * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as LMEM for GTT binding purposes, + * false otherwise. + */ +static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem) +{ + return mem->mem_type != I915_PL_SYSTEM; +} + +/** + * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU? + * @mem: struct ttm_resource representing the memory. + * + * Return: true if memory should be viewed as IOMEM for CPU mapping purposes. + */ +static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != I915_PL_SYSTEM; +} #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c new file mode 100644 index 000000000000..ee9612a3ee5e --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/ttm/ttm_bo_driver.h> + +#include "i915_deps.h" +#include "i915_drv.h" +#include "intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" + +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt.h" +#include "gt/intel_migrate.h" + +/** + * DOC: Selftest failure modes for failsafe migration: + * + * For fail_gpu_migration, the gpu blit scheduled is always a clear blit + * rather than a copy blit, and then we force the failure paths as if + * the blit fence returned an error. + * + * For fail_work_allocation we fail the kmalloc of the async worker, we + * sync the gpu blit. If it then fails, or fail_gpu_migration is set to + * true, then a memcpy operation is performed sync. + */ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +static bool fail_gpu_migration; +static bool fail_work_allocation; + +void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation) +{ + fail_gpu_migration = gpu_migration; + fail_work_allocation = work_allocation; +} +#endif + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + !i915_ttm_gtt_binds_lmem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +/** + * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a + * TTM move + * @obj: The gem object + */ +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +/** + * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move + * @obj: The gem object + * + * Adjusts the GEM object's region, mem_flags and cache coherency after a + * TTM move. + */ +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +/** + * i915_ttm_move_notify - Prepare an object for move + * @bo: The ttm buffer object. + * + * This function prepares an object for move by removing all GPU bindings, + * removing all CPU mapings and finally releasing the pages sg-table. + * + * Return: 0 if successful, negative error code on error. + */ +int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct sg_table *dst_st, + const struct i915_deps *deps) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_request *rq; + struct ttm_tt *src_ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915))) + return ERR_PTR(-EINVAL); + + /* With fail_gpu_migration, we always perform a GPU clear. */ + if (I915_SELFTEST_ONLY(fail_gpu_migration)) + clear = true; + + dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm); + if (clear) { + if (bo->type == ttm_bo_type_kernel && + !I915_SELFTEST_ONLY(fail_gpu_migration)) + return ERR_PTR(-EINVAL); + + intel_engine_pm_get(to_gt(i915)->migrate.context->engine); + ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps, + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + 0, &rq); + } else { + struct i915_refct_sgt *src_rsgt = + i915_ttm_resource_get_st(obj, bo->resource); + + if (IS_ERR(src_rsgt)) + return ERR_CAST(src_rsgt); + + src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); + intel_engine_pm_get(to_gt(i915)->migrate.context->engine); + ret = intel_context_migrate_copy(to_gt(i915)->migrate.context, + deps, src_rsgt->table.sgl, + src_level, + i915_ttm_gtt_binds_lmem(bo->resource), + dst_st->sgl, dst_level, + i915_ttm_gtt_binds_lmem(dst_mem), + &rq); + + i915_refct_sgt_put(src_rsgt); + } + + intel_engine_pm_put(to_gt(i915)->migrate.context->engine); + + if (ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + + return ret ? ERR_PTR(ret) : &rq->fence; +} + +/** + * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality. + * @_dst_iter: Storage space for the destination kmap iterator. + * @_src_iter: Storage space for the source kmap iterator. + * @dst_iter: Pointer to the destination kmap iterator. + * @src_iter: Pointer to the source kmap iterator. + * @clear: Whether to clear instead of copy. + * @src_rsgt: Refcounted scatter-gather list of source memory. + * @dst_rsgt: Refcounted scatter-gather list of destination memory. + */ +struct i915_ttm_memcpy_arg { + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, + _src_iter; + struct ttm_kmap_iter *dst_iter; + struct ttm_kmap_iter *src_iter; + unsigned long num_pages; + bool clear; + struct i915_refct_sgt *src_rsgt; + struct i915_refct_sgt *dst_rsgt; +}; + +/** + * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence. + * @fence: The dma-fence. + * @work: The work struct use for the memcpy work. + * @lock: The fence lock. Not used to protect anything else ATM. + * @irq_work: Low latency worker to signal the fence since it can't be done + * from the callback for lockdep reasons. + * @cb: Callback for the accelerated migration fence. + * @arg: The argument for the memcpy functionality. + */ +struct i915_ttm_memcpy_work { + struct dma_fence fence; + struct work_struct work; + /* The fence lock */ + spinlock_t lock; + struct irq_work irq_work; + struct dma_fence_cb cb; + struct i915_ttm_memcpy_arg arg; +}; + +static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg) +{ + ttm_move_memcpy(arg->clear, arg->num_pages, + arg->dst_iter, arg->src_iter); +} + +static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg, + struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) : + ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap, + &dst_rsgt->table, dst_reg->region.start); + + arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap, + &obj->ttm.cached_io_rsgt->table, + src_reg->region.start); + arg->clear = clear; + arg->num_pages = bo->base.size >> PAGE_SHIFT; + + arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt); + arg->src_rsgt = clear ? NULL : + i915_ttm_resource_get_st(obj, bo->resource); +} + +static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg) +{ + i915_refct_sgt_put(arg->src_rsgt); + i915_refct_sgt_put(arg->dst_rsgt); +} + +static void __memcpy_work(struct work_struct *work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(work, typeof(*copy_work), work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + bool cookie = dma_fence_begin_signalling(); + + i915_ttm_move_memcpy(arg); + dma_fence_end_signalling(cookie); + + dma_fence_signal(©_work->fence); + + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_irq_work(struct irq_work *irq_work) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(irq_work, typeof(*copy_work), irq_work); + struct i915_ttm_memcpy_arg *arg = ©_work->arg; + + dma_fence_signal(©_work->fence); + i915_ttm_memcpy_release(arg); + dma_fence_put(©_work->fence); +} + +static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_ttm_memcpy_work *copy_work = + container_of(cb, typeof(*copy_work), cb); + + if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) { + INIT_WORK(©_work->work, __memcpy_work); + queue_work(system_unbound_wq, ©_work->work); + } else { + init_irq_work(©_work->irq_work, __memcpy_irq_work); + irq_work_queue(©_work->irq_work); + } +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "i915_ttm_memcpy_work"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops dma_fence_memcpy_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, +}; + +static struct dma_fence * +i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work, + struct dma_fence *dep) +{ + int ret; + + spin_lock_init(&work->lock); + dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0); + dma_fence_get(&work->fence); + ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb); + if (ret) { + if (ret != -ENOENT) + dma_fence_wait(dep, false); + + return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL : + dep->error); + } + + return &work->fence; +} + +static struct dma_fence * +__i915_ttm_move(struct ttm_buffer_object *bo, + const struct ttm_operation_ctx *ctx, bool clear, + struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, + struct i915_refct_sgt *dst_rsgt, bool allow_accel, + const struct i915_deps *move_deps) +{ + struct i915_ttm_memcpy_work *copy_work = NULL; + struct i915_ttm_memcpy_arg _arg, *arg = &_arg; + struct dma_fence *fence = ERR_PTR(-EINVAL); + + if (allow_accel) { + fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, + &dst_rsgt->table, move_deps); + + /* + * We only need to intercept the error when moving to lmem. + * When moving to system, TTM or shmem will provide us with + * cleared pages. + */ + if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) && + !I915_SELFTEST_ONLY(fail_gpu_migration || + fail_work_allocation)) + goto out; + } + + /* If we've scheduled gpu migration. Try to arm error intercept. */ + if (!IS_ERR(fence)) { + struct dma_fence *dep = fence; + + if (!I915_SELFTEST_ONLY(fail_work_allocation)) + copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL); + + if (copy_work) { + arg = ©_work->arg; + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + fence = i915_ttm_memcpy_work_arm(copy_work, dep); + } else { + dma_fence_wait(dep, false); + fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? + -EINVAL : fence->error); + } + dma_fence_put(dep); + + if (!IS_ERR(fence)) + goto out; + } else if (move_deps) { + int err = i915_deps_sync(move_deps, ctx); + + if (err) + return ERR_PTR(err); + } + + /* Error intercept failed or no accelerated migration to start with */ + if (!copy_work) + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + i915_ttm_move_memcpy(arg); + i915_ttm_memcpy_release(arg); + kfree(copy_work); + + return NULL; +out: + if (!fence && copy_work) { + i915_ttm_memcpy_release(arg); + kfree(copy_work); + } + + return fence; +} + +static int +prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + struct i915_deps *deps) +{ + int ret; + + ret = i915_deps_add_dependency(deps, bo->moving, ctx); + if (!ret) + ret = i915_deps_add_resv(deps, bo->base.resv, ctx); + + return ret; +} + +/** + * i915_ttm_move - The TTM move callback used by i915. + * @bo: The buffer object. + * @evict: Whether this is an eviction. + * @dst_mem: The destination ttm resource. + * @hop: If we need multihop, what temporary memory type to move to. + * + * Return: 0 if successful, negative error code otherwise. + */ +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct dma_fence *migration_fence = NULL; + struct ttm_tt *ttm = bo->ttm; + struct i915_refct_sgt *dst_rsgt; + bool clear; + int ret; + + if (GEM_WARN_ON(!obj)) { + ttm_bo_move_null(bo, dst_mem); + return 0; + } + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. */ + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); + if (ret) + return ret; + } + + dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_rsgt)) + return PTR_ERR(dst_rsgt); + + clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + struct i915_deps deps; + + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + ret = prev_deps(bo, ctx, &deps); + if (ret) { + i915_refct_sgt_put(dst_rsgt); + return ret; + } + + migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm, + dst_rsgt, true, &deps); + i915_deps_fini(&deps); + } + + /* We can possibly get an -ERESTARTSYS here */ + if (IS_ERR(migration_fence)) { + i915_refct_sgt_put(dst_rsgt); + return PTR_ERR(migration_fence); + } + + if (migration_fence) { + ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, + true, dst_mem); + if (ret) { + dma_fence_wait(migration_fence, false); + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + dma_fence_put(migration_fence); + } else { + ttm_bo_move_sync_cleanup(bo, dst_mem); + } + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_rsgt(obj); + + if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_rsgt = dst_rsgt; + obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl; + obj->ttm.get_io_page.sg_idx = 0; + } else { + i915_refct_sgt_put(dst_rsgt); + } + + i915_ttm_adjust_lru(obj); + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +/** + * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to + * another + * @dst: The destination object + * @src: The source object + * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used. + * @intr: Whether to perform waits interruptible: + * + * Note: The caller is responsible for assuring that the underlying + * TTM objects are populated if needed and locked. + * + * Return: Zero on success. Negative error code on error. If @intr == true, + * then it may return -ERESTARTSYS or -EINTR. + */ +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr) +{ + struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst); + struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src); + struct ttm_operation_ctx ctx = { + .interruptible = intr, + }; + struct i915_refct_sgt *dst_rsgt; + struct dma_fence *copy_fence; + struct i915_deps deps; + int ret; + + assert_object_held(dst); + assert_object_held(src); + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + ret = dma_resv_reserve_shared(src_bo->base.resv, 1); + if (ret) + return ret; + + ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx); + if (ret) + return ret; + + ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx); + if (ret) + return ret; + + dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource); + copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource, + dst_bo->ttm, dst_rsgt, allow_accel, + &deps); + + i915_deps_fini(&deps); + i915_refct_sgt_put(dst_rsgt); + if (IS_ERR_OR_NULL(copy_fence)) + return PTR_ERR_OR_ZERO(copy_fence); + + dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence); + dma_resv_add_shared_fence(src_bo->base.resv, copy_fence); + + dma_fence_put(copy_fence); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h new file mode 100644 index 000000000000..d2e7f149e05c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_MOVE_H_ +#define _I915_GEM_TTM_MOVE_H_ + +#include <linux/types.h> + +#include "i915_selftest.h" + +struct ttm_buffer_object; +struct ttm_operation_ctx; +struct ttm_place; +struct ttm_resource; +struct ttm_tt; + +struct drm_i915_gem_object; +struct i915_refct_sgt; + +int i915_ttm_move_notify(struct ttm_buffer_object *bo); + +I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, + bool work_allocation)); + +int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, + struct drm_i915_gem_object *src, + bool allow_accel, bool intr); + +/* Internal I915 TTM declarations and definitions below. */ + +int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop); + +void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj); + +void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 3b6d14b5c604..9aad84059d56 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -12,6 +12,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" /** @@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = backup; return 0; @@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu, false); GEM_WARN_ON(err); + ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = NULL; err = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 3173c9f9a040..3cc01c30dd62 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -529,7 +529,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - if (!dev_priv->gt.vm->has_read_only) + if (!to_gt(dev_priv)->vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f11325484110..dab3d30c09a0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -10,7 +10,6 @@ #include "gt/intel_engine.h" -#include "dma_resv_utils.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -52,13 +51,6 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, } dma_resv_iter_end(&cursor); - /* - * Opportunistically prune the fences iff we know they have *all* been - * signaled. - */ - if (timeout > 0) - dma_resv_prune(resv); - return ret; } @@ -262,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, unsigned int flags) { might_sleep(); - /* NOP for now. */ - return 0; + + return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE)); } diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index dbdbdc344d87..7271fbf813fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -12,6 +12,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { + char huge_opt[] = "huge=within_size"; /* r/w */ struct file_system_type *type; struct vfsmount *gemfs; char *opts; @@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) */ opts = NULL; - if (intel_vtd_active()) { + if (intel_vtd_active(i915)) { if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - static char huge_opt[] = "huge=within_size"; /* r/w */ - opts = huge_opt; drm_info(&i915->drm, "Transparent Hugepage mode '%s'\n", diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b2003133deaf..11f0aa65f8a3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -22,6 +22,22 @@ #include "selftests/mock_region.h" #include "selftests/i915_random.h" +static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915, + struct file *file) +{ + struct i915_gem_context *ctx = live_context(i915, file); + struct i915_address_space *vm; + + if (IS_ERR(ctx)) + return ctx; + + vm = ctx->vm; + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + + return ctx; +} + static const unsigned int page_sizes[] = { I915_GTT_PAGE_SIZE_2M, I915_GTT_PAGE_SIZE_64K, @@ -552,7 +568,7 @@ out_unpin: out_put: i915_gem_object_put(obj); out_region: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg) __i915_gem_object_put_pages(obj); i915_gem_object_unlock(obj); i915_gem_object_put(obj); + + i915_gem_drain_freed_objects(i915); } } @@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_unbind(vma); - if (err) - return err; - err = i915_vma_pin(vma, size, 0, flags | offset); if (err) { /* @@ -1117,7 +1131,7 @@ out_vma_unpin: return err; } -static int igt_write_huge(struct i915_gem_context *ctx, +static int igt_write_huge(struct drm_i915_private *i915, struct drm_i915_gem_object *obj) { struct i915_gem_engines *engines; @@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, IGT_TIMEOUT(end_time); unsigned int max_page_size; unsigned int count; + struct i915_gem_context *ctx; + struct file *file; u64 max; u64 num; u64 size; @@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx, int i, n; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; @@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx, } i915_gem_context_unlock_engines(ctx); if (!n) - return 0; + goto out; /* * To keep things interesting when alternating between engines in our @@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, kfree(order); +out: + fput(file); return err; } @@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng, static int igt_ppgtt_smoke_huge(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; I915_RND_STATE(prng); struct { @@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg) u32 min = backends[i].min; u32 max = backends[i].max; u32 size = max; + try_again: size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); @@ -1336,7 +1364,7 @@ try_again: goto out_unpin; } - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); if (err) { pr_err("%s write-huge failed with size=%u, i=%d\n", __func__, size, i); @@ -1363,8 +1391,7 @@ out_put: static int igt_ppgtt_sanity_check(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct { igt_create_fn fn; @@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg) if (pages) obj->mm.page_sizes.sg = pages; - err = igt_write_huge(ctx, obj); + err = igt_write_huge(i915, obj); i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); @@ -1458,15 +1485,27 @@ out: static int igt_tmpfs_fallback(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; + struct file *file; u32 *vaddr; int err = 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + /* * Make sure that we don't burst into a ball of flames upon falling back * to tmpfs, which we rely on if on the off-chance we encouter a failure @@ -1510,33 +1549,47 @@ out_restore: i915->mm.gemfs = gemfs; i915_vm_put(vm); +out: + fput(file); return err; } static int igt_shrink_thp(void *arg) { - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm; + struct i915_gem_context *ctx; struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; + struct file *file; unsigned int flags = PIN_USER; unsigned int n; bool should_swap; - int err = 0; + int err; + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); /* * Sanity check shrinking huge-paged object -- make sure nothing blows * up. */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - goto out_vm; - } - obj = i915_gem_object_create_shmem(i915, SZ_2M); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1626,7 +1679,8 @@ out_put: i915_gem_object_put(obj); out_vm: i915_vm_put(vm); - +out: + fput(file); return err; } @@ -1651,7 +1705,7 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_unlock; @@ -1687,36 +1741,14 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), }; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct file *file; - int err; if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - vm = ctx->vm; - if (vm) - WRITE_ONCE(vm->scrub_64K, true); - - err = i915_subtests(tests, ctx); - -out_file: - fput(file); - return err; + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 8402ed925a69..75947e9dada2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -592,7 +592,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_client_tiled_blits), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index b32f7fed2d9c..3f41fe5ec9d4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -88,9 +88,9 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(to_gt(i915)); i915_request_put(rq); err = -EIO; goto out_file; @@ -146,7 +146,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(to_gt(i915)); i915_request_put(rq); break; } @@ -1223,7 +1223,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, return 0; if (flags & TEST_RESET) - igt_global_reset_lock(&i915->gt); + igt_global_reset_lock(to_gt(i915)); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1306,7 +1306,7 @@ out_put: out_unlock: if (flags & TEST_RESET) - igt_global_reset_unlock(&i915->gt); + igt_global_reset_unlock(to_gt(i915)); if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1481,10 +1481,10 @@ static int check_scratch(struct i915_address_space *vm, u64 offset) static int write_to_scratch(struct i915_gem_context *ctx, struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, u64 offset, u32 value) { struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; @@ -1497,15 +1497,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) return err; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out; - } + if (IS_ERR(cmd)) + return PTR_ERR(cmd); *cmd++ = MI_STORE_DWORD_IMM_GEN4; if (GRAPHICS_VER(i915) >= 8) { @@ -1569,17 +1563,19 @@ err_unpin: i915_vma_unpin(vma); out_vm: i915_vm_put(vm); -out: - i915_gem_object_put(obj); + + if (!err) + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + return err; } static int read_from_scratch(struct i915_gem_context *ctx, struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, u64 offset, u32 *value) { struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; struct i915_address_space *vm; const u32 result = 0x100; struct i915_request *rq; @@ -1594,10 +1590,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) return err; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; @@ -1615,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out; + goto err_unpin; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1651,7 +1643,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out; + goto err_unpin; } memset(cmd, POISON_INUSE, PAGE_SIZE); @@ -1722,8 +1714,10 @@ err_unpin: i915_vma_unpin(vma); out_vm: i915_vm_put(vm); -out: - i915_gem_object_put(obj); + + if (!err) + err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); + return err; } @@ -1757,6 +1751,7 @@ static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; + struct drm_i915_gem_object *obj_a, *obj_b; unsigned long num_engines, count; struct intel_engine_cs *engine; struct igt_live_test t; @@ -1810,6 +1805,18 @@ static int igt_vm_isolation(void *arg) vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); + obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj_a)) { + err = PTR_ERR(obj_a); + goto out_file; + } + + obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj_b)) { + err = PTR_ERR(obj_b); + goto put_a; + } + count = 0; num_engines = 0; for_each_uabi_engine(engine, i915) { @@ -1832,13 +1839,13 @@ static int igt_vm_isolation(void *arg) I915_GTT_PAGE_SIZE, vm_total, sizeof(u32), alignof_dword); - err = write_to_scratch(ctx_a, engine, + err = write_to_scratch(ctx_a, engine, obj_a, offset, 0xdeadbeef); if (err == 0) - err = read_from_scratch(ctx_b, engine, + err = read_from_scratch(ctx_b, engine, obj_b, offset, &value); if (err) - goto out_file; + goto put_b; if (value != expected) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1847,7 +1854,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_file; + goto put_b; } this++; @@ -1858,6 +1865,10 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %lu engines\n", count, num_engines); +put_b: + i915_gem_object_put(obj_b); +put_a: + i915_gem_object_put(obj_a); out_file: if (igt_live_test_end(&t)) err = -EIO; @@ -1877,7 +1888,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_vm_isolation), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 4a6bb64c3a35..3cc74b0fed06 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } @@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, regions, num_regions); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", - PTR_ERR(dmabuf)); + PTR_ERR(obj)); err = PTR_ERR(obj); goto out_ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 28a700f08b49..ecb691c81d1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,6 +4,7 @@ */ #include "gt/intel_migrate.h" +#include "gem/i915_gem_ttm_move.h" static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) @@ -227,17 +228,38 @@ out_put: return err; } +static int igt_lmem_pages_failsafe_migrate(void *arg) +{ + int fail_gpu, fail_alloc, ret; + + for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { + for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { + pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", + fail_gpu, fail_alloc); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = igt_lmem_pages_migrate(arg); + if (ret) + goto out_err; + } + } + +out_err: + i915_ttm_migrate_set_failure_modes(false, false); + return ret; +} + int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_smem_create_migrate), SUBTEST(igt_lmem_create_migrate), SUBTEST(igt_same_create_migrate), - SUBTEST(igt_lmem_pages_migrate), + SUBTEST(igt_lmem_pages_failsafe_migrate), }; if (!HAS_LMEM(i915)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 6d30cdfa80f3..743e6ab2c40b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, struct rnd_state *prng) { const unsigned long npages = obj->base.size / PAGE_SIZE; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; @@ -141,7 +142,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) goto out; - intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -175,6 +176,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, { const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; unsigned long page; int err; @@ -234,7 +236,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -616,14 +618,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gt.requests.retire_work); + intel_gt_pm_get(to_gt(i915)); + cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { igt_flush_test(i915); - intel_gt_pm_put(&i915->gt); + intel_gt_pm_put(to_gt(i915)); i915_gem_driver_register__shrinker(i915); } @@ -651,8 +653,8 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Disable background reaper */ disable_retire_worker(i915); - GEM_BUG_ON(!i915->gt.awake); - intel_gt_retire_requests(&i915->gt); + GEM_BUG_ON(!to_gt(i915)->awake); + intel_gt_retire_requests(to_gt(i915)); i915_gem_drain_freed_objects(i915); /* Trim the device mmap space to only a page */ @@ -728,7 +730,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -942,7 +944,7 @@ static int __igt_mmap(struct drm_i915_private *i915, } if (type == I915_MMAP_TYPE_GTT) - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = wc_check(obj); if (err == -ENXIO) @@ -1049,7 +1051,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915, goto out_unmap; } - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = access_process_vm(current, addr, &x, sizeof(x), 0); if (err != sizeof(x)) { @@ -1065,7 +1067,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915, goto out_unmap; } - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); err = __get_user(y, ptr); if (err) { @@ -1165,7 +1167,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, } if (type == I915_MMAP_TYPE_GTT) - intel_gt_flush_ggtt_writes(&i915->gt); + intel_gt_flush_ggtt_writes(to_gt(i915)); for_each_uabi_engine(engine, i915) { struct i915_request *rq; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 890191f286e3..6e9292918bfc 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm, pt = stash->pt[0]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill32_px(pt, vm->scratch[0]->encode); @@ -262,30 +261,14 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - __i915_vma_put(ppgtt->vma); - gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); mutex_destroy(&ppgtt->flush); - mutex_destroy(&ppgtt->pin_mutex); free_pd(&ppgtt->base.vm, ppgtt->base.pd); } -static int pd_vma_set_pages(struct i915_vma *vma) -{ - vma->pages = ERR_PTR(-ENODEV); - return 0; -} - -static void pd_vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - vma->pages = NULL; -} - static void pd_vma_bind(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma *vma, @@ -325,43 +308,10 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) } static const struct i915_vma_ops pd_vma_ops = { - .set_pages = pd_vma_set_pages, - .clear_pages = pd_vma_clear_pages, .bind_vma = pd_vma_bind, .unbind_vma = pd_vma_unbind, }; -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(&vma->active, NULL, NULL, 0); - - kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); - vma->vm = i915_vm_get(&ggtt->vm); - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - atomic_set(&vma->flags, I915_VMA_GGTT); - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - return vma; -} - int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); @@ -378,42 +328,92 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; - if (mutex_lock_interruptible(&ppgtt->pin_mutex)) - return -EINTR; + /* grab the ppgtt resv to pin the object */ + err = i915_vm_lock_objects(&ppgtt->base.vm, ww); + if (err) + return err; /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - err = 0; - if (!atomic_read(&ppgtt->pin_count)) + if (!atomic_read(&ppgtt->pin_count)) { err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); + + GEM_BUG_ON(ppgtt->vma->fence); + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); + } if (!err) atomic_inc(&ppgtt->pin_count); - mutex_unlock(&ppgtt->pin_mutex); return err; } -void gen6_ppgtt_unpin(struct i915_ppgtt *base) +static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + obj->mm.pages = ZERO_SIZE_PTR; + return 0; +} - GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); - if (atomic_dec_and_test(&ppgtt->pin_count)) - i915_vma_unpin(ppgtt->vma); +static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ } -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) +static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { + .name = "pd_dummy_obj", + .get_pages = pd_dummy_obj_get_pages, + .put_pages = pd_dummy_obj_put_pages, +}; + +static struct i915_page_directory * +gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_page_directory *pd; + int err; - if (!atomic_read(&ppgtt->pin_count)) - return; + pd = __alloc_pd(I915_PDES); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); - i915_vma_unpin(ppgtt->vma); - atomic_set(&ppgtt->pin_count, 0); + pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, + &pd_dummy_obj_ops, + I915_PDES * SZ_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; + } + + pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm); + pd->pt.base->shares_resv_from = &ppgtt->base.vm; + + ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + ppgtt->vma = NULL; + goto err_pd; + } + + /* The dummy object we create is special, override ops.. */ + ppgtt->vma->ops = &pd_vma_ops; + ppgtt->vma->private = ppgtt; + return pd; + +err_pd: + free_pd(&ppgtt->base.vm, pd); + return ERR_PTR(err); +} + +void gen6_ppgtt_unpin(struct i915_ppgtt *base) +{ + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); + + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); } struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) @@ -427,7 +427,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) return ERR_PTR(-ENOMEM); mutex_init(&ppgtt->flush); - mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); @@ -440,21 +439,16 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(I915_PDES); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_free; - } - err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_pd; + goto err_free; - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); + ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); + if (IS_ERR(ppgtt->base.pd)) { + err = PTR_ERR(ppgtt->base.pd); goto err_scratch; } @@ -462,10 +456,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); -err_pd: - free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: - mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 6a61a5c3a85a..5e5cf2ec3309 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -19,7 +19,6 @@ struct gen6_ppgtt { u32 pp_dir; atomic_t pin_count; - struct mutex pin_mutex; bool scan_for_unused_pt; }; @@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww); void gen6_ppgtt_unpin(struct i915_ppgtt *base); -void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); void gen6_ppgtt_enable(struct intel_gt *gt); void gen7_ppgtt_enable(struct intel_gt *gt); struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 461844dffd7e..e320610dd0b8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 037a9a6e4889..b012c50f7ce7 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -18,7 +18,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr, const enum i915_cache_level level) { - u64 pde = addr | _PAGE_PRESENT | _PAGE_RW; + u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (level != I915_CACHE_NONE) pde |= PPAT_CACHED_PDE; @@ -32,10 +32,10 @@ static u64 gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; + pte &= ~GEN8_PAGE_RW; if (flags & PTE_LM) pte |= GEN12_PPGTT_PTE_LM; @@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, pt = stash->pt[!!lvl]; __i915_gem_object_pin_pages(pt->base); - i915_gem_object_make_unshrinkable(pt->base); fill_px(pt, vm->scratch[lvl]->encode); @@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) vm->scratch[0]->encode = gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_LLC, pte_flags); + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) } fill_px(obj, vm->scratch[i - 1]->encode); - obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC); + obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE); vm->scratch[i] = obj; } @@ -777,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - if (HAS_LMEM(gt->i915)) + if (HAS_LMEM(gt->i915)) { ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; - else + + /* + * On some platforms the hw has dropped support for 4K GTT pages + * when dealing with LMEM, and due to the design of 64K GTT + * pages in the hw, we can only mark the *entire* page-table as + * operating in 64K GTT mode, since the enable bit is still on + * the pde, and not the pte. And since we still need to allow + * 4K GTT pages for SMEM objects, we can't have a "normal" 4K + * page-table with scratch pointing to LMEM, since that's + * undefined from the hw pov. The simplest solution is to just + * move the 64K scratch page to SMEM on such platforms and call + * it a day, since that should work for all configurations. + */ + if (HAS_64K_PAGES(gt->i915)) + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + else + ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; + } else { ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + } err = gen8_init_scratch(&ppgtt->vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..ba083d800a08 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce, */ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); - if (!err && ce->ring->vma->obj) + if (!err) err = i915_gem_object_lock(ce->ring->vma->obj, ww); if (!err && ce->state) err = i915_gem_object_lock(ce->state->obj, ww); @@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce, if (err) return err; - err = i915_active_acquire(&ce->active); + err = ce->ops->pre_pin(ce, ww, &vaddr); if (err) goto err_ctx_unpin; - err = ce->ops->pre_pin(ce, ww, &vaddr); + err = i915_active_acquire(&ce->active); if (err) - goto err_release; + goto err_post_unpin; err = mutex_lock_interruptible(&ce->pin_mutex); if (err) - goto err_post_unpin; + goto err_release; intel_engine_pm_might_get(ce->engine); @@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce, err_unlock: mutex_unlock(&ce->pin_mutex); +err_release: + i915_active_release(&ce->active); err_post_unpin: if (!handoff) ce->ops->post_unpin(ce); -err_release: - i915_active_release(&ce->active); err_ctx_unpin: intel_context_post_unpin(ce); @@ -364,7 +364,7 @@ static int __intel_context_active(struct i915_active *active) return 0; } -static int __i915_sw_fence_call +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state) { diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 246c37d72cd7..d8c74bbf9aae 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce) static inline void intel_context_mark_active(struct intel_context *ce) { - lockdep_assert_held(&ce->timeline->mutex); + lockdep_assert(lockdep_is_held(&ce->timeline->mutex) || + test_bit(CONTEXT_IS_PARKING, &ce->flags)); ++ce->active_count; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9e0177dc5484..30cd81ad8911 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -118,6 +118,7 @@ struct intel_context { #define CONTEXT_LRCA_DIRTY 9 #define CONTEXT_GUC_INIT 10 #define CONTEXT_PERMA_PIN 11 +#define CONTEXT_IS_PARKING 12 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ff6753ccb129..352254e001b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -325,6 +325,38 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); + if (GRAPHICS_VER(gt->i915) >= 11) { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } else { + static const u32 engine_reset_domains[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, + }; + GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) || + !engine_reset_domains[id]); + engine->reset_domain = engine_reset_domains[id]; + } engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; @@ -363,7 +395,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.lock); + seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1676,14 +1708,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { + struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; void *ring; int size; + if (!i915_vma_snapshot_present(vsnap)) + vsnap = NULL; + drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u, + vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u); size = rq->tail - rq->head; if (rq->tail < rq->head) @@ -1915,22 +1951,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, - ktime_t *now) -{ - ktime_t total = engine->stats.total; - - /* - * If the engine is executing something at the moment - * add it to the total. - */ - *now = ktime_get(); - if (READ_ONCE(engine->stats.active)) - total = ktime_add(total, ktime_sub(*now, engine->stats.start)); - - return total; -} - /** * intel_engine_get_busy_time() - Return current accumulated engine busyness * @engine: engine to report on @@ -1940,15 +1960,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) { - unsigned int seq; - ktime_t total; - - do { - seq = read_seqcount_begin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine, now); - } while (read_seqcount_retry(&engine->stats.lock, seq)); - - return total; + return engine->busyness(engine, now); } struct intel_context * diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a1334b48dde7..b0a4a2dbe3ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -26,7 +26,7 @@ static void dbg_poison_ce(struct intel_context *ce) int type = i915_coherent_map_type(ce->engine->i915, obj, true); void *map; - if (!i915_gem_object_trylock(obj)) + if (!i915_gem_object_trylock(obj, NULL)) return; map = i915_gem_object_pin_map(obj, type); @@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf) return 0; } -#if IS_ENABLED(CONFIG_LOCKDEP) - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - unsigned long flags; - - local_irq_save(flags); - mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); - - return flags; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ - mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); - local_irq_restore(flags); -} - -#else - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - return 0; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ -} - -#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ - static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) { struct i915_request *rq = to_request(fence); @@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - unsigned long flags; bool result = true; /* @@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * engine->wakeref.count, we may see the request completion and retire * it causing an underflow of the engine->wakeref. */ - flags = __timeline_mark_lock(ce); + set_bit(CONTEXT_IS_PARKING, &ce->flags); GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); rq = __i915_request_create(ce, GFP_NOWAIT); @@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) result = false; out_unlock: - __timeline_mark_unlock(ce, flags); + clear_bit(CONTEXT_IS_PARKING, &ce->flags); return result; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h index 24fbdd94351a..8e762d683e50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -15,45 +15,46 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) { + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - if (engine->stats.active) { - engine->stats.active++; + if (stats->active) { + stats->active++; return; } /* The writer is serialised; but the pmu reader may be from hardirq */ local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.start = ktime_get(); - engine->stats.active++; + stats->start = ktime_get(); + stats->active++; - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); - GEM_BUG_ON(!engine->stats.active); + GEM_BUG_ON(!stats->active); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) { + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; unsigned long flags; - GEM_BUG_ON(!engine->stats.active); - if (engine->stats.active > 1) { - engine->stats.active--; + GEM_BUG_ON(!stats->active); + if (stats->active > 1) { + stats->active--; return; } local_irq_save(flags); - write_seqcount_begin(&engine->stats.lock); + write_seqcount_begin(&stats->lock); - engine->stats.active--; - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); + stats->active--; + stats->total = ktime_add(stats->total, + ktime_sub(ktime_get(), stats->start)); - write_seqcount_end(&engine->stats.lock); + write_seqcount_end(&stats->lock); local_irq_restore(flags); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e0f773585c29..36365bdbe1ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -257,6 +257,55 @@ struct intel_engine_execlists { #define INTEL_ENGINE_CS_MAX_NAME 8 +struct intel_engine_execlists_stats { + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + + /** + * @lock: Lock protecting the below fields. + */ + seqcount_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases where + * engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; +}; + +struct intel_engine_guc_stats { + /** + * @running: Active state of the engine when busyness was last sampled. + */ + bool running; + + /** + * @prev_total: Previous value of total runtime clock cycles. + */ + u32 prev_total; + + /** + * @total_gt_clks: Total gt clock cycles this engine was busy. + */ + u64 total_gt_clks; + + /** + * @start_gt_clk: GT clock time of last idle to active transition. + */ + u64 start_gt_clk; +}; + struct intel_engine_cs { struct drm_i915_private *i915; struct intel_gt *gt; @@ -269,6 +318,7 @@ struct intel_engine_cs { unsigned int guc_id; intel_engine_mask_t mask; + u32 reset_domain; /** * @logical_mask: logical mask of engine, reported to user space via * query IOCTL and used to communicate with the GuC in logical space. @@ -439,6 +489,12 @@ struct intel_engine_cs { void (*add_active_request)(struct i915_request *rq); void (*remove_active_request)(struct i915_request *rq); + /* + * Get engine busyness and the time at which the busyness was sampled. + */ + ktime_t (*busyness)(struct intel_engine_cs *engine, + ktime_t *now); + struct intel_engine_execlists execlists; /* @@ -488,30 +544,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); struct { - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - - /** - * @lock: Lock protecting the below fields. - */ - seqcount_t lock; - - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). - */ - ktime_t total; - - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; + union { + struct intel_engine_execlists_stats execlists; + struct intel_engine_guc_stats guc; + }; /** * @rps: Utilisation at last RPS sampling. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 8f8bea08e734..9ce85a845105 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -116,7 +116,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); - if (intel_uc_uses_guc_submission(&i915->gt.uc)) + if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; for (i = 0; i < ARRAY_SIZE(map); i++) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..a69df5e9e77a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2186,7 +2186,8 @@ struct execlists_capture { static void execlists_capture_work(struct work_struct *work) { struct execlists_capture *cap = container_of(work, typeof(*cap), work); - const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN; struct intel_engine_cs *engine = cap->rq->engine; struct intel_gt_coredump *gt = cap->error->gt; struct intel_engine_capture_vma *vma; @@ -3293,6 +3294,38 @@ static void execlists_release(struct intel_engine_cs *engine) lrc_fini_wa_ctx(engine); } +static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + ktime_t total = stats->total; + + /* + * If the engine is executing something at the moment + * add it to the total. + */ + *now = ktime_get(); + if (READ_ONCE(stats->active)) + total = ktime_add(total, ktime_sub(*now, stats->start)); + + return total; +} + +static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, + ktime_t *now) +{ + struct intel_engine_execlists_stats *stats = &engine->stats.execlists; + unsigned int seq; + ktime_t total; + + do { + seq = read_seqcount_begin(&stats->lock); + total = __execlists_engine_busyness(engine, now); + } while (read_seqcount_retry(&stats->lock, seq)); + + return total; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { @@ -3349,6 +3382,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_bb_start = gen8_emit_bb_start; else engine->emit_bb_start = gen8_emit_bb_start_noarb; + + engine->busyness = execlists_engine_busyness; } static void logical_ring_default_irqs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 555111c3bee5..5263dda7f8d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -22,9 +22,6 @@ #include "intel_gtt.h" #include "gen8_ppgtt.h" -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma); - static void i915_ggtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -106,7 +103,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915) * Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (!intel_vtd_active()) + if (!intel_vtd_active(i915)) return false; if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) @@ -209,7 +206,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - gen8_pte_t pte = addr | _PAGE_PRESENT; + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; if (flags & PTE_LM) pte |= GEN12_GGTT_PTE_LM; @@ -892,21 +889,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return 0; } -int ggtt_set_pages(struct i915_vma *vma) -{ - int ret; - - GEM_BUG_ON(vma->pages); - - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - static void gen6_gmch_remove(struct i915_address_space *vm) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); @@ -941,6 +923,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) size = gen8_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; @@ -967,8 +950,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; ggtt->vm.pte_encode = gen8_ggtt_pte_encode; @@ -1094,6 +1075,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) @@ -1117,8 +1099,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; return ggtt_probe_common(ggtt, size); } @@ -1146,6 +1126,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; if (needs_idle_maps(i915)) { drm_notice(&i915->drm, @@ -1162,8 +1143,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; if (unlikely(ggtt->do_idle_maps)) drm_notice(&i915->drm, @@ -1229,11 +1208,11 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { int ret; - ret = ggtt_probe_hw(&i915->ggtt, &i915->gt); + ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915)); if (ret) return ret; - if (intel_vtd_active()) + if (intel_vtd_active(i915)) drm_info(&i915->drm, "VT-d active for gfx access\n"); return 0; @@ -1333,382 +1312,3 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) intel_ggtt_restore_fences(ggtt); } - -static struct scatterlist * -rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, - unsigned int width, unsigned int height, - unsigned int src_stride, unsigned int dst_stride, - struct sg_table *st, struct scatterlist *sg) -{ - unsigned int column, row; - unsigned int src_idx; - - for (column = 0; column < width; column++) { - unsigned int left; - - src_idx = src_stride * (height - 1) + column + offset; - for (row = 0; row < height; row++) { - st->nents++; - /* - * We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); - sg_dma_address(sg) = - i915_gem_object_get_dma_address(obj, src_idx); - sg_dma_len(sg) = I915_GTT_PAGE_SIZE; - sg = sg_next(sg); - src_idx -= src_stride; - } - - left = (dst_stride - height) * I915_GTT_PAGE_SIZE; - - if (!left) - continue; - - st->nents++; - - /* - * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a conenience to indicate how many padding PTEs - * to insert at this spot. - */ - sg_set_page(sg, NULL, left, 0); - sg_dma_address(sg) = 0; - sg_dma_len(sg) = left; - sg = sg_next(sg); - } - - return sg; -} - -static noinline struct sg_table * -intel_rotate_pages(struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_rotation_info_size(rot_info); - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. */ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) - sg = rotate_pages(obj, rot_info->plane[i].offset, - rot_info->plane[i].width, rot_info->plane[i].height, - rot_info->plane[i].src_stride, - rot_info->plane[i].dst_stride, - st, sg); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, - rot_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static struct scatterlist * -add_padding_pages(unsigned int count, - struct sg_table *st, struct scatterlist *sg) -{ - st->nents++; - - /* - * The DE ignores the PTEs for the padding tiles, the sg entry - * here is just a convenience to indicate how many padding PTEs - * to insert at this spot. - */ - sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0); - sg_dma_address(sg) = 0; - sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE; - sg = sg_next(sg); - - return sg; -} - -static struct scatterlist * -remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj, - unsigned int offset, unsigned int alignment_pad, - unsigned int width, unsigned int height, - unsigned int src_stride, unsigned int dst_stride, - struct sg_table *st, struct scatterlist *sg, - unsigned int *gtt_offset) -{ - unsigned int row; - - if (!width || !height) - return sg; - - if (alignment_pad) - sg = add_padding_pages(alignment_pad, st, sg); - - for (row = 0; row < height; row++) { - unsigned int left = width * I915_GTT_PAGE_SIZE; - - while (left) { - dma_addr_t addr; - unsigned int length; - - /* - * We don't need the pages, but need to initialize - * the entries so the sg list can be happily traversed. - * The only thing we need are DMA addresses. - */ - - addr = i915_gem_object_get_dma_address_len(obj, offset, &length); - - length = min(left, length); - - st->nents++; - - sg_set_page(sg, NULL, length, 0); - sg_dma_address(sg) = addr; - sg_dma_len(sg) = length; - sg = sg_next(sg); - - offset += length / I915_GTT_PAGE_SIZE; - left -= length; - } - - offset += src_stride - width; - - left = (dst_stride - width) * I915_GTT_PAGE_SIZE; - - if (!left) - continue; - - sg = add_padding_pages(left >> PAGE_SHIFT, st, sg); - } - - *gtt_offset += alignment_pad + dst_stride * height; - - return sg; -} - -static struct scatterlist * -remap_contiguous_pages(struct drm_i915_gem_object *obj, - unsigned int obj_offset, - unsigned int count, - struct sg_table *st, struct scatterlist *sg) -{ - struct scatterlist *iter; - unsigned int offset; - - iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset); - GEM_BUG_ON(!iter); - - do { - unsigned int len; - - len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), - count << PAGE_SHIFT); - sg_set_page(sg, NULL, len, 0); - sg_dma_address(sg) = - sg_dma_address(iter) + (offset << PAGE_SHIFT); - sg_dma_len(sg) = len; - - st->nents++; - count -= len >> PAGE_SHIFT; - if (count == 0) - return sg; - - sg = __sg_next(sg); - iter = __sg_next(iter); - offset = 0; - } while (1); -} - -static struct scatterlist * -remap_linear_color_plane_pages(struct drm_i915_gem_object *obj, - unsigned int obj_offset, unsigned int alignment_pad, - unsigned int size, - struct sg_table *st, struct scatterlist *sg, - unsigned int *gtt_offset) -{ - if (!size) - return sg; - - if (alignment_pad) - sg = add_padding_pages(alignment_pad, st, sg); - - sg = remap_contiguous_pages(obj, obj_offset, size, st, sg); - sg = sg_next(sg); - - *gtt_offset += alignment_pad + size; - - return sg; -} - -static struct scatterlist * -remap_color_plane_pages(const struct intel_remapped_info *rem_info, - struct drm_i915_gem_object *obj, - int color_plane, - struct sg_table *st, struct scatterlist *sg, - unsigned int *gtt_offset) -{ - unsigned int alignment_pad = 0; - - if (rem_info->plane_alignment) - alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset; - - if (rem_info->plane[color_plane].linear) - sg = remap_linear_color_plane_pages(obj, - rem_info->plane[color_plane].offset, - alignment_pad, - rem_info->plane[color_plane].size, - st, sg, - gtt_offset); - - else - sg = remap_tiled_color_plane_pages(obj, - rem_info->plane[color_plane].offset, - alignment_pad, - rem_info->plane[color_plane].width, - rem_info->plane[color_plane].height, - rem_info->plane[color_plane].src_stride, - rem_info->plane[color_plane].dst_stride, - st, sg, - gtt_offset); - - return sg; -} - -static noinline struct sg_table * -intel_remap_pages(struct intel_remapped_info *rem_info, - struct drm_i915_gem_object *obj) -{ - unsigned int size = intel_remapped_info_size(rem_info); - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int gtt_offset = 0; - int ret = -ENOMEM; - int i; - - /* Allocate target SG list. */ - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, size, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - sg = st->sgl; - - for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) - sg = remap_color_plane_pages(rem_info, obj, i, st, sg, >t_offset); - - i915_sg_trim(st); - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - - drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rem_info->plane[0].width, - rem_info->plane[0].height, size); - - return ERR_PTR(ret); -} - -static noinline struct sg_table * -intel_partial_pages(const struct i915_ggtt_view *view, - struct drm_i915_gem_object *obj) -{ - struct sg_table *st; - struct scatterlist *sg; - unsigned int count = view->partial.size; - int ret = -ENOMEM; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - goto err_st_alloc; - - ret = sg_alloc_table(st, count, GFP_KERNEL); - if (ret) - goto err_sg_alloc; - - st->nents = 0; - - sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl); - - sg_mark_end(sg); - i915_sg_trim(st); /* Drop any unused tail entries. */ - - return st; - -err_sg_alloc: - kfree(st); -err_st_alloc: - return ERR_PTR(ret); -} - -static int -i915_get_ggtt_vma_pages(struct i915_vma *vma) -{ - int ret; - - /* - * The vma->pages are only valid within the lifespan of the borrowed - * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so - * must be the vma->pages. A simple rule is that vma->pages must only - * be accessed when the obj->mm.pages are pinned. - */ - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - - switch (vma->ggtt_view.type) { - default: - GEM_BUG_ON(vma->ggtt_view.type); - fallthrough; - case I915_GGTT_VIEW_NORMAL: - vma->pages = vma->obj->mm.pages; - return 0; - - case I915_GGTT_VIEW_ROTATED: - vma->pages = - intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); - break; - - case I915_GGTT_VIEW_REMAPPED: - vma->pages = - intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); - break; - - case I915_GGTT_VIEW_PARTIAL: - vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - break; - } - - ret = 0; - if (IS_ERR(vma->pages)) { - ret = PTR_ERR(vma->pages); - vma->pages = NULL; - drm_err(&vma->vm->i915->drm, - "Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); - } - return ret; -} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f2422d48be32..f98f0fb21efb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -25,11 +25,8 @@ #include "shmem_utils.h" #include "pxp/intel_pxp.h" -void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { - gt->i915 = i915; - gt->uncore = &i915->uncore; - spin_lock_init(>->irq_lock); INIT_LIST_HEAD(>->closed_vma); @@ -48,6 +45,12 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_rps_init_early(>->rps); } +void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +{ + gt->i915 = i915; + gt->uncore = &i915->uncore; +} + int intel_gt_probe_lmem(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 74e771871a9b..3ace129eb2af 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -35,6 +35,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); +void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index acc49c56a9f3..9db3dcbd917f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -9,11 +9,6 @@ #include "intel_engine_pm.h" #include "intel_gt_buffer_pool.h" -static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool) -{ - return container_of(pool, struct intel_gt, buffer_pool); -} - static struct list_head * bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz) { @@ -141,7 +136,7 @@ static struct intel_gt_buffer_pool_node * node_create(struct intel_gt_buffer_pool *pool, size_t sz, enum i915_map_type type) { - struct intel_gt *gt = to_gt(pool); + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); struct intel_gt_buffer_pool_node *node; struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h index e307ceb99031..17e79b735cfe 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -10,11 +10,7 @@ struct intel_gt; -#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ - static int __name ## _open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, __name ## _show, inode->i_private); \ -} \ +#define __GT_DEBUGFS_ATTRIBUTE_FOPS(__name) \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ @@ -23,6 +19,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ +__GT_DEBUGFS_ATTRIBUTE_FOPS(__name) + +#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(__name, __size_vf) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open_size(file, __name ## _show, inode->i_private, \ + __size_vf(inode->i_private)); \ +} \ +__GT_DEBUGFS_ATTRIBUTE_FOPS(__name) + void intel_gt_debugfs_register(struct intel_gt *gt); struct intel_gt_debugfs_file { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 524eaf678790..c0fa41e4c803 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_rc6_unpark(>->rc6); intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); + intel_guc_busyness_unpark(gt); intel_gt_unpark_requests(gt); runtime_begin(gt); @@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf) runtime_end(gt); intel_gt_park_requests(gt); + intel_guc_busyness_park(gt); i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(>->rps); @@ -301,7 +303,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) user_forcewake(gt, true); wait_for_suspend(gt); - intel_pxp_suspend(>->pxp, false); + intel_pxp_suspend_prepare(>->pxp); } static suspend_state_t pm_suspend_target(void) @@ -326,6 +328,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) GEM_BUG_ON(gt->awake); intel_uc_suspend(>->uc); + intel_pxp_suspend(>->pxp); /* * On disabling the device, we want to turn off HW access to memory @@ -353,7 +356,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { - intel_pxp_suspend(>->pxp, true); + intel_pxp_runtime_suspend(>->pxp); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -371,7 +374,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) if (ret) return ret; - intel_pxp_resume(>->pxp); + intel_pxp_runtime_resume(>->pxp); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 67d14afa6623..a94be0306464 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -6,6 +6,9 @@ #include <linux/slab.h> /* fault-inject.h is not standalone! */ #include <linux/fault-inject.h> +#include <linux/sched/mm.h> + +#include <drm/drm_cache.h> #include "gem/i915_gem_lmem.h" #include "i915_trace.h" @@ -221,19 +224,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) INIT_LIST_HEAD(&vm->bound_list); } -void clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - - if (vma->pages != vma->obj->mm.pages) { - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); -} - void *__px_vaddr(struct drm_i915_gem_object *p) { enum i915_map_type type; @@ -273,6 +263,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch) val = POISON_FREE; memset(vaddr, val, scratch->base.size); + drm_clflush_virt_range(vaddr, scratch->base.size); } int setup_scratch_page(struct i915_address_space *vm) @@ -298,7 +289,7 @@ int setup_scratch_page(struct i915_address_space *vm) do { struct drm_i915_gem_object *obj; - obj = vm->alloc_pt_dma(vm, size); + obj = vm->alloc_scratch_dma(vm, size); if (IS_ERR(obj)) goto skip; @@ -334,6 +325,18 @@ skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; + /* + * If we need 64K minimum GTT pages for device local-memory, + * like on XEHPSDV, then we need to fail the allocation here, + * otherwise we can't safely support the insertion of + * local-memory pages for this vm, since the HW expects the + * correct physical alignment and size when the page-table is + * operating in 64K GTT mode, which includes any scratch PTEs, + * since userspace can still touch them. + */ + if (HAS_64K_PAGES(vm->i915)) + return -ENOMEM; + size = I915_GTT_PAGE_SIZE_4K; } while (1); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index dfeaef680aac..177b42b935a1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -135,6 +135,9 @@ typedef u64 gen8_pte_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PAGE_PRESENT BIT_ULL(0) +#define GEN8_PAGE_RW BIT_ULL(1) + #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) @@ -206,9 +209,6 @@ struct i915_vma_ops { */ void (*unbind_vma)(struct i915_address_space *vm, struct i915_vma *vma); - - int (*set_pages)(struct i915_vma *vma); - void (*clear_pages)(struct i915_vma *vma); }; struct i915_address_space { @@ -265,6 +265,8 @@ struct i915_address_space { struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); + struct drm_i915_gem_object * + (*alloc_scratch_dma)(struct i915_address_space *vm, int sz); u64 (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -596,10 +598,6 @@ release_pd_entry(struct i915_page_directory * const pd, const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); -int ggtt_set_pages(struct i915_vma *vma); -int ppgtt_set_pages(struct i915_vma *vma); -void clear_pages(struct i915_vma *vma); - void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 56156cf18c41..b3489599e4de 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1167,6 +1167,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); + /* Wa_16013000631:dg2 */ + if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(ce->engine->i915)) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index afb1cce9a352..18b44af56969 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -13,7 +13,6 @@ struct insert_pte_data { u64 offset; - bool is_lmem; }; #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ @@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm, struct insert_pte_data *d = data; vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, - d->is_lmem ? PTE_LM : 0); + i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0); d->offset += PAGE_SIZE; } @@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) goto err_vm; /* Now allow the GPU to rewrite the PTE via its own ppGTT */ - d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); - vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); } return &vm->vm; @@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq, GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); /* Compute the page directory offset for the target address range */ - offset += (u64)rq->engine->instance << 32; offset >>= 12; offset *= sizeof(u64); offset += 2 * CHUNK_SZ; + offset += (u64)rq->engine->instance << 32; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -406,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size) int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -433,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -444,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + copy must not be interrupted. */ @@ -527,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value) int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -552,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -563,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + clear must not be interrupted. */ @@ -601,7 +599,7 @@ out_ce: int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -626,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_copy(ce, await, + err = intel_context_migrate_copy(ce, deps, src, src_cache_level, src_is_lmem, dst, dst_cache_level, dst_is_lmem, out); @@ -640,7 +638,7 @@ out: int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -663,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_clear(ce, await, sg, cache_level, + err = intel_context_migrate_clear(ce, deps, sg, cache_level, is_lmem, value, out); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h index 4e18e755a00b..ccc677ec4aa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.h +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -11,6 +11,7 @@ #include "intel_migrate_types.h" struct dma_fence; +struct i915_deps; struct i915_request; struct i915_gem_ww_ctx; struct intel_gt; @@ -23,7 +24,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m); int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -33,7 +34,7 @@ int intel_migrate_copy(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -45,7 +46,7 @@ int intel_context_migrate_copy(struct intel_context *ce, int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -53,7 +54,7 @@ intel_migrate_clear(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 15f9ada28a7a..9c253ba593c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->unused_entries_index = I915_MOCS_PTE; if (IS_DG2(i915)) { - if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); table->table = dg2_mocs_table_g10_ax; } else { diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 4396bfd630d8..083b3090c69c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -289,16 +289,6 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm, } } -int ppgtt_set_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->pages); - - vma->pages = vma->obj->mm.pages; - vma->page_sizes = vma->obj->mm.page_sizes; - - return 0; -} - void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, unsigned long lmem_pt_obj_flags) { @@ -315,6 +305,4 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 43093dd2d0c9..c3155ee58689 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,10 +117,17 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + /* Wa_16011777198 - Render powergating must remain disabled */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + pg_enable = + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; + else + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index afb35d2e5c73..fde2dcb59809 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -66,12 +66,16 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem) DMA_ATTR_FORCE_CONTIGUOUS); } -static void +static int region_lmem_release(struct intel_memory_region *mem) { - intel_region_ttm_fini(mem); + int ret; + + ret = intel_region_ttm_fini(mem); io_mapping_fini(&mem->iomap); release_fake_lmem_bar(mem); + + return ret; } static int @@ -158,7 +162,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) + if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; @@ -193,6 +197,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_memory_region *mem; + resource_size_t min_page_size; resource_size_t io_start; resource_size_t lmem_size; int err; @@ -207,10 +212,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) return ERR_PTR(-ENODEV); + min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : + I915_GTT_PAGE_SIZE_4K; mem = intel_memory_region_create(i915, 0, lmem_size, - I915_GTT_PAGE_SIZE_4K, + min_page_size, io_start, INTEL_MEMORY_LOCAL, 0, @@ -231,7 +238,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) return mem; err_region_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 91200c43951f..7be0002d9d70 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -6,7 +6,7 @@ #include <linux/sched/mm.h> #include <linux/stop_machine.h> -#include "display/intel_display_types.h" +#include "display/intel_display.h" #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" @@ -297,13 +297,6 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - static const u32 hw_engine_mask[] = { - [RCS0] = GEN6_GRDOM_RENDER, - [BCS0] = GEN6_GRDOM_BLT, - [VCS0] = GEN6_GRDOM_MEDIA, - [VCS1] = GEN8_GRDOM_MEDIA2, - [VECS0] = GEN6_GRDOM_VECS, - }; struct intel_engine_cs *engine; u32 hw_mask; @@ -314,8 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt, hw_mask = 0; for_each_engine_masked(engine, gt, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - hw_mask |= hw_engine_mask[engine->id]; + hw_mask |= engine->reset_domain; } } @@ -492,22 +484,6 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - static const u32 hw_engine_mask[] = { - [RCS0] = GEN11_GRDOM_RENDER, - [BCS0] = GEN11_GRDOM_BLT, - [VCS0] = GEN11_GRDOM_MEDIA, - [VCS1] = GEN11_GRDOM_MEDIA2, - [VCS2] = GEN11_GRDOM_MEDIA3, - [VCS3] = GEN11_GRDOM_MEDIA4, - [VCS4] = GEN11_GRDOM_MEDIA5, - [VCS5] = GEN11_GRDOM_MEDIA6, - [VCS6] = GEN11_GRDOM_MEDIA7, - [VCS7] = GEN11_GRDOM_MEDIA8, - [VECS0] = GEN11_GRDOM_VECS, - [VECS1] = GEN11_GRDOM_VECS2, - [VECS2] = GEN11_GRDOM_VECS3, - [VECS3] = GEN11_GRDOM_VECS4, - }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; u32 reset_mask, unlock_mask = 0; @@ -518,8 +494,7 @@ static int gen11_reset_engines(struct intel_gt *gt, } else { reset_mask = 0; for_each_engine_masked(engine, gt, engine_mask, tmp) { - GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); - reset_mask |= hw_engine_mask[engine->id]; + reset_mask |= engine->reset_domain; ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask); if (ret) goto sfc_unlock; @@ -1367,20 +1342,27 @@ void intel_gt_handle_error(struct intel_gt *gt, /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */ synchronize_rcu_expedited(); - /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, gt, tmp) { - while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags)) - wait_on_bit(>->reset.flags, - I915_RESET_ENGINE + engine->id, - TASK_UNINTERRUPTIBLE); + /* + * Prevent any other reset-engine attempt. We don't do this for GuC + * submission the GuC owns the per-engine reset, not the i915. + */ + if (!intel_uc_uses_guc_submission(>->uc)) { + for_each_engine(engine, gt, tmp) { + while (test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) + wait_on_bit(>->reset.flags, + I915_RESET_ENGINE + engine->id, + TASK_UNINTERRUPTIBLE); + } } intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, gt, tmp) - clear_bit_unlock(I915_RESET_ENGINE + engine->id, - >->reset.flags); + if (!intel_uc_uses_guc_submission(>->uc)) { + for_each_engine(engine, gt, tmp) + clear_bit_unlock(I915_RESET_ENGINE + engine->id, + >->reset.flags); + } clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); smp_mb__after_atomic(); wake_up_all(>->reset.queue); @@ -1441,6 +1423,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > I915_WEDGED_ON_INIT); intel_gt_set_wedged(gt); + i915_disable_error_state(gt->i915, -ENODEV); set_bit(I915_WEDGED_ON_INIT, >->reset.flags); /* Wedged on init is non-recoverable */ @@ -1450,6 +1433,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) void intel_gt_set_wedged_on_fini(struct intel_gt *gt) { intel_gt_set_wedged(gt); + i915_disable_error_state(gt->i915, -ENODEV); set_bit(I915_WEDGED_ON_FINI, >->reset.flags); intel_gt_retire_requests(gt); /* cleanup any wedged requests */ } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 586dca1731ce..3e6fac0340ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1357,7 +1357,7 @@ retry: err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww); if (!err && gen7_wa_vma) err = i915_gem_object_lock(gen7_wa_vma->obj, &ww); - if (!err && engine->legacy.ring->vma->obj) + if (!err) err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww); if (!err) err = intel_timeline_pin(timeline, &ww); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 5e275f8dda8c..54e7df788dbf 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -936,8 +936,70 @@ void intel_rps_park(struct intel_rps *rps) GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } +u32 intel_rps_get_boost_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + return slpc->boost_freq; + } else { + return intel_gpu_freq(rps, rps->boost_freq); + } +} + +static int rps_set_boost_freq(struct intel_rps *rps, u32 val) +{ + bool boost = false; + + /* Validate against (static) hardware limits */ + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || val > rps->max_freq) + return -EINVAL; + + mutex_lock(&rps->lock); + if (val != rps->boost_freq) { + rps->boost_freq = val; + boost = atomic_read(&rps->num_waiters); + } + mutex_unlock(&rps->lock); + if (boost) + schedule_work(&rps->work); + + return 0; +} + +int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + return intel_guc_slpc_set_boost_freq(slpc, freq); + } else { + return rps_set_boost_freq(rps, freq); + } +} + +void intel_rps_dec_waiters(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc; + + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + intel_guc_slpc_dec_waiters(slpc); + } else { + atomic_dec(&rps->num_waiters); + } +} + void intel_rps_boost(struct i915_request *rq) { + struct intel_guc_slpc *slpc; + if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; @@ -945,6 +1007,16 @@ void intel_rps_boost(struct i915_request *rq) if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + if (rps_uses_slpc(rps)) { + slpc = rps_to_slpc(rps); + + /* Return if old value is non zero */ + if (!atomic_fetch_inc(&slpc->num_waiters)) + schedule_work(&slpc->boost_work); + + return; + } + if (atomic_fetch_inc(&rps->num_waiters)) return; @@ -2154,6 +2226,65 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps) return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); } +static void intel_rps_set_manual(struct intel_rps *rps, bool enable) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE; + + /* Allow punit to process software requests */ + intel_uncore_write(uncore, GEN6_RP_CONTROL, state); +} + +void intel_rps_raise_unslice(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp0_unslice_req; + + mutex_lock(&rps->lock); + + if (rps_uses_slpc(rps)) { + /* RP limits have not been initialized yet for SLPC path */ + rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0) + & 0xff) * GEN9_FREQ_SCALER; + + intel_rps_set_manual(rps, true); + intel_uncore_write(uncore, GEN6_RPNSWREQ, + ((rp0_unslice_req << + GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | + GEN9_IGNORE_SLICE_RATIO)); + intel_rps_set_manual(rps, false); + } else { + intel_rps_set(rps, rps->rp0_freq); + } + + mutex_unlock(&rps->lock); +} + +void intel_rps_lower_unslice(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rpn_unslice_req; + + mutex_lock(&rps->lock); + + if (rps_uses_slpc(rps)) { + /* RP limits have not been initialized yet for SLPC path */ + rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16) + & 0xff) * GEN9_FREQ_SCALER; + + intel_rps_set_manual(rps, true); + intel_uncore_write(uncore, GEN6_RPNSWREQ, + ((rpn_unslice_req << + GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | + GEN9_IGNORE_SLICE_RATIO)); + intel_rps_set_manual(rps, false); + } else { + intel_rps_set(rps, rps->min_freq); + } + + mutex_unlock(&rps->lock); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; @@ -2230,7 +2361,7 @@ unsigned long i915_read_mch_val(void) return 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_ips *ips = &i915->gt.rps.ips; + struct intel_ips *ips = &to_gt(i915)->rps.ips; spin_lock_irq(&mchdev_lock); chipset_val = __ips_chipset_val(ips); @@ -2257,7 +2388,7 @@ bool i915_gpu_raise(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); if (rps->max_freq_softlimit < rps->max_freq) @@ -2284,7 +2415,7 @@ bool i915_gpu_lower(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); if (rps->max_freq_softlimit > rps->min_freq) @@ -2310,7 +2441,7 @@ bool i915_gpu_busy(void) if (!i915) return false; - ret = i915->gt.awake; + ret = to_gt(i915)->awake; drm_dev_put(&i915->drm); return ret; @@ -2333,11 +2464,11 @@ bool i915_gpu_turbo_disable(void) if (!i915) return false; - rps = &i915->gt.rps; + rps = &to_gt(i915)->rps; spin_lock_irq(&mchdev_lock); rps->max_freq_softlimit = rps->min_freq; - ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq); + ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq); spin_unlock_irq(&mchdev_lock); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 11960d64ca82..c6d76a3d1331 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps); void intel_rps_park(struct intel_rps *rps); void intel_rps_unpark(struct intel_rps *rps); void intel_rps_boost(struct i915_request *rq); +void intel_rps_dec_waiters(struct intel_rps *rps); +u32 intel_rps_get_boost_frequency(struct intel_rps *rps); +int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq); int intel_rps_set(struct intel_rps *rps, u8 val); void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); @@ -42,6 +45,8 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_state_cap(struct intel_rps *rps); +void intel_rps_raise_unslice(struct intel_rps *rps); +void intel_rps_lower_unslice(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e1f362530889..ab3277a3d593 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -482,7 +482,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER)) + if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER)) wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -560,6 +560,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* * These settings aren't actually workarounds, but general tuning settings that + * need to be programmed on dg2 platform. + */ +static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_add(wal, + FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); +} + +/* + * These settings aren't actually workarounds, but general tuning settings that * need to be programmed on several platforms. */ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, @@ -621,13 +637,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0, false); - - /* - * Wa_14012131227:dg1 - * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p - */ - wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -644,6 +653,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + dg2_ctx_gt_tuning_init(engine, wal); + + /* Wa_16011186671:dg2_g11 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); + wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010469329:dg2_g10 */ + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); + + /* + * Wa_22010465075:dg2_g10 + * Wa_22010613112:dg2_g10 + * Wa_14010698770:dg2_g10 + */ + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + } + + /* Wa_16013271637:dg2 */ + wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_22012532006:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); +} + static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -730,7 +775,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_DG1(i915)) + if (IS_DG2(i915)) + dg2_ctx_workarounds_init(engine, wal); + else if (IS_XEHPSDV(i915)) + ; /* noop; none at this time */ + else if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) gen12_ctx_workarounds_init(engine, wal); @@ -878,10 +927,51 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) } static void +gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu; + unsigned int slice, subslice; + u32 mcr, mcr_mask; + + GEM_BUG_ON(GRAPHICS_VER(i915) != 9); + + /* + * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml + * Before any MMIO read into slice/subslice specific registers, MCR + * packet control register needs to be programmed to point to any + * enabled s/ss pair. Otherwise, incorrect values will be returned. + * This means each subsequent MMIO read will be forwarded to an + * specific s/ss combination, but this is OK since these registers + * are consistent across s/ss in almost all cases. In the rare + * occasions, such as INSTDONE, where this value is dependent + * on s/ss combo, the read should be done with read_subslice_reg. + */ + slice = ffs(sseu->slice_mask) - 1; + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = ffs(intel_sseu_get_subslices(sseu, slice)); + GEM_BUG_ON(!subslice); + subslice--; + + /* + * We use GEN8_MCR..() macros to calculate the |mcr| value for + * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads + */ + mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + + drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr); + + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); +} + +static void gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; + /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */ + gen9_wa_init_mcr(i915, wal); + /* WaDisableKillLogic:bxt,skl,kbl */ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, @@ -916,7 +1006,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0)) + if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); @@ -928,7 +1018,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen9_gt_workarounds_init(gt, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -1134,9 +1224,18 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + /* Wa_1407352427:icl,ehl */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + PSDUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl,ehl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) + IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1190,19 +1289,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen12_gt_workarounds_init(gt, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); } @@ -1215,7 +1314,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen12_gt_workarounds_init(gt, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1236,7 +1335,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = gt->i915; + + xehp_init_mcr(gt, wal); + + /* Wa_1409757795:xehpsdv */ + wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); + + /* Wa_18011725039:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { + wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + } + + /* Wa_16011155590:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + TSGUNIT_CLKGATE_DIS); + + /* Wa_14011780169:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) { + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + } + + /* Wa_14012362059:xehpsdv */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_16012725990:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS); + + /* Wa_14011060649:xehpsdv */ + wa_14011060649(gt, wal); + + /* Wa_14014368820:xehpsdv */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); +} + +static void +dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + xehp_init_mcr(gt, wal); + + /* Wa_14011060649:dg2 */ + wa_14011060649(gt, wal); + + /* + * Although there are per-engine instances of these registers, + * they technically exist outside the engine itself and are not + * impacted by engine resets. Furthermore, they're part of the + * GuC blacklist so trying to treat them as engine workarounds + * will result in GuC initialization failure and a wedged GPU. + */ + for_each_engine(engine, gt, id) { + if (engine->class != VIDEO_DECODE_CLASS) + continue; + + /* Wa_16010515920:dg2_g10 */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) + wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), + ALNUNIT_CLKGATE_DIS); + } + + if (IS_DG2_G10(gt->i915)) { + /* Wa_22010523718:dg2 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + CG3DDISCFEG_CLKGATE_DIS); + + /* Wa_14011006942:dg2 */ + wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_14010680813:dg2_g10 */ + wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS); + + /* Wa_14010948348:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); + + /* Wa_14011037102:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); + + /* Wa_14011371254:dg2_g10 */ + wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + + /* Wa_14011431319:dg2_g10 */ + wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | + GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | + GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | + GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | + GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | + GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS); + wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | + GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | + GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | + GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | + GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | + GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | + GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | + GAMTLBVEBOX0_CLKGATE_DIS); + + /* Wa_14010569222:dg2_g10 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + GAMEDIA_CLKGATE_DIS); + + /* Wa_14011028019:dg2_g10 */ + wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + } + + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012362059:dg2 */ + wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + } + + /* Wa_1509235366:dg2 */ + wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE); + + /* Wa_14014830051:dg2 */ + wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + + /* + * The following are not actually "workarounds" but rather + * recommended tuning settings documented in the bspec's + * performance guide section. + */ + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); } static void @@ -1244,7 +1515,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_XEHPSDV(i915)) + if (IS_DG2(i915)) + dg2_gt_workarounds_init(gt, wal); + else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); else if (IS_DG1(i915)) dg1_gt_workarounds_init(gt, wal); @@ -1518,7 +1791,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); } -static void cml_whitelist_build(struct intel_engine_cs *engine) +static void allow_read_ctx_timestamp(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1526,6 +1799,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) whitelist_reg_ext(w, RING_CTX_TIMESTAMP(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); +} + +static void cml_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); cfl_whitelist_build(engine); } @@ -1534,6 +1812,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ @@ -1569,15 +1849,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1586,6 +1860,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; + allow_read_ctx_timestamp(engine); + switch (engine->class) { case RENDER_CLASS: /* @@ -1602,16 +1878,17 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); - /* Wa_1808121037:tgl */ + /* + * Wa_1808121037:tgl + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); /* Wa_1806527549:tgl */ whitelist_reg(w, HIZ_CHICKEN); break; default: - whitelist_reg_ext(w, - RING_CTX_TIMESTAMP(engine->mmio_base), - RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1623,13 +1900,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && + if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); } +static void xehpsdv_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); +} + +static void dg2_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + allow_read_ctx_timestamp(engine); + + switch (engine->class) { + case RENDER_CLASS: + /* + * Wa_1507100340:dg2_g10 + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + + break; + default: + break; + } +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1637,7 +1947,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, "whitelist", engine->name); - if (IS_DG1(i915)) + if (IS_DG2(i915)) + dg2_whitelist_build(engine); + else if (IS_XEHPSDV(i915)) + xehpsdv_whitelist_build(engine); + else if (IS_DG1(i915)) dg1_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 12) tgl_whitelist_build(engine); @@ -1711,13 +2025,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); } } + +static bool needs_wa_1308578152(struct intel_engine_cs *engine) +{ + u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0); + + return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0; +} + static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || - IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { + if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14013392000:dg2_g11 */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); + + /* Wa_16011620976:dg2_g11 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { + /* Wa_14012419201:dg2 */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_G11(engine->i915)) { + /* + * Wa_22012826095:dg2 + * Wa_22013059131:dg2 + */ + wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); + + /* Wa_22013059131:dg2 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + } + + /* Wa_1308578152:dg2_g10 when first gslice is fused off */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) && + needs_wa_1308578152(engine)) { + wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, + GEN12_REPLAY_MODE_GRANULARITY); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(engine->i915)) { + /* Wa_22013037850:dg2 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_22012856258:dg2 */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); + + /* + * Wa_22010960976:dg2 + * Wa_14013347512:dg2 + */ + wa_masked_dis(wal, GEN12_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* + * Wa_1608949956:dg2_g10 + * Wa_14010198302:dg2_g10 + */ + wa_masked_en(wal, GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + + /* + * Wa_14010918519:dg2_g10 + * + * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, + * so ignoring verification. + */ + wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { + /* Wa_22010430635:dg2 */ + wa_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); + + /* Wa_14010648519:dg2 */ + wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + } + + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_G11(engine->i915)) { + /* Wa_22012654132:dg2 */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + /* Wa_14013202645:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) + wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1727,7 +2147,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { + if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1762,7 +2182,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, @@ -1775,8 +2195,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } - - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -1859,15 +2278,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); - /* Wa_1407352427:icl,ehl */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, - PSDUNIT_CLKGATE_DIS); - - /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - /* * Wa_1408767742:icl[a2..forever],ehl[all] * Wa_1605460711:icl[a0..c0] @@ -2138,7 +2548,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { + if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 8b89215afe46..c0637bf799a3 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -17,7 +17,7 @@ static int mock_timeline_pin(struct intel_timeline *tl) { int err; - if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj))) + if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj, NULL))) return -EBUSY; err = intel_timeline_pin_map(tl); @@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl) atomic_dec(&tl->pin_count); } +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { - const unsigned long sz = PAGE_SIZE / 2; + const unsigned long sz = PAGE_SIZE; struct intel_ring *ring; ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); @@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); - ring->vma = i915_vma_alloc(); - if (!ring->vma) { + ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE); + if (IS_ERR(ring->vma)) { kfree(ring); return NULL; } - i915_active_init(&ring->vma->active, NULL, NULL, 0); - __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); - __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); - ring->vma->node.size = sz; intel_ring_update_space(ring); @@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) static void mock_ring_free(struct intel_ring *ring) { - i915_active_fini(&ring->vma->active); - i915_vma_free(ring->vma); + i915_vma_put(ring->vma); kfree(ring); } @@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce) static void mock_context_post_unpin(struct intel_context *ce) { + i915_vma_unpin(ce->ring->vma); } static void mock_context_destroy(struct kref *ref) @@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce) static int mock_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **unused) { - return 0; + return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH); } static int mock_context_pin(struct intel_context *ce, void *unused) @@ -327,7 +345,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); - GEM_BUG_ON(!i915->gt.uncore); + GEM_BUG_ON(!to_gt(i915)->uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -335,8 +353,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; - engine->base.gt = &i915->gt; - engine->base.uncore = i915->gt.uncore; + engine->base.gt = to_gt(i915); + engine->base.uncore = to_gt(i915)->uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); @@ -359,8 +377,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.release = mock_engine_release; - i915->gt.engine[id] = &engine->base; - i915->gt.engine_class[0][id] = &engine->base; + to_gt(i915)->engine[id] = &engine->base; + to_gt(i915)->engine_class[0][id] = &engine->base; /* fake hw queue */ spin_lock_init(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index fa7b99a671dd..76fbae358072 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -442,7 +442,7 @@ int intel_context_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_context), SUBTEST(live_remote_context), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c index 262764f6d90a..57fea9ea1705 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine.c @@ -12,7 +12,7 @@ int intel_engine_live_selftests(struct drm_i915_private *i915) live_engine_pm_selftests, NULL, }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); typeof(*tests) *fn; for (fn = tests; *fn; fn++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 64abf5feabfa..1b75f478d1b8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -361,10 +361,10 @@ int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) SUBTEST(perf_mi_noop), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } static int intel_mmio_bases_check(void *arg) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 6e6e4d747cca..273d440a53e3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -378,13 +378,13 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) int saved_hangcheck; int err; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; saved_hangcheck = i915->params.enable_hangcheck; i915->params.enable_hangcheck = INT_MAX; - err = intel_gt_live_subtests(tests, &i915->gt); + err = intel_gt_live_subtests(tests, to_gt(i915)); i915->params.enable_hangcheck = saved_hangcheck; return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 75569666105d..8af261831470 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -214,6 +214,31 @@ static int live_engine_timestamps(void *arg) return 0; } +static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness) +{ + ktime_t start, unused, dt; + + if (!intel_engine_uses_guc(engine)) + return 0; + + /* + * In GuC mode of submission, the busyness stats may get updated after + * the batch starts running. Poll for a change in busyness and timeout + * after 500 us. + */ + start = ktime_get(); + while (intel_engine_get_busy_time(engine, &unused) == busyness) { + dt = ktime_get() - start; + if (dt > 10000000) { + pr_err("active wait timed out %lld\n", dt); + ENGINE_TRACE(engine, "active wait time out %lld\n", dt); + return -ETIME; + } + } + + return 0; +} + static int live_engine_busy_stats(void *arg) { struct intel_gt *gt = arg; @@ -232,6 +257,7 @@ static int live_engine_busy_stats(void *arg) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); for_each_engine(engine, gt, id) { struct i915_request *rq; + ktime_t busyness, dummy; ktime_t de, dt; ktime_t t[2]; @@ -274,16 +300,23 @@ static int live_engine_busy_stats(void *arg) } i915_request_add(rq); + busyness = intel_engine_get_busy_time(engine, &dummy); if (!igt_wait_for_spinner(&spin, rq)) { intel_gt_set_wedged(engine->gt); err = -ETIME; goto end; } + err = __spin_until_busier(engine, busyness); + if (err) { + GEM_TRACE_DUMP(); + goto end; + } + ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - udelay(100); + mdelay(10); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index b367ecfa42de..e10da897e07a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -4502,11 +4502,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_reset), }; - if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP) + if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP) return 0; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index b9441217ca3d..8bf62a5826cc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -43,7 +43,7 @@ static void measure_clocks(struct intel_engine_cs *engine, int i; for (i = 0; i < 5; i++) { - preempt_disable(); + local_irq_disable(); cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); dt[i] = ktime_get(); @@ -51,7 +51,7 @@ static void measure_clocks(struct intel_engine_cs *engine, dt[i] = ktime_sub(ktime_get(), dt[i]); cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); - preempt_enable(); + local_irq_enable(); } /* Use the median of both cycle/dt; close enough */ @@ -193,10 +193,10 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gt_resume), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } int intel_gt_pm_late_selftests(struct drm_i915_private *i915) @@ -210,8 +210,8 @@ int intel_gt_pm_late_selftests(struct drm_i915_private *i915) SUBTEST(live_rc6_ctx_wa), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7e2d99dd012d..15d63435ec4d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -471,7 +471,8 @@ static int igt_reset_nop_engine(void *arg) count = 0; st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); do { int i; @@ -528,7 +529,7 @@ static int igt_reset_nop_engine(void *arg) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -582,7 +583,8 @@ static int igt_reset_fail_engine(void *arg) } st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); force_reset_timeout(engine); err = intel_engine_reset(engine, NULL); @@ -679,7 +681,7 @@ static int igt_reset_fail_engine(void *arg) out: pr_info("%s(%s): %d resets\n", __func__, engine->name, count); skip: - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); intel_context_put(ce); @@ -734,7 +736,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_engine_count = i915_reset_engine_count(global, engine); st_engine_heartbeat_disable(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); count = 0; do { struct i915_request *rq = NULL; @@ -824,7 +827,7 @@ restore: if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); pr_info("%s: Completed %lu %s resets\n", engine->name, count, active ? "active" : "idle"); @@ -1042,7 +1045,8 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ st_engine_heartbeat_disable_no_pm(engine); - set_bit(I915_RESET_ENGINE + id, >->reset.flags); + GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)); do { struct i915_request *rq = NULL; struct intel_selftest_saved_policy saved; @@ -1165,7 +1169,7 @@ restore: if (err) break; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", @@ -2014,7 +2018,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); intel_wakeref_t wakeref; int err; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b0977a3b699b..618c905daa19 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1847,5 +1847,5 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 12ef2837c89b..fa4293d2944f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -49,6 +49,7 @@ static int copy(struct intel_migrate *migrate, if (IS_ERR(src)) return 0; + sz = src->base.size; dst = i915_gem_object_create_internal(i915, sz); if (IS_ERR(dst)) goto err_free_src; @@ -441,7 +442,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) SUBTEST(thread_global_copy), SUBTEST(thread_global_clear), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (!gt->migrate.context) return 0; @@ -464,7 +465,7 @@ create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) return obj; } - i915_gem_object_trylock(obj); + i915_gem_object_trylock(obj, NULL); err = i915_gem_object_pin_pages(obj); if (err) { i915_gem_object_unlock(obj); @@ -657,7 +658,7 @@ int intel_migrate_perf_selftests(struct drm_i915_private *i915) SUBTEST(perf_clear_blt), SUBTEST(perf_copy_blt), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 13d25bf2a94a..c1d861333c44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -451,5 +451,5 @@ int intel_mocs_live_selftests(struct drm_i915_private *i915) if (!get_mocs_settings(i915, &table)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 7a50c9f4071b..8a873f6bda7f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -376,7 +376,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_engine_reset), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (!intel_has_gpu_reset(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 041954408d0f..70f9ac1ec2c7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -291,8 +291,8 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915) SUBTEST(live_ctx_switch_wa), }; - if (i915->gt.submission_method > INTEL_SUBMISSION_RING) + if (to_gt(i915)->submission_method > INTEL_SUBMISSION_RING) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 9334bad131a2..b768cea5943d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -39,7 +39,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) static int live_slpc_clamp_min(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct intel_guc_slpc *slpc = >->uc.guc.slpc; struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; @@ -166,7 +166,7 @@ static int live_slpc_clamp_min(void *arg) static int live_slpc_clamp_max(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct intel_guc_slpc *slpc; struct intel_rps *rps; struct intel_engine_cs *engine; @@ -304,7 +304,7 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_slpc_clamp_min), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index d0b6a3afcf44..e2eb686a9763 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -159,7 +159,7 @@ static int mock_hwsp_freelist(void *arg) INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - state.gt = &i915->gt; + state.gt = to_gt(i915); /* * Create a bunch of timelines and check that their HWSP do not overlap. @@ -1416,8 +1416,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_rollover_user), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 962e91ba3be4..0287c2573c51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -1387,8 +1387,8 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_engine_reset_workarounds), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index ba10bd374cee..fe5d7d261797 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -144,6 +144,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 31cf9fb48c7e..f9240d4baa69 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -95,6 +95,11 @@ struct intel_guc { */ struct ida guc_ids; /** + * @num_guc_ids: Number of guc_ids, selftest feature to be able + * to reduce this number while testing. + */ + int num_guc_ids; + /** * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ unsigned long *guc_ids_bitmap; @@ -138,6 +143,8 @@ struct intel_guc { u32 ads_regset_size; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_engine_usage_size: size of engine usage in the ADS */ + u32 ads_engine_usage_size; /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; @@ -172,6 +179,41 @@ struct intel_guc { /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; + + /** + * @timestamp: GT timestamp object that stores a copy of the timestamp + * and adjusts it for overflow using a worker. + */ + struct { + /** + * @lock: Lock protecting the below fields and the engine stats. + */ + spinlock_t lock; + + /** + * @gt_stamp: 64 bit extended value of the GT timestamp. + */ + u64 gt_stamp; + + /** + * @ping_delay: Period for polling the GT timestamp for + * overflow. + */ + unsigned long ping_delay; + + /** + * @work: Periodic work to adjust GT timestamp, engine and + * context usage for overflows. + */ + struct delayed_work work; + } timestamp; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** + * @number_guc_id_stolen: The number of guc_ids that have been stolen + */ + int number_guc_id_stolen; +#endif }; static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 621c893a009f..1a1edae67e4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -26,6 +26,8 @@ * | guc_policies | * +---------------------------------------+ * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_engine_usage | * +---------------------------------------+ <== static * | guc_mmio_reg[countA] (engine 0.0) | * | guc_mmio_reg[countB] (engine 0.1) | @@ -47,6 +49,7 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; + struct guc_engine_usage engine_usage; /* From here on, location is dynamic! Refer to above diagram. */ struct guc_mmio_reg regset[0]; } __packed; @@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc) guc_ads_private_data_reset(guc); } + +u32 intel_guc_engine_usage_offset(struct intel_guc *guc) +{ + struct __guc_ads_blob *blob = guc->ads_blob; + u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma); + u32 offset = base + ptr_offset(blob, engine_usage); + + return offset; +} + +struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct __guc_ads_blob *blob = guc->ads_blob; + u8 guc_class = engine_class_to_guc_class(engine->class); + + return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)]; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index 3d85051d57e4..e74c110facff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -6,8 +6,11 @@ #ifndef _INTEL_GUC_ADS_H_ #define _INTEL_GUC_ADS_H_ +#include <linux/types.h> + struct intel_guc; struct drm_printer; +struct intel_engine_cs; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); @@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *p); +struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine); +u32 intel_guc_engine_usage_offset(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index a0cc34be7b56..aa6dd6415202 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -523,6 +523,15 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n", ktime_ms_delta(ktime_get(), ct->stall_time), send->status, recv->status); + CT_ERROR(ct, "H2G Space: %u (Bytes)\n", + atomic_read(&ct->ctbs.send.space) * 4); + CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head); + CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail); + CT_ERROR(ct, "G2H Space: %u (Bytes)\n", + atomic_read(&ct->ctbs.recv.space) * 4); + CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head); + CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail); + ct->ctbs.send.broken = true; } @@ -582,12 +591,19 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) { + bool h2g = h2g_has_room(ct, h2g_dw); + bool g2h = g2h_has_room(ct, g2h_dw); + lockdep_assert_held(&ct->ctbs.send.lock); - if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { + if (unlikely(!h2g || !g2h)) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); + /* Be paranoid and kick G2H tasklet to free credits */ + if (!g2h) + tasklet_hi_schedule(&ct->receive_tasklet); + if (unlikely(ct_deadlocked(ct))) return -EPIPE; else diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 196424be0998..31420ce1ce6b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -40,9 +40,8 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) } } -/* Copy RSA signature from the fw image to HW for verification */ -static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, - struct intel_uncore *uncore) +static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; size_t copied; @@ -58,6 +57,27 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, return 0; } +static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + + intel_uncore_write(uncore, UOS_RSA_SCRATCH(0), + intel_guc_ggtt_offset(guc, guc_fw->rsa_data)); + + return 0; +} + +/* Copy RSA signature from the fw image to HW for verification */ +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, + struct intel_uncore *uncore) +{ + if (guc_fw->rsa_data) + return guc_xfer_rsa_vma(guc_fw, uncore); + else + return guc_xfer_rsa_mmio(guc_fw, uncore); +} + /* * Read the GuC status register (GUC_STATUS) and store it in the * specified location; then return a boolean indicating whether @@ -142,7 +162,10 @@ int intel_guc_fw_upload(struct intel_guc *guc) /* * Note that GuC needs the CSS header plus uKernel code to be copied * by the DMA engine in one operation, whereas the RSA signature is - * loaded via MMIO. + * loaded separately, either by copying it to the UOS_RSA_SCRATCH + * register (if key size <= 256) or through a ggtt-pinned vma (if key + * size > 256). The RSA size and therefore the way we provide it to the + * HW is fixed for each platform and hard-coded in the bootrom. */ ret = guc_xfer_rsa(&guc->fw, uncore); if (ret) @@ -164,6 +187,6 @@ int intel_guc_fw_upload(struct intel_guc *guc) return 0; out: - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 722933e26347..7072e30e99f4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -294,6 +294,19 @@ struct guc_ads { u32 reserved[15]; } __packed; +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + /* GuC logging structures */ enum guc_log_buffer_type { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index ac1ee1d5ce10..fe6ab7550a14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -15,9 +15,12 @@ struct intel_guc; -#ifdef CONFIG_DRM_I915_DEBUG_GUC +#if defined(CONFIG_DRM_I915_DEBUG_GUC) #define CRASH_BUFFER_SIZE SZ_2M #define DEBUG_BUFFER_SIZE SZ_16M +#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#define CRASH_BUFFER_SIZE SZ_1M +#define DEBUG_BUFFER_SIZE SZ_2M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c index 46026c2c1722..ddfbe334689f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -10,28 +10,80 @@ #include "intel_guc.h" #include "intel_guc_log.h" #include "intel_guc_log_debugfs.h" +#include "intel_uc.h" + +static u32 obj_to_guc_log_dump_size(struct drm_i915_gem_object *obj) +{ + u32 size; + + if (!obj) + return PAGE_SIZE; + + /* "0x%08x 0x%08x 0x%08x 0x%08x\n" => 16 bytes -> 44 chars => x2.75 */ + size = ((obj->base.size * 11) + 3) / 4; + + /* Add padding for final blank line, any extra header info, etc. */ + size = PAGE_ALIGN(size + PAGE_SIZE); + + return size; +} + +static u32 guc_log_dump_size(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + + if (!intel_guc_is_supported(guc)) + return PAGE_SIZE; + + if (!log->vma) + return PAGE_SIZE; + + return obj_to_guc_log_dump_size(log->vma->obj); +} static int guc_log_dump_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); + int ret; - return intel_guc_log_dump(m->private, &p, false); + ret = intel_guc_log_dump(m->private, &p, false); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m)) + pr_warn_once("preallocated size:%zx for %s exceeded\n", + m->size, __func__); + + return ret; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_log_dump, guc_log_dump_size); + +static u32 guc_load_err_dump_size(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + + if (!intel_guc_is_supported(guc)) + return PAGE_SIZE; + + return obj_to_guc_log_dump_size(uc->load_err_log); } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); static int guc_load_err_log_dump_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m)) + pr_warn_once("preallocated size:%zx for %s exceeded\n", + m->size, __func__); + return intel_guc_log_dump(m->private, &p, true); } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_load_err_log_dump, guc_load_err_dump_size); static int guc_log_level_get(void *data, u64 *val) { struct intel_guc_log *log = data; - if (!intel_guc_is_used(log_to_guc(log))) + if (!log->vma) return -ENODEV; *val = intel_guc_log_get_level(log); @@ -43,7 +95,7 @@ static int guc_log_level_set(void *data, u64 val) { struct intel_guc_log *log = data; - if (!intel_guc_is_used(log_to_guc(log))) + if (!log->vma) return -ENODEV; return intel_guc_log_set_level(log, val); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..13b27b8ff74e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data, slpc_mem_set_param(data, enable_id, 0); } -int intel_guc_slpc_init(struct intel_guc_slpc *slpc) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - struct drm_i915_private *i915 = slpc_to_i915(slpc); - u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); - int err; - - GEM_BUG_ON(slpc->vma); - - err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); - if (unlikely(err)) { - drm_err(&i915->drm, - "Failed to allocate SLPC struct (err=%pe)\n", - ERR_PTR(err)); - return err; - } - - slpc->max_freq_softlimit = 0; - slpc->min_freq_softlimit = 0; - - return err; -} - static u32 slpc_get_state(struct intel_guc_slpc *slpc) { struct slpc_shared_data *data; @@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc, return guc_action_slpc_unset_param(guc, id); } +static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + lockdep_assert_held(&slpc->lock); + + if (!intel_guc_is_ready(guc)) + return -ENODEV; + + /* + * This function is a little different as compared to + * intel_guc_slpc_set_min_freq(). Softlimit will not be updated + * here since this is used to temporarily change min freq, + * for example, during a waitboost. Caller is responsible for + * checking bounds. + */ + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); + if (ret) + drm_err(&i915->drm, "Unable to force min freq to %u: %d", + freq, ret); + } + + return ret; +} + +static void slpc_boost_work(struct work_struct *work) +{ + struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + + /* + * Raise min freq to boost. It's possible that + * this is greater than current max. But it will + * certainly be limited by RP0. An error setting + * the min param is not fatal. + */ + mutex_lock(&slpc->lock); + if (atomic_read(&slpc->num_waiters)) { + slpc_force_min_freq(slpc, slpc->boost_freq); + slpc->num_boosts++; + } + mutex_unlock(&slpc->lock); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + + slpc->boost_freq = 0; + atomic_set(&slpc->num_waiters, 0); + slpc->num_boosts = 0; + + mutex_init(&slpc->lock); + INIT_WORK(&slpc->boost_work, slpc_boost_work); + + return err; +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) val > slpc->max_freq_softlimit) return -EINVAL; + /* Need a lock now since waitboost can be modifying min as well */ + mutex_lock(&slpc->lock); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) if (!ret) slpc->min_freq_softlimit = val; + mutex_unlock(&slpc->lock); + return ret; } @@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) GT_FREQUENCY_MULTIPLIER; slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * GT_FREQUENCY_MULTIPLIER; + + if (!slpc->boost_freq) + slpc->boost_freq = slpc->rp0_freq; } /* @@ -557,7 +623,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) if (unlikely(ret < 0)) return ret; - intel_guc_pm_intrmsk_enable(&i915->gt); + intel_guc_pm_intrmsk_enable(to_gt(i915)); slpc_get_rp_values(slpc); @@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val) +{ + int ret = 0; + + if (val < slpc->min_freq || val > slpc->rp0_freq) + return -EINVAL; + + mutex_lock(&slpc->lock); + + if (slpc->boost_freq != val) { + /* Apply only if there are active waiters */ + if (atomic_read(&slpc->num_waiters)) { + ret = slpc_force_min_freq(slpc, val); + if (ret) { + ret = -EIO; + goto done; + } + } + + slpc->boost_freq = val; + } + +done: + mutex_unlock(&slpc->lock); + return ret; +} + +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) +{ + /* + * Return min back to the softlimit. + * This is called during request retire, + * so we don't need to fail that if the + * set_param fails. + */ + mutex_lock(&slpc->lock); + if (atomic_dec_and_test(&slpc->num_waiters)) + slpc_force_min_freq(slpc, slpc->min_freq_softlimit); + mutex_unlock(&slpc->lock); +} + int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p slpc_decode_max_freq(slpc)); drm_printf(p, "\tMin freq: %u MHz\n", slpc_decode_min_freq(slpc)); + drm_printf(p, "\twaitboosts: %u\n", + slpc->num_boosts); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index e45054d5b9b4..0caa8fee3c04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); +void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); +void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 41d13527666f..bf5b9a563c09 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -6,6 +6,9 @@ #ifndef _INTEL_GUC_SLPC_TYPES_H_ #define _INTEL_GUC_SLPC_TYPES_H_ +#include <linux/atomic.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> #include <linux/types.h> #define SLPC_RESET_TIMEOUT_MS 5 @@ -20,10 +23,20 @@ struct intel_guc_slpc { u32 min_freq; u32 rp0_freq; u32 rp1_freq; + u32 boost_freq; /* frequency softlimits */ u32 min_freq_softlimit; u32 max_freq_softlimit; + + /* Protects set/reset of boost freq + * and value of num_waiters + */ + struct mutex lock; + + struct work_struct boost_work; + atomic_t num_waiters; + u32 num_boosts; }; #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c48557dfa04c..e7517206af82 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -13,6 +13,7 @@ #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -21,6 +22,7 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -143,7 +145,8 @@ guc_create_parallel(struct intel_engine_cs **engines, * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for * multi-lrc. */ -#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) +#define NUMBER_MULTI_LRC_GUC_ID(guc) \ + ((guc)->submission_state.num_guc_ids / 16) /* * Below is a set of functions which control the GuC scheduling state which @@ -1038,8 +1041,6 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) spin_unlock(&ce->guc_state.lock); - GEM_BUG_ON(!do_put && !destroyed); - if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); if (deregister) @@ -1077,6 +1078,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) xa_unlock_irqrestore(&guc->context_lookup, flags); } +/* + * GuC stores busyness stats for each engine at context in/out boundaries. A + * context 'in' logs execution start time, 'out' adds in -> out delta to total. + * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with + * GuC. + * + * __i915_pmu_event_read samples engine busyness. When sampling, if context id + * is valid (!= ~0) and start is non-zero, the engine is considered to be + * active. For an active engine total busyness = total + (now - start), where + * 'now' is the time at which the busyness is sampled. For inactive engine, + * total busyness = total. + * + * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. + * + * The start and total values provided by GuC are 32 bits and wrap around in a + * few minutes. Since perf pmu provides busyness as 64 bit monotonically + * increasing ns values, there is a need for this implementation to account for + * overflows and extend the GuC provided values to 64 bits before returning + * busyness to the user. In order to do that, a worker runs periodically at + * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in + * 27 seconds for a gt clock frequency of 19.2 MHz). + */ + +#define WRAP_TIME_CLKS U32_MAX +#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) + +static void +__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) +{ + u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); + + if (new_start == lower_32_bits(*prev_start)) + return; + + if (new_start < gt_stamp_last && + (new_start - gt_stamp_last) <= POLL_TIME_CLKS) + gt_stamp_hi++; + + if (new_start > gt_stamp_last && + (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) + gt_stamp_hi--; + + *prev_start = ((u64)gt_stamp_hi << 32) | new_start; +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ + struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + struct intel_engine_guc_stats *stats = &engine->stats.guc; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 last_switch = rec->last_switch_in_stamp; + u32 ctx_id = rec->current_context_index; + u32 total = rec->total_runtime; + + lockdep_assert_held(&guc->timestamp.lock); + + stats->running = ctx_id != ~0U && last_switch; + if (stats->running) + __extend_last_switch(guc, &stats->start_gt_clk, last_switch); + + /* + * Instead of adjusting the total for overflow, just add the + * difference from previous sample stats->total_gt_clks + */ + if (total && total != ~0U) { + stats->total_gt_clks += (u32)(total - stats->prev_total); + stats->prev_total = total; + } +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, + struct intel_engine_cs *engine, + ktime_t *now) +{ + u32 gt_stamp_now, gt_stamp_hi; + + lockdep_assert_held(&guc->timestamp.lock); + + gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); + gt_stamp_now = intel_uncore_read(engine->uncore, + RING_TIMESTAMP(engine->mmio_base)); + *now = ktime_get(); + + if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + gt_stamp_hi++; + + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; +} + +/* + * Unlike the execlist mode of submission total and active times are in terms of + * gt clocks. The *now parameter is retained to return the cpu time at which the + * busyness was sampled. + */ +static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) +{ + struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; + struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; + struct intel_guc *guc = >->uc.guc; + u64 total, gt_stamp_saved; + unsigned long flags; + u32 reset_count; + bool in_reset; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + /* + * If a reset happened, we risk reading partially updated engine + * busyness from GuC, so we just use the driver stored copy of busyness. + * Synchronize with gt reset using reset_count and the + * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count + * after I915_RESET_BACKOFF flag, so ensure that the reset_count is + * usable by checking the flag afterwards. + */ + reset_count = i915_reset_count(gpu_error); + in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); + + *now = ktime_get(); + + /* + * The active busyness depends on start_gt_clk and gt_stamp. + * gt_stamp is updated by i915 only when gt is awake and the + * start_gt_clk is derived from GuC state. To get a consistent + * view of activity, we query the GuC state only if gt is awake. + */ + if (!in_reset && intel_gt_pm_get_if_awake(gt)) { + stats_saved = *stats; + gt_stamp_saved = guc->timestamp.gt_stamp; + guc_update_engine_gt_clks(engine); + guc_update_pm_timestamp(guc, engine, now); + intel_gt_pm_put_async(gt); + if (i915_reset_count(gpu_error) != reset_count) { + *stats = stats_saved; + guc->timestamp.gt_stamp = gt_stamp_saved; + } + } + + total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + total += intel_gt_clock_interval_to_ns(gt, clk); + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); + + return ns_to_ktime(total); +} + +static void __reset_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + cancel_delayed_work_sync(&guc->timestamp.work); + + spin_lock_irqsave(&guc->timestamp.lock, flags); + + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + engine->stats.guc.prev_total = 0; + } + + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void __update_guc_busyness_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + ktime_t unused; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) { + guc_update_pm_timestamp(guc, engine, &unused); + guc_update_engine_gt_clks(engine); + } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void guc_timestamp_ping(struct work_struct *wrk) +{ + struct intel_guc *guc = container_of(wrk, typeof(*guc), + timestamp.work.work); + struct intel_uc *uc = container_of(guc, typeof(*uc), guc); + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int srcu, ret; + + /* + * Synchronize with gt reset to make sure the worker does not + * corrupt the engine/guc stats. + */ + ret = intel_gt_reset_trylock(gt, &srcu); + if (ret) + return; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + __update_guc_busyness_stats(guc); + + intel_gt_reset_unlock(gt, srcu); + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + +static int guc_action_enable_usage_stats(struct intel_guc *guc) +{ + u32 offset = intel_guc_engine_usage_offset(guc); + u32 action[] = { + INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, + offset, + 0, + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static void guc_init_engine_stats(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { + int ret = guc_action_enable_usage_stats(guc); + + if (ret) + drm_err(>->i915->drm, + "Failed to enable usage stats: %d!\n", ret); + } +} + +void intel_guc_busyness_park(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + cancel_delayed_work(&guc->timestamp.work); + __update_guc_busyness_stats(guc); +} + +void intel_guc_busyness_unpark(struct intel_gt *gt) +{ + struct intel_guc *guc = >->uc.guc; + + if (!guc_submission_initialized(guc)) + return; + + mod_delayed_work(system_highpri_wq, &guc->timestamp.work, + guc->timestamp.ping_delay); +} + static inline bool submission_disabled(struct intel_guc *guc) { @@ -1138,6 +1404,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) intel_gt_park_heartbeats(guc_to_gt(guc)); disable_submission(guc); guc->interrupts.disable(guc); + __reset_guc_busyness_stats(guc); /* Flush IRQ handler */ spin_lock_irq(&guc_to_gt(guc)->irq_lock); @@ -1484,6 +1751,7 @@ static void destroyed_worker_func(struct work_struct *w); */ int intel_guc_submission_init(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); int ret; if (guc->lrc_desc_pool) @@ -1508,10 +1776,14 @@ int intel_guc_submission_init(struct intel_guc *guc) destroyed_worker_func); guc->submission_state.guc_ids_bitmap = - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; + spin_lock_init(&guc->timestamp.lock); + INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + return 0; } @@ -1598,13 +1870,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (intel_context_is_parent(ce)) ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, - NUMBER_MULTI_LRC_GUC_ID, + NUMBER_MULTI_LRC_GUC_ID(guc), order_base_2(ce->parallel.number_children + 1)); else ret = ida_simple_get(&guc->submission_state.guc_ids, - NUMBER_MULTI_LRC_GUC_ID, - GUC_MAX_LRC_DESCRIPTORS, + NUMBER_MULTI_LRC_GUC_ID(guc), + guc->submission_state.num_guc_ids, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(ret < 0)) @@ -1662,14 +1934,18 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_parent(cn)); list_del_init(&cn->guc_id.link); - ce->guc_id = cn->guc_id; + ce->guc_id.id = cn->guc_id.id; - spin_lock(&ce->guc_state.lock); + spin_lock(&cn->guc_state.lock); clr_context_registered(cn); - spin_unlock(&ce->guc_state.lock); + spin_unlock(&cn->guc_state.lock); set_context_guc_id_invalid(cn); +#ifdef CONFIG_DRM_I915_SELFTEST + guc->number_guc_id_stolen++; +#endif + return 0; } else { return -EAGAIN; @@ -2373,7 +2649,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) unsigned long flags; bool disabled; - lockdep_assert_held(&guc->submission_state.lock); GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); @@ -2389,7 +2664,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { - __release_guc_id(guc, ce); + release_guc_id(guc, ce); __guc_context_destroy(ce); return; } @@ -2423,36 +2698,48 @@ static void __guc_context_destroy(struct intel_context *ce) static void guc_flush_destroyed_contexts(struct intel_guc *guc) { - struct intel_context *ce, *cn; + struct intel_context *ce; unsigned long flags; GEM_BUG_ON(!submission_disabled(guc) && guc_submission_initialized(guc)); - spin_lock_irqsave(&guc->submission_state.lock, flags); - list_for_each_entry_safe(ce, cn, - &guc->submission_state.destroyed_contexts, - destroyed_link) { - list_del_init(&ce->destroyed_link); - __release_guc_id(guc, ce); + while (!list_empty(&guc->submission_state.destroyed_contexts)) { + spin_lock_irqsave(&guc->submission_state.lock, flags); + ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, + struct intel_context, + destroyed_link); + if (ce) + list_del_init(&ce->destroyed_link); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + if (!ce) + break; + + release_guc_id(guc, ce); __guc_context_destroy(ce); } - spin_unlock_irqrestore(&guc->submission_state.lock, flags); } static void deregister_destroyed_contexts(struct intel_guc *guc) { - struct intel_context *ce, *cn; + struct intel_context *ce; unsigned long flags; - spin_lock_irqsave(&guc->submission_state.lock, flags); - list_for_each_entry_safe(ce, cn, - &guc->submission_state.destroyed_contexts, - destroyed_link) { - list_del_init(&ce->destroyed_link); + while (!list_empty(&guc->submission_state.destroyed_contexts)) { + spin_lock_irqsave(&guc->submission_state.lock, flags); + ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, + struct intel_context, + destroyed_link); + if (ce) + list_del_init(&ce->destroyed_link); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + + if (!ce) + break; + guc_lrc_desc_unpin(ce); } - spin_unlock_irqrestore(&guc->submission_state.lock, flags); } static void destroyed_worker_func(struct work_struct *w) @@ -3369,7 +3656,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen12_emit_flush_xcs; } engine->set_default_submission = guc_set_default_submission; + engine->busyness = guc_engine_busyness; + engine->flags |= I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->flags |= I915_ENGINE_HAS_TIMESLICES; @@ -3468,6 +3757,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { guc_init_lrc_mapping(guc); + guc_init_engine_stats(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -3494,6 +3784,7 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } @@ -3695,6 +3986,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_context *ce; + unsigned long flags; int desc_idx; if (unlikely(len != 1)) { @@ -3703,11 +3995,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, } desc_idx = msg[0]; + + /* + * The context lookup uses the xarray but lookups only require an RCU lock + * not the full spinlock. So take the lock explicitly and keep it until the + * context has been reference count locked to ensure it can't be destroyed + * asynchronously until the reset is done. + */ + xa_lock_irqsave(&guc->context_lookup, flags); ce = g2h_context_lookup(guc, desc_idx); + if (ce) + intel_context_get(ce); + xa_unlock_irqrestore(&guc->context_lookup, flags); + if (unlikely(!ce)) return -EPROTO; guc_handle_context_reset(guc, ce); + intel_context_put(ce); return 0; } @@ -3728,11 +4033,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { struct intel_engine_cs *engine; + struct intel_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; if (unlikely(len != 3)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + drm_err(>->i915->drm, "Invalid length %u", len); return -EPROTO; } @@ -3742,12 +4048,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, engine = guc_lookup_engine(guc, guc_class, instance); if (unlikely(!engine)) { - drm_err(&guc_to_gt(guc)->i915->drm, + drm_err(>->i915->drm, "Invalid engine %d:%d", guc_class, instance); return -EPROTO; } - intel_gt_handle_error(guc_to_gt(guc), engine->mask, + /* + * This is an unexpected failure of a hardware feature. So, log a real + * error message not just the informational that comes with the reset. + */ + drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", + guc_class, instance, engine->name, reason); + + intel_gt_handle_error(gt, engine->mask, I915_ERROR_CAPTURE, "GuC failed to reset %s (reason=0x%08x)\n", engine->name, reason); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index c7ef44fa0c36..5a95a9f0a8e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, void intel_guc_dump_active_requests(struct intel_engine_cs *engine, struct i915_request *hung_rq, struct drm_printer *m); +void intel_guc_busyness_park(struct intel_gt *gt); +void intel_guc_busyness_unpark(struct intel_gt *gt); bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index ff4b6869b80b..d10b227ac4aa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -54,65 +54,6 @@ void intel_huc_init_early(struct intel_huc *huc) } } -static int intel_huc_rsa_data_create(struct intel_huc *huc) -{ - struct intel_gt *gt = huc_to_gt(huc); - struct intel_guc *guc = >->uc.guc; - struct i915_vma *vma; - size_t copied; - void *vaddr; - int err; - - err = i915_inject_probe_error(gt->i915, -ENXIO); - if (err) - return err; - - /* - * HuC firmware will sit above GUC_GGTT_TOP and will not map - * through GTT. Unfortunately, this means GuC cannot perform - * the HuC auth. as the rsa offset now falls within the GuC - * inaccessible range. We resort to perma-pinning an additional - * vma within the accessible range that only contains the rsa - * signature. The GuC can use this extra pinning to perform - * the authentication since its GGTT offset will be GuC - * accessible. - */ - GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE); - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map_unlocked(vma->obj, - i915_coherent_map_type(gt->i915, - vma->obj, true)); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - err = PTR_ERR(vaddr); - goto unpin_out; - } - - copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); - i915_gem_object_unpin_map(vma->obj); - - if (copied < huc->fw.rsa_size) { - err = -ENOMEM; - goto unpin_out; - } - - huc->rsa_data = vma; - - return 0; - -unpin_out: - i915_vma_unpin_and_release(&vma, 0); - return err; -} - -static void intel_huc_rsa_data_destroy(struct intel_huc *huc) -{ - i915_vma_unpin_and_release(&huc->rsa_data, 0); -} - int intel_huc_init(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -122,21 +63,10 @@ int intel_huc_init(struct intel_huc *huc) if (err) goto out; - /* - * HuC firmware image is outside GuC accessible range. - * Copy the RSA signature out of the image into - * a perma-pinned region set aside for it - */ - err = intel_huc_rsa_data_create(huc); - if (err) - goto out_fini; - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE); return 0; -out_fini: - intel_uc_fw_fini(&huc->fw); out: i915_probe_error(i915, "failed with %d\n", err); return err; @@ -147,7 +77,6 @@ void intel_huc_fini(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; - intel_huc_rsa_data_destroy(huc); intel_uc_fw_fini(&huc->fw); } @@ -177,7 +106,7 @@ int intel_huc_auth(struct intel_huc *huc) goto fail; ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, huc->rsa_data)); + intel_guc_ggtt_offset(guc, huc->fw.rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto fail; @@ -199,7 +128,7 @@ int intel_huc_auth(struct intel_huc *huc) fail: i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index daee43b661d4..ae8c8a6c8cc8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -15,8 +15,6 @@ struct intel_huc { struct intel_uc_fw fw; /* HuC-specific additions */ - struct i915_vma *rsa_data; - struct { i915_reg_t reg; u32 mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 2fef3b0bbe95..09ed29df67bc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -8,6 +8,7 @@ #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" +#include "gt/intel_rps.h" #include "intel_uc.h" #include "i915_drv.h" @@ -35,7 +36,7 @@ static void uc_expand_default_options(struct intel_uc *uc) } /* Intermediate platforms are HuC authentication only */ - if (IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; } @@ -462,6 +463,8 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + intel_rps_raise_unslice(&uc_to_gt(uc)->rps); + while (attempts--) { /* * Always reset the GuC just before (re)loading, so @@ -499,6 +502,9 @@ static int __uc_init_hw(struct intel_uc *uc) ret = intel_guc_slpc_enable(&guc->slpc); if (ret) goto err_submission; + } else { + /* Restore GT back to RPn for non-SLPC path */ + intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", @@ -529,6 +535,9 @@ err_submission: err_log_capture: __uc_capture_load_err_log(uc); err_out: + /* Return GT back to RPn */ + intel_rps_lower_unslice(&uc_to_gt(uc)->rps); + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3aa87be4f2e4..a5af05bde6f2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,22 +48,39 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same * firmware as TGL. */ -#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0)) +#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0)) + +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ + fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ @@ -79,11 +96,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) /* All blobs need to be declared via MODULE_FIRMWARE() */ -#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ - MODULE_FIRMWARE(guc_); \ - MODULE_FIRMWARE(huc_); +#define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \ + MODULE_FIRMWARE(uc_); -INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ struct __packed uc_fw_blob { @@ -106,31 +123,47 @@ struct __packed uc_fw_blob { struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ - const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES]; + const struct uc_fw_blob blob; }; -#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \ +#define MAKE_FW_LIST(platform_, revid_, uc_) \ { \ .p = INTEL_##platform_, \ .rev = revid_, \ - .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \ - .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \ + .blob = uc_, \ }, +struct fw_blobs_by_type { + const struct uc_fw_platform_requirement *blobs; + u32 count; +}; + static void __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { - static const struct uc_fw_platform_requirement fw_blobs[] = { - INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) + static const struct uc_fw_platform_requirement blobs_guc[] = { + INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) + }; + static const struct uc_fw_platform_requirement blobs_huc[] = { + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; + static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { + [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, + [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + }; + static const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; + u32 fw_count; u8 rev = INTEL_REVID(i915); int i; - for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + fw_blobs = blobs_all[uc_fw->type].blobs; + fw_count = blobs_all[uc_fw->type].count; + + for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { - const struct uc_fw_blob *blob = - &fw_blobs[i].blobs[uc_fw->type]; + const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; @@ -140,7 +173,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { - for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) { + for (i = 1; i < fw_count; i++) { if (fw_blobs[i].p < fw_blobs[i - 1].p) continue; @@ -322,13 +355,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ - if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) { - drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, - css->key_size_dw, UOS_RSA_SCRATCH_COUNT); - err = -EPROTO; - goto fail; - } uc_fw->rsa_size = css->key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. */ @@ -540,10 +566,79 @@ fail: i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return err; } +static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw) +{ + /* + * The HW reads the GuC RSA from memory if the key size is > 256 bytes, + * while it reads it from the 64 RSA registers if it is smaller. + * The HuC RSA is always read from memory. + */ + return uc_fw->type == INTEL_UC_FW_TYPE_HUC || uc_fw->rsa_size > 256; +} + +static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct i915_vma *vma; + size_t copied; + void *vaddr; + int err; + + err = i915_inject_probe_error(gt->i915, -ENXIO); + if (err) + return err; + + if (!uc_fw_need_rsa_in_memory(uc_fw)) + return 0; + + /* + * uC firmwares will sit above GUC_GGTT_TOP and will not map through + * GGTT. Unfortunately, this means that the GuC HW cannot perform the uC + * authentication from memory, as the RSA offset now falls within the + * GuC inaccessible range. We resort to perma-pinning an additional vma + * within the accessible range that only contains the RSA signature. + * The GuC HW can use this extra pinning to perform the authentication + * since its GGTT offset will be GuC accessible. + */ + GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE); + vma = intel_guc_allocate_vma(>->uc.guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, + i915_coherent_map_type(gt->i915, vma->obj, true)); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + err = PTR_ERR(vaddr); + goto unpin_out; + } + + copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size); + i915_gem_object_unpin_map(vma->obj); + + if (copied < uc_fw->rsa_size) { + err = -ENOMEM; + goto unpin_out; + } + + uc_fw->rsa_data = vma; + + return 0; + +unpin_out: + i915_vma_unpin_and_release(&vma, 0); + return err; +} + +static void uc_fw_rsa_data_destroy(struct intel_uc_fw *uc_fw) +{ + i915_vma_unpin_and_release(&uc_fw->rsa_data, 0); +} + int intel_uc_fw_init(struct intel_uc_fw *uc_fw) { int err; @@ -558,14 +653,29 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) if (err) { DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", intel_uc_fw_type_repr(uc_fw->type), err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL); + goto out; } + err = uc_fw_rsa_data_create(uc_fw); + if (err) { + DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto out_unpin; + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(uc_fw->obj); +out: + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL); return err; } void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { + uc_fw_rsa_data_destroy(uc_fw); + if (i915_gem_object_has_pinned_pages(uc_fw->obj)) i915_gem_object_unpin_pages(uc_fw->obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 1e00bf65639e..d9d1dc0b4cbb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -32,11 +32,12 @@ struct intel_gt; * | | MISSING <--/ | \--> ERROR | * | fetch | V | * | | AVAILABLE | - * +------------+- | -+ + * +------------+- | \ -+ + * | | | \--> INIT FAIL | * | init | V | * | | /------> LOADABLE <----<-----------\ | * +------------+- \ / \ \ \ -+ - * | | FAIL <--< \--> TRANSFERRED \ | + * | | LOAD FAIL <--< \--> TRANSFERRED \ | * | upload | \ / \ / | * | | \---------/ \--> RUNNING | * +------------+---------------------------------------------------+ @@ -50,8 +51,9 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ - INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ + INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ }; @@ -84,6 +86,7 @@ struct intel_uc_fw { * or during a GT reset (mutex guarantees single threaded). */ struct i915_vma dummy; + struct i915_vma *rsa_data; /* * The firmware build process will generate a version header file with major and @@ -130,10 +133,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "ERROR"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; case INTEL_UC_FIRMWARE_LOADABLE: return "LOADABLE"; - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; + case INTEL_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: return "TRANSFERRED"; case INTEL_UC_FIRMWARE_RUNNING: @@ -155,7 +160,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) return -ENOENT; case INTEL_UC_FIRMWARE_ERROR: return -ENOEXEC; - case INTEL_UC_FIRMWARE_FAIL: + case INTEL_UC_FIRMWARE_INIT_FAIL: + case INTEL_UC_FIRMWARE_LOAD_FAIL: return -EIO; case INTEL_UC_FIRMWARE_SELECTED: return -ESTALE; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index fb0e4a7bd8ca..d3327b802b76 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -3,8 +3,21 @@ * Copyright �� 2021 Intel Corporation */ +#include "selftests/igt_spinner.h" #include "selftests/intel_scheduler_helpers.h" +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + + return err; +} + static struct i915_request *nop_user_request(struct intel_context *ce, struct i915_request *from) { @@ -110,12 +123,172 @@ err: return ret; } +/* + * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one + * + * This test creates a spinner which is used to block all subsequent submissions + * until it completes. Next, a loop creates a context and a NOP request each + * iteration until the guc_ids are exhausted (request creation returns -EAGAIN). + * The spinner is ended, unblocking all requests created in the loop. At this + * point all guc_ids are exhausted but are available to steal. Try to create + * another request which should successfully steal a guc_id. Wait on last + * request to complete, idle GPU, verify a guc_id was stolen via a counter, and + * exit the test. Test also artificially reduces the number of guc_ids so the + * test runs in a timely manner. + */ +static int intel_guc_steal_guc_ids(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; + int ret, sv, context_index = 0; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context **ce; + struct igt_spinner spin; + struct i915_request *spin_rq = NULL, *rq, *last = NULL; + int number_guc_id_stolen = guc->number_guc_id_stolen; + + ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL); + if (!ce) { + pr_err("Context array allocation failed\n"); + return -ENOMEM; + } + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + sv = guc->submission_state.num_guc_ids; + guc->submission_state.num_guc_ids = 4096; + + /* Create spinner to block requests in below loop */ + ce[context_index] = intel_context_create(engine); + if (IS_ERR(ce[context_index])) { + ret = PTR_ERR(ce[context_index]); + ce[context_index] = NULL; + pr_err("Failed to create context: %d\n", ret); + goto err_wakeref; + } + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + pr_err("Failed to create spinner: %d\n", ret); + goto err_contexts; + } + spin_rq = igt_spinner_create_request(&spin, ce[context_index], + MI_ARB_CHECK); + if (IS_ERR(spin_rq)) { + ret = PTR_ERR(spin_rq); + pr_err("Failed to create spinner request: %d\n", ret); + goto err_contexts; + } + ret = request_add_spin(spin_rq, &spin); + if (ret) { + pr_err("Failed to add Spinner request: %d\n", ret); + goto err_spin_rq; + } + + /* Use all guc_ids */ + while (ret != -EAGAIN) { + ce[++context_index] = intel_context_create(engine); + if (IS_ERR(ce[context_index])) { + ret = PTR_ERR(ce[context_index--]); + ce[context_index] = NULL; + pr_err("Failed to create context: %d\n", ret); + goto err_spin_rq; + } + + rq = nop_user_request(ce[context_index], spin_rq); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + rq = NULL; + if (ret != -EAGAIN) { + pr_err("Failed to create request, %d: %d\n", + context_index, ret); + goto err_spin_rq; + } + } else { + if (last) + i915_request_put(last); + last = rq; + } + } + + /* Release blocked requests */ + igt_spinner_end(&spin); + ret = intel_selftest_wait_for_rq(spin_rq); + if (ret) { + pr_err("Spin request failed to complete: %d\n", ret); + i915_request_put(last); + goto err_spin_rq; + } + i915_request_put(spin_rq); + igt_spinner_fini(&spin); + spin_rq = NULL; + + /* Wait for last request */ + ret = i915_request_wait(last, 0, HZ * 30); + i915_request_put(last); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err_spin_rq; + } + + /* Try to steal guc_id */ + rq = nop_user_request(ce[context_index], NULL); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret); + goto err_spin_rq; + } + + /* Wait for request with stolen guc_id */ + ret = i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + if (ret < 0) { + pr_err("Request with stolen guc_id failed to complete: %d\n", + ret); + goto err_spin_rq; + } + + /* Wait for idle */ + ret = intel_gt_wait_for_idle(gt, HZ * 30); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err_spin_rq; + } + + /* Verify a guc_id was stolen */ + if (guc->number_guc_id_stolen == number_guc_id_stolen) { + pr_err("No guc_id was stolen"); + ret = -EINVAL; + } else { + ret = 0; + } + +err_spin_rq: + if (spin_rq) { + igt_spinner_end(&spin); + intel_selftest_wait_for_rq(spin_rq); + i915_request_put(spin_rq); + igt_spinner_fini(&spin); + intel_gt_wait_for_idle(gt, HZ * 30); + } +err_contexts: + for (; context_index >= 0 && ce[context_index]; --context_index) + intel_context_put(ce[context_index]); +err_wakeref: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + kfree(ce); + guc->submission_state.num_guc_ids = sv; + + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), + SUBTEST(intel_guc_steal_guc_ids), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 50953c8e8b53..1297ddbf7f88 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -167,7 +167,7 @@ int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(intel_guc_multi_lrc_basic), }; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); if (intel_gt_is_wedged(gt)) return 0; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 11a8baba6822..9ec064199364 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -427,7 +427,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->tiled = !!(val & SPRITE_TILED); color_order = !!(val & SPRITE_RGB_ORDER_RGBX); - yuv_order = (val & SPRITE_YUV_BYTE_ORDER_MASK) >> + yuv_order = (val & SPRITE_YUV_ORDER_MASK) >> _SPRITE_YUV_ORDER_SHIFT; fmt = (val & SPRITE_PIXFORMAT_MASK) >> _SPRITE_FMT_SHIFT; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53d0cb327539..99d1781fa5f0 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -446,17 +446,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & _PAGE_PRESENT); + return (e->val64 & GEN8_PAGE_PRESENT); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~_PAGE_PRESENT; + e->val64 &= ~GEN8_PAGE_PRESENT; } static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) { - e->val64 |= _PAGE_PRESENT; + e->val64 |= GEN8_PAGE_PRESENT; } static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) @@ -2439,7 +2439,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, /* The entry parameters like present/writeable/cache type * set to the same as i915's scratch page tree. */ - se.val64 |= _PAGE_PRESENT | _PAGE_RW; + se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (type == GTT_TYPE_PPGTT_PDE_PT) se.val64 |= PPAT_CACHED; @@ -2896,7 +2896,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt) offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; for (idx = 0; idx < num_low; idx++) { pte = mm->ggtt_mm.host_ggtt_aperture[idx]; - if (pte & _PAGE_PRESENT) + if (pte & GEN8_PAGE_PRESENT) write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); } @@ -2904,7 +2904,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt) offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; for (idx = 0; idx < num_hi; idx++) { pte = mm->ggtt_mm.host_ggtt_hidden[idx]; - if (pte & _PAGE_PRESENT) + if (pte & GEN8_PAGE_PRESENT) write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); } } diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index cbac409f6c8a..f0b69e4dcb52 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -205,7 +205,7 @@ int intel_gvt_init_device(struct drm_i915_private *i915) spin_lock_init(&gvt->scheduler.mmio_context_lock); mutex_init(&gvt->lock); mutex_init(&gvt->sched_lock); - gvt->gt = &i915->gt; + gvt->gt = to_gt(i915); i915->gvt = gvt; init_device_info(gvt); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6c804102528b..42a0c9ae0a73 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1386,7 +1386,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) enum intel_engine_id i; int ret; - ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY); + ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 3103c1e1fd14..ee2b3a375362 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -426,8 +426,9 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active) return true; } -int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) +int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { + struct dma_fence *fence = &rq->fence; struct i915_active_fence *active; int err; @@ -436,7 +437,7 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) if (err) return err; - active = active_instance(ref, idx); + active = active_instance(ref, i915_request_timeline(rq)->fence_context); if (!active) { err = -ENOMEM; goto out; @@ -477,29 +478,6 @@ __i915_active_set_fence(struct i915_active *ref, return prev; } -static struct i915_active_fence * -__active_fence(struct i915_active *ref, u64 idx) -{ - struct active_node *it; - - it = __active_lookup(ref, idx); - if (unlikely(!it)) { /* Contention with parallel tree builders! */ - spin_lock_irq(&ref->tree_lock); - it = __active_lookup(ref, idx); - spin_unlock_irq(&ref->tree_lock); - } - GEM_BUG_ON(!it); /* slot must be preallocated */ - - return &it->base; -} - -struct dma_fence * -__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) -{ - /* Only valid while active, see i915_active_acquire_for_context() */ - return __i915_active_set_fence(ref, __active_fence(ref, idx), fence); -} - struct dma_fence * i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) { diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 5fcdb0e2bc9e..7eb44132183a 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -164,26 +164,11 @@ void __i915_active_init(struct i915_active *ref, __i915_active_init(ref, active, retire, flags, &__mkey, &__wkey); \ } while (0) -struct dma_fence * -__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); -int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); - -static inline int -i915_active_add_request(struct i915_active *ref, struct i915_request *rq) -{ - return i915_active_ref(ref, - i915_request_timeline(rq)->fence_context, - &rq->fence); -} +int i915_active_add_request(struct i915_active *ref, struct i915_request *rq); struct dma_fence * i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); -static inline bool i915_active_has_exclusive(struct i915_active *ref) -{ - return rcu_access_pointer(ref->excl.fence); -} - int __i915_active_wait(struct i915_active *ref, int state); static inline int i915_active_wait(struct i915_active *ref) { diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index c149f348a972..b02a78ac87db 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -15,8 +15,6 @@ #include <linux/rcupdate.h> #include <linux/workqueue.h> -#include "i915_utils.h" - struct i915_active_fence { struct dma_fence __rcu *fence; struct dma_fence_cb cb; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fe638b5da7c0..e0e052cdf8b8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -48,7 +48,6 @@ #include "i915_debugfs_params.h" #include "i915_irq.h" #include "i915_scheduler.h" -#include "i915_trace.h" #include "intel_pm.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) @@ -65,7 +64,8 @@ static int i915_capabilities(struct seq_file *m, void *data) intel_device_info_print_static(INTEL_INFO(i915), &p); intel_device_info_print_runtime(RUNTIME_INFO(i915), &p); - intel_gt_info_print(&i915->gt.info, &p); + i915_print_iommu_status(i915, &p); + intel_gt_info_print(&to_gt(i915)->info, &p); intel_driver_caps_print(&i915->caps, &p); kernel_param_lock(THIS_MODULE); @@ -293,7 +293,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) gpu = NULL; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_gpu_coredump(&i915->gt, ALL_ENGINES); + gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -351,7 +351,7 @@ static const struct file_operations i915_error_state_fops = { static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct drm_printer p = drm_seq_file_printer(m); intel_gt_pm_frequency_dump(gt, &p); @@ -439,11 +439,11 @@ static int i915_swizzle_info(struct seq_file *m, void *data) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &to_gt(dev_priv)->rps; seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); - seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); + seq_printf(m, "GPU busy? %s\n", yesno(to_gt(dev_priv)->awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -476,7 +476,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) seq_printf(m, "Runtime power status: %s\n", enableddisabled(!dev_priv->power_domains.init_wakeref)); - seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake)); + seq_printf(m, "GPU idle: %s\n", yesno(!to_gt(dev_priv)->awake)); seq_printf(m, "IRQs disabled: %s\n", yesno(!intel_irqs_enabled(dev_priv))); #ifdef CONFIG_PM @@ -508,18 +508,18 @@ static int i915_engine_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(&i915->runtime_pm); seq_printf(m, "GT awake? %s [%d], %llums\n", - yesno(i915->gt.awake), - atomic_read(&i915->gt.wakeref.count), - ktime_to_ms(intel_gt_get_awake_time(&i915->gt))); + yesno(to_gt(i915)->awake), + atomic_read(&to_gt(i915)->wakeref.count), + ktime_to_ms(intel_gt_get_awake_time(to_gt(i915)))); seq_printf(m, "CS timestamp frequency: %u Hz, %d ns\n", - i915->gt.clock_frequency, - i915->gt.clock_period_ns); + to_gt(i915)->clock_frequency, + to_gt(i915)->clock_period_ns); p = drm_seq_file_printer(m); for_each_uabi_engine(engine, i915) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule); + intel_gt_show_timelines(to_gt(i915), &p, i915_request_show_with_schedule); intel_runtime_pm_put(&i915->runtime_pm, wakeref); @@ -558,14 +558,14 @@ static int i915_wedged_get(void *data, u64 *val) { struct drm_i915_private *i915 = data; - return intel_gt_debugfs_reset_show(&i915->gt, val); + return intel_gt_debugfs_reset_show(to_gt(i915), val); } static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - return intel_gt_debugfs_reset_store(&i915->gt, val); + return intel_gt_debugfs_reset_store(to_gt(i915), val); } DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, @@ -581,7 +581,7 @@ i915_perf_noa_delay_set(void *data, u64 val) * This would lead to infinite waits as we're doing timestamp * difference on the CS with only 32bits. */ - if (intel_gt_ns_to_clock_interval(&i915->gt, val) > U32_MAX) + if (intel_gt_ns_to_clock_interval(to_gt(i915), val) > U32_MAX) return -EINVAL; atomic64_set(&i915->perf.noa_programming_delay, val); @@ -666,16 +666,18 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + unsigned int flags; int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); - ret = gt_drop_caches(&i915->gt, val); + ret = gt_drop_caches(to_gt(i915), val); if (ret) return ret; fs_reclaim_acquire(GFP_KERNEL); + flags = memalloc_noreclaim_save(); if (val & DROP_BOUND) i915_gem_shrink(NULL, i915, LONG_MAX, NULL, I915_SHRINK_BOUND); @@ -684,6 +686,7 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_SHRINK_ALL) i915_gem_shrink_all(i915); + memalloc_noreclaim_restore(flags); fs_reclaim_release(GFP_KERNEL); if (val & DROP_RCU) @@ -702,7 +705,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); return intel_sseu_status(m, gt); } @@ -711,14 +714,14 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt); + return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915)); } static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt); + return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915)); } static const struct file_operations i915_forcewake_fops = { diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 20424275d41e..783c8676eee2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -40,8 +40,8 @@ static int notify_guc(struct drm_i915_private *i915) { int ret = 0; - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - ret = intel_guc_global_policies_update(&i915->gt.uc.guc); + if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc); return ret; } diff --git a/drivers/gpu/drm/i915/i915_deps.c b/drivers/gpu/drm/i915/i915_deps.c new file mode 100644 index 000000000000..999210b37325 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_deps.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/dma-fence.h> +#include <linux/slab.h> + +#include <drm/ttm/ttm_bo_api.h> + +#include "i915_deps.h" + +/** + * DOC: Set of utilities to dynamically collect dependencies into a + * structure which is fed into the GT migration code. + * + * Once we can do async unbinding, this is also needed to coalesce + * the migration fence with the unbind fences if these are coalesced + * post-migration. + * + * While collecting the individual dependencies, we store the refcounted + * struct dma_fence pointers in a realloc-managed pointer array, since + * that can be easily fed into a dma_fence_array. Other options are + * available, like for example an xarray for similarity with drm/sched. + * Can be changed easily if needed. + * + * A struct i915_deps need to be initialized using i915_deps_init(). + * If i915_deps_add_dependency() or i915_deps_add_resv() return an + * error code they will internally call i915_deps_fini(), which frees + * all internal references and allocations. + */ + +/* Min number of fence pointers in the array when an allocation occurs. */ +#define I915_DEPS_MIN_ALLOC_CHUNK 8U + +static void i915_deps_reset_fences(struct i915_deps *deps) +{ + if (deps->fences != &deps->single) + kfree(deps->fences); + deps->num_deps = 0; + deps->fences_size = 1; + deps->fences = &deps->single; +} + +/** + * i915_deps_init - Initialize an i915_deps structure + * @deps: Pointer to the i915_deps structure to initialize. + * @gfp: The allocation mode for subsequenst allocations. + */ +void i915_deps_init(struct i915_deps *deps, gfp_t gfp) +{ + deps->fences = NULL; + deps->gfp = gfp; + i915_deps_reset_fences(deps); +} + +/** + * i915_deps_fini - Finalize an i915_deps structure + * @deps: Pointer to the i915_deps structure to finalize. + * + * This function drops all fence references taken, conditionally frees and + * then resets the fences array. + */ +void i915_deps_fini(struct i915_deps *deps) +{ + unsigned int i; + + for (i = 0; i < deps->num_deps; ++i) + dma_fence_put(deps->fences[i]); + + if (deps->fences != &deps->single) + kfree(deps->fences); +} + +static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence, + const struct ttm_operation_ctx *ctx) +{ + int ret; + + if (deps->num_deps >= deps->fences_size) { + unsigned int new_size = 2 * deps->fences_size; + struct dma_fence **new_fences; + + new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK); + new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp); + if (!new_fences) + goto sync; + + memcpy(new_fences, deps->fences, + deps->fences_size * sizeof(*new_fences)); + swap(new_fences, deps->fences); + if (new_fences != &deps->single) + kfree(new_fences); + deps->fences_size = new_size; + } + deps->fences[deps->num_deps++] = dma_fence_get(fence); + return 0; + +sync: + if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) { + ret = -EBUSY; + goto unref; + } + + ret = dma_fence_wait(fence, ctx->interruptible); + if (ret) + goto unref; + + ret = fence->error; + if (ret) + goto unref; + + return 0; + +unref: + i915_deps_fini(deps); + return ret; +} + +/** + * i915_deps_sync - Wait for all the fences in the dependency collection + * @deps: Pointer to the i915_deps structure the fences of which to wait for. + * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits + * should be performed. + * + * This function waits for fences in the dependency collection. If it + * encounters an error during the wait or a fence error, the wait for + * further fences is aborted and the error returned. + * + * Return: Zero if successful, Negative error code on error. + */ +int i915_deps_sync(const struct i915_deps *deps, const struct ttm_operation_ctx *ctx) +{ + struct dma_fence **fences = deps->fences; + unsigned int i; + int ret = 0; + + for (i = 0; i < deps->num_deps; ++i, ++fences) { + if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) { + ret = -EBUSY; + break; + } + + ret = dma_fence_wait(*fences, ctx->interruptible); + if (!ret) + ret = (*fences)->error; + if (ret) + break; + } + + return ret; +} + +/** + * i915_deps_add_dependency - Add a fence to the dependency collection + * @deps: Pointer to the i915_deps structure a fence is to be added to. + * @fence: The fence to add. + * @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to + * be performed if waiting. + * + * Adds a fence to the dependency collection, and takes a reference on it. + * If the fence context is not zero and there was a later fence from the + * same fence context already added, then the fence is not added to the + * dependency collection. If the fence context is not zero and there was + * an earlier fence already added, then the fence will replace the older + * fence from the same context and the reference on the earlier fence will + * be dropped. + * If there is a failure to allocate memory to accommodate the new fence to + * be added, the new fence will instead be waited for and an error may + * be returned; depending on the value of @ctx, or if there was a fence + * error. If an error was returned, the dependency collection will be + * finalized and all fence reference dropped. + * + * Return: 0 if success. Negative error code on error. + */ +int i915_deps_add_dependency(struct i915_deps *deps, + struct dma_fence *fence, + const struct ttm_operation_ctx *ctx) +{ + unsigned int i; + int ret; + + if (!fence) + return 0; + + if (dma_fence_is_signaled(fence)) { + ret = fence->error; + if (ret) + i915_deps_fini(deps); + return ret; + } + + for (i = 0; i < deps->num_deps; ++i) { + struct dma_fence *entry = deps->fences[i]; + + if (!entry->context || entry->context != fence->context) + continue; + + if (dma_fence_is_later(fence, entry)) { + dma_fence_put(entry); + deps->fences[i] = dma_fence_get(fence); + } + + return 0; + } + + return i915_deps_grow(deps, fence, ctx); +} + +/** + * i915_deps_add_resv - Add the fences of a reservation object to a dependency + * collection. + * @deps: Pointer to the i915_deps structure a fence is to be added to. + * @resv: The reservation object, then fences of which to add. + * @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to + * be performed if waiting. + * + * Calls i915_deps_add_depencency() on the indicated fences of @resv. + * + * Return: Zero on success. Negative error code on error. + */ +int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv, + const struct ttm_operation_ctx *ctx) +{ + struct dma_resv_iter iter; + struct dma_fence *fence; + + dma_resv_assert_held(resv); + dma_resv_for_each_fence(&iter, resv, true, fence) { + int ret = i915_deps_add_dependency(deps, fence, ctx); + + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_deps.h b/drivers/gpu/drm/i915/i915_deps.h new file mode 100644 index 000000000000..d76c0106c910 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_deps.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _I915_DEPS_H_ +#define _I915_DEPS_H_ + +#include <linux/types.h> + +struct ttm_operation_ctx; +struct dma_fence; +struct dma_resv; + +/** + * struct i915_deps - Collect dependencies into a single dma-fence + * @single: Storage for pointer if the collection is a single fence. + * @fences: Allocated array of fence pointers if more than a single fence; + * otherwise points to the address of @single. + * @num_deps: Current number of dependency fences. + * @fences_size: Size of the @fences array in number of pointers. + * @gfp: Allocation mode. + */ +struct i915_deps { + struct dma_fence *single; + struct dma_fence **fences; + unsigned int num_deps; + unsigned int fences_size; + gfp_t gfp; +}; + +void i915_deps_init(struct i915_deps *deps, gfp_t gfp); + +void i915_deps_fini(struct i915_deps *deps); + +int i915_deps_add_dependency(struct i915_deps *deps, + struct dma_fence *fence, + const struct ttm_operation_ctx *ctx); + +int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv, + const struct ttm_operation_ctx *ctx); + +int i915_deps_sync(const struct i915_deps *deps, + const struct ttm_operation_ctx *ctx); +#endif diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index b394b8702f1c..95174938b160 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -82,7 +82,6 @@ #include "i915_suspend.h" #include "i915_switcheroo.h" #include "i915_sysfs.h" -#include "i915_trace.h" #include "i915_vgpu.h" #include "intel_dram.h" #include "intel_gvt.h" @@ -292,7 +291,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) static void sanitize_gpu(struct drm_i915_private *i915) { if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - __intel_gt_reset(&i915->gt, ALL_ENGINES); + __intel_gt_reset(to_gt(i915), ALL_ENGINES); } /** @@ -315,8 +314,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_device_info_subplatform_init(dev_priv); intel_step_init(dev_priv); + intel_gt_init_early(to_gt(dev_priv), dev_priv); intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug); - intel_uncore_init_early(&dev_priv->uncore, dev_priv); + intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv)); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); @@ -347,7 +347,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_wopcm_init_early(&dev_priv->wopcm); - intel_gt_init_early(&dev_priv->gt, dev_priv); + __intel_gt_init_early(to_gt(dev_priv), dev_priv); i915_gem_init_early(dev_priv); @@ -368,7 +368,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) err_gem: i915_gem_cleanup_early(dev_priv); - intel_gt_driver_late_release(&dev_priv->gt); + intel_gt_driver_late_release(to_gt(dev_priv)); intel_region_ttm_device_fini(dev_priv); err_ttm: vlv_suspend_cleanup(dev_priv); @@ -387,7 +387,7 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) intel_irq_fini(dev_priv); intel_power_domains_cleanup(dev_priv); i915_gem_cleanup_early(dev_priv); - intel_gt_driver_late_release(&dev_priv->gt); + intel_gt_driver_late_release(to_gt(dev_priv)); intel_region_ttm_device_fini(dev_priv); vlv_suspend_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); @@ -418,15 +418,19 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret < 0) return ret; - ret = intel_uncore_init_mmio(&dev_priv->uncore); + ret = intel_uncore_setup_mmio(&dev_priv->uncore); if (ret < 0) goto err_bridge; + ret = intel_uncore_init_mmio(&dev_priv->uncore); + if (ret) + goto err_mmio; + /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev_priv); intel_device_info_runtime_init(dev_priv); - ret = intel_gt_init_mmio(&dev_priv->gt); + ret = intel_gt_init_mmio(to_gt(dev_priv)); if (ret) goto err_uncore; @@ -438,6 +442,8 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); +err_mmio: + intel_uncore_cleanup_mmio(&dev_priv->uncore); err_bridge: pci_dev_put(dev_priv->bridge_dev); @@ -452,6 +458,7 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); + intel_uncore_cleanup_mmio(&dev_priv->uncore); pci_dev_put(dev_priv->bridge_dev); } @@ -580,9 +587,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt); + intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt); - ret = intel_gt_probe_lmem(&dev_priv->gt); + ret = intel_gt_probe_lmem(to_gt(dev_priv)); if (ret) goto err_mem_regions; @@ -695,7 +702,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Depends on sysfs having been initialized */ i915_perf_register(dev_priv); - intel_gt_driver_register(&dev_priv->gt); + intel_gt_driver_register(to_gt(dev_priv)); intel_display_driver_register(dev_priv); @@ -723,7 +730,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_display_driver_unregister(dev_priv); - intel_gt_driver_unregister(&dev_priv->gt); + intel_gt_driver_unregister(to_gt(dev_priv)); i915_perf_unregister(dev_priv); i915_pmu_unregister(dev_priv); @@ -734,6 +741,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_gem_driver_unregister(dev_priv); } +void +i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p) +{ + drm_printf(p, "iommu: %s\n", enableddisabled(intel_vtd_active(i915))); +} + static void i915_welcome_messages(struct drm_i915_private *dev_priv) { if (drm_debug_enabled(DRM_UT_DRIVER)) { @@ -749,7 +762,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) intel_device_info_print_static(INTEL_INFO(dev_priv), &p); intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p); - intel_gt_info_print(&dev_priv->gt.info, &p); + i915_print_iommu_status(dev_priv, &p); + intel_gt_info_print(&to_gt(dev_priv)->info, &p); } if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) @@ -810,7 +824,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return PTR_ERR(i915); /* Disable nuclear pageflip by default on pre-ILK */ - if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5) + if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5) i915->drm.driver_features &= ~DRIVER_ATOMIC; /* @@ -1371,7 +1385,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_uncore_resume_early(&dev_priv->uncore); - intel_gt_check_and_clear_faults(&dev_priv->gt); + intel_gt_check_and_clear_faults(to_gt(dev_priv)); intel_display_power_resume_early(dev_priv); @@ -1554,7 +1568,7 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); - intel_gt_runtime_suspend(&dev_priv->gt); + intel_gt_runtime_suspend(to_gt(dev_priv)); intel_runtime_pm_disable_interrupts(dev_priv); @@ -1570,7 +1584,7 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); - intel_gt_runtime_resume(&dev_priv->gt); + intel_gt_runtime_resume(to_gt(dev_priv)); enable_rpm_wakeref_asserts(rpm); @@ -1658,7 +1672,7 @@ static int intel_runtime_resume(struct device *kdev) * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). */ - intel_gt_runtime_resume(&dev_priv->gt); + intel_gt_runtime_resume(to_gt(dev_priv)); /* * On VLV/CHV display interrupts are part of the display diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4f58e1456505..0c70ab08fc0c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -89,6 +89,7 @@ #include "intel_device_info.h" #include "intel_memory_region.h" #include "intel_pch.h" +#include "intel_pm_types.h" #include "intel_runtime_pm.h" #include "intel_step.h" #include "intel_uncore.h" @@ -116,30 +117,6 @@ struct drm_i915_gem_object; -enum hpd_pin { - HPD_NONE = 0, - HPD_TV = HPD_NONE, /* TV is known to be unreliable */ - HPD_CRT, - HPD_SDVO_B, - HPD_SDVO_C, - HPD_PORT_A, - HPD_PORT_B, - HPD_PORT_C, - HPD_PORT_D, - HPD_PORT_E, - HPD_PORT_TC1, - HPD_PORT_TC2, - HPD_PORT_TC3, - HPD_PORT_TC4, - HPD_PORT_TC5, - HPD_PORT_TC6, - - HPD_NUM_PINS -}; - -#define for_each_hpd_pin(__pin) \ - for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) - /* Threshold == 5 for long IRQs, 50 for short */ #define HPD_STORM_DEFAULT_THRESHOLD 50 @@ -190,8 +167,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_VERTEX) struct drm_i915_private; -struct i915_mm_struct; -struct i915_mmu_object; struct drm_i915_file_private { struct drm_i915_private *dev_priv; @@ -401,106 +376,8 @@ struct drm_i915_display_funcs { void (*commit_modeset_enables)(struct intel_atomic_state *state); }; -struct intel_fbc_funcs; - #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */ -struct intel_fbc { - struct drm_i915_private *i915; - const struct intel_fbc_funcs *funcs; - - /* This is always the inner lock when overlapping with struct_mutex and - * it's the outer lock when overlapping with stolen_lock. */ - struct mutex lock; - unsigned int possible_framebuffer_bits; - unsigned int busy_bits; - struct intel_crtc *crtc; - - struct drm_mm_node compressed_fb; - struct drm_mm_node compressed_llb; - - u8 limit; - - bool false_color; - - bool active; - bool activated; - bool flip_pending; - - bool underrun_detected; - struct work_struct underrun_work; - - /* - * Due to the atomic rules we can't access some structures without the - * appropriate locking, so we cache information here in order to avoid - * these problems. - */ - struct intel_fbc_state_cache { - struct { - unsigned int mode_flags; - u32 hsw_bdw_pixel_rate; - } crtc; - - struct { - unsigned int rotation; - int src_w; - int src_h; - bool visible; - /* - * Display surface base address adjustement for - * pageflips. Note that on gen4+ this only adjusts up - * to a tile, offsets within a tile are handled in - * the hw itself (with the TILEOFF register). - */ - int adjusted_x; - int adjusted_y; - - u16 pixel_blend_mode; - } plane; - - struct { - const struct drm_format_info *format; - unsigned int stride; - u64 modifier; - } fb; - - unsigned int fence_y_offset; - u16 interval; - s8 fence_id; - bool psr2_active; - } state_cache; - - /* - * This structure contains everything that's relevant to program the - * hardware registers. When we want to figure out if we need to disable - * and re-enable FBC for a new configuration we just check if there's - * something different in the struct. The genx_fbc_activate functions - * are supposed to read from it in order to program the registers. - */ - struct intel_fbc_reg_params { - struct { - enum pipe pipe; - enum i9xx_plane_id i9xx_plane; - } crtc; - - struct { - const struct drm_format_info *format; - unsigned int stride; - u64 modifier; - } fb; - - unsigned int cfb_stride; - unsigned int cfb_size; - unsigned int fence_y_offset; - u16 override_cfb_stride; - u16 interval; - s8 fence_id; - bool plane_visible; - } params; - - const char *no_fbc_reason; -}; - /* * HIGH_RR is the highest eDP panel refresh rate read from EDID * LOW_RR is the lowest eDP panel refresh rate found from EDID @@ -537,7 +414,6 @@ struct i915_drrs { #define QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK (1<<8) struct intel_fbdev; -struct intel_fbc_work; struct intel_gmbus { struct i2c_adapter adapter; @@ -597,7 +473,7 @@ struct i915_gem_mm { * List of objects which are pending destruction. */ struct llist_head free_list; - struct work_struct free_work; + struct delayed_work free_work; /** * Count of objects pending destructions. Used to skip needlessly * waiting on an RCU barrier if no objects are waiting to be freed. @@ -732,69 +608,6 @@ struct intel_vbt_data { struct sdvo_device_mapping sdvo_mappings[2]; }; -enum intel_ddb_partitioning { - INTEL_DDB_PART_1_2, - INTEL_DDB_PART_5_6, /* IVB+ */ -}; - -struct ilk_wm_values { - u32 wm_pipe[3]; - u32 wm_lp[3]; - u32 wm_lp_spr[3]; - bool enable_fbc_wm; - enum intel_ddb_partitioning partitioning; -}; - -struct g4x_pipe_wm { - u16 plane[I915_MAX_PLANES]; - u16 fbc; -}; - -struct g4x_sr_wm { - u16 plane; - u16 cursor; - u16 fbc; -}; - -struct vlv_wm_ddl_values { - u8 plane[I915_MAX_PLANES]; -}; - -struct vlv_wm_values { - struct g4x_pipe_wm pipe[3]; - struct g4x_sr_wm sr; - struct vlv_wm_ddl_values ddl[3]; - u8 level; - bool cxsr; -}; - -struct g4x_wm_values { - struct g4x_pipe_wm pipe[2]; - struct g4x_sr_wm sr; - struct g4x_sr_wm hpll; - bool cxsr; - bool hpll_en; - bool fbc_en; -}; - -struct skl_ddb_entry { - u16 start, end; /* in number of blocks, 'end' is exclusive */ -}; - -static inline u16 skl_ddb_entry_size(const struct skl_ddb_entry *entry) -{ - return entry->end - entry->start; -} - -static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1, - const struct skl_ddb_entry *e2) -{ - if (e1->start == e2->start && e1->end == e2->end) - return true; - - return false; -} - struct i915_frontbuffer_tracking { spinlock_t lock; @@ -936,7 +749,7 @@ struct drm_i915_private { u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; - struct intel_fbc fbc; + struct intel_fbc *fbc; struct i915_drrs drrs; struct intel_opregion opregion; struct intel_vbt_data vbt; @@ -1031,9 +844,6 @@ struct drm_i915_private { /* Kernel Modesetting */ - struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES]; - struct intel_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES]; - /** * dpll and cdclk state is protected by connection_mutex * dpll.lock serializes intel_{prepare,enable,disable}_shared_dpll. @@ -1195,7 +1005,7 @@ struct drm_i915_private { struct i915_perf perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ - struct intel_gt gt; + struct intel_gt gt0; struct { struct i915_gem_contexts { @@ -1267,6 +1077,11 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) return pci_get_drvdata(pdev); } +static inline struct intel_gt *to_gt(struct drm_i915_private *i915) +{ + return &i915->gt0; +} + /* Simple iterator over all initialised engines */ #define for_each_engine(engine__, dev_priv__, id__) \ for ((id__) = 0; \ @@ -1324,15 +1139,15 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IP_VER(ver, rel) ((ver) << 8 | (rel)) -#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) -#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \ - INTEL_INFO(i915)->graphics_rel) +#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics.ver) +#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics.ver, \ + INTEL_INFO(i915)->graphics.rel) #define IS_GRAPHICS_VER(i915, from, until) \ (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) -#define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \ - INTEL_INFO(i915)->media_rel) +#define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ + INTEL_INFO(i915)->media.rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) @@ -1345,15 +1160,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) -#define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step) +#define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) +#define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) -#define IS_GT_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \ - INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until)) +#define IS_GRAPHICS_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \ + INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until)) + +#define IS_MEDIA_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ + INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, @@ -1466,6 +1286,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) +#define IS_ADLS_RPLS(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL_S) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1526,15 +1348,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_Y(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) -#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) +#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until)) -#define IS_KBL_GT_STEP(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until)) +#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until)) #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) -#define IS_JSL_EHL_GT_STEP(p, since, until) \ - (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) +#define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until)) #define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until)) @@ -1542,19 +1364,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_TIGERLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_TGL_UY_GT_STEP(__i915, since, until) \ +#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \ ((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_TGL_GT_STEP(__i915, since, until) \ +#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \ (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) #define IS_RKL_DISPLAY_STEP(p, since, until) \ (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) -#define IS_DG1_GT_STEP(p, since, until) \ - (IS_DG1(p) && IS_GT_STEP(p, since, until)) +#define IS_DG1_GRAPHICS_STEP(p, since, until) \ + (IS_DG1(p) && IS_GRAPHICS_STEP(p, since, until)) #define IS_DG1_DISPLAY_STEP(p, since, until) \ (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until)) @@ -1562,20 +1384,20 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_S(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_ADLS_GT_STEP(__i915, since, until) \ +#define IS_ADLS_GRAPHICS_STEP(__i915, since, until) \ (IS_ALDERLAKE_S(__i915) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) #define IS_ADLP_DISPLAY_STEP(__i915, since, until) \ (IS_ALDERLAKE_P(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) -#define IS_ADLP_GT_STEP(__i915, since, until) \ +#define IS_ADLP_GRAPHICS_STEP(__i915, since, until) \ (IS_ALDERLAKE_P(__i915) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_XEHPSDV_GT_STEP(__i915, since, until) \ - (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) /* * DG2 hardware steppings are a bit unusual. The hardware design was forked @@ -1591,11 +1413,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, * and stepping-specific logic will be applied with a general DG2-wide stepping * number. */ -#define IS_DG2_GT_STEP(__i915, variant, since, until) \ +#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \ (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ - IS_GT_STEP(__i915, since, until)) + IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_DG2_DISP_STEP(__i915, since, until) \ +#define IS_DG2_DISPLAY_STEP(__i915, since, until) \ (IS_DG2(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) @@ -1692,7 +1514,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PSR_HW_TRACKING(dev_priv) \ (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) #define HAS_PSR2_SEL_FETCH(dev_priv) (GRAPHICS_VER(dev_priv) >= 12) -#define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0) +#define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->display.cpu_transcoder_mask & BIT(trans)) != 0) #define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) @@ -1710,6 +1532,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_MSLICES(dev_priv) \ (INTEL_INFO(dev_priv)->has_mslices) +/* + * Set this flag, when platform requires 64K GTT page sizes or larger for + * device local memory access. Also this flag implies that we require or + * at least support the compact PT layout for the ppGTT when using the 64K + * GTT pages. + */ +#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) @@ -1723,7 +1553,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PXP(dev_priv) ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \ INTEL_INFO(dev_priv)->has_pxp) && \ - VDBOX_MASK(&dev_priv->gt)) + VDBOX_MASK(to_gt(dev_priv))) #define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) @@ -1737,9 +1567,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->pipe_mask)) +#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->display.pipe_mask)) -#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) +#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->display.pipe_mask != 0) #define HAS_VRR(i915) (GRAPHICS_VER(i915) >= 11) @@ -1757,26 +1587,27 @@ static inline bool run_as_guest(void) #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -static inline bool intel_vtd_active(void) +static inline bool intel_vtd_active(struct drm_i915_private *i915) { -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (device_iommu_mapped(i915->drm.dev)) return true; -#endif /* Running as a guest, we assume the host is enforcing VT'd */ return run_as_guest(); } +void +i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p); + static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) { - return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(); + return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(dev_priv); } static inline bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915) { - return IS_BROXTON(i915) && intel_vtd_active(); + return IS_BROXTON(i915) && intel_vtd_active(i915); } static inline bool @@ -1805,7 +1636,8 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) * armed the work again. */ while (atomic_read(&i915->mm.free_count)) { - flush_work(&i915->mm.free_work); + flush_delayed_work(&i915->mm.free_work); + flush_delayed_work(&i915->bdev.wq); rcu_barrier(); } } @@ -1838,13 +1670,10 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, u64 size, u64 alignment, u64 flags); -static inline struct i915_vma * __must_check +struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - u64 size, u64 alignment, u64 flags) -{ - return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags); -} + u64 size, u64 alignment, u64 flags); int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); @@ -1920,6 +1749,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm); struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_internal(struct drm_i915_private *dev_priv, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size); /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) @@ -1959,14 +1792,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -/* i915_mm.c */ -int remap_io_mapping(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn, unsigned long size, - struct io_mapping *iomap); -int remap_io_sg(struct vm_area_struct *vma, - unsigned long addr, unsigned long size, - struct scatterlist *sgl, resource_size_t iobase); - static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { if (GRAPHICS_VER(i915) >= 11) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 981e383d1a5d..915bf431f320 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -764,7 +764,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * perspective, requiring manual detiling by the client. */ if (!i915_gem_object_has_struct_page(obj) || - cpu_write_needs_clflush(obj)) + i915_gem_cpu_write_needs_clflush(obj)) /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. @@ -877,6 +877,8 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; + GEM_WARN_ON(!ww); + if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { /* @@ -936,10 +938,7 @@ new_vma: return ERR_PTR(ret); } - if (ww) - ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); - else - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); @@ -959,6 +958,29 @@ new_vma: return vma; } +struct i915_vma * __must_check +i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, + const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) +{ + struct i915_gem_ww_ctx ww; + struct i915_vma *ret; + int err; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + ret = i915_gem_object_ggtt_pin_ww(obj, &ww, view, size, + alignment, flags); + if (IS_ERR(ret)) + err = PTR_ERR(ret); + } + + return err ? ERR_PTR(err) : ret; +} + int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -1005,7 +1027,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->ops->adjust_lru(obj); } - if (i915_gem_object_has_pages(obj)) { + if (i915_gem_object_has_pages(obj) || + i915_gem_object_has_self_managed_shrink_list(obj)) { unsigned long flags; spin_lock_irqsave(&i915->mm.obj_lock, flags); @@ -1048,7 +1071,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - intel_uc_fetch_firmwares(&dev_priv->gt.uc); + intel_uc_fetch_firmwares(&to_gt(dev_priv)->uc); intel_wopcm_init(&dev_priv->wopcm); ret = i915_init_ggtt(dev_priv); @@ -1068,7 +1091,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); - ret = intel_gt_init(&dev_priv->gt); + ret = intel_gt_init(to_gt(dev_priv)); if (ret) goto err_unlock; @@ -1084,7 +1107,7 @@ err_unlock: i915_gem_drain_workqueue(dev_priv); if (ret != -EIO) - intel_uc_cleanup_firmwares(&dev_priv->gt.uc); + intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc); if (ret == -EIO) { /* @@ -1092,10 +1115,10 @@ err_unlock: * as wedged. But we only want to do this when the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - if (!intel_gt_is_wedged(&dev_priv->gt)) { + if (!intel_gt_is_wedged(to_gt(dev_priv))) { i915_probe_error(dev_priv, "Failed to initialize GPU, declaring it wedged!\n"); - intel_gt_set_wedged(&dev_priv->gt); + intel_gt_set_wedged(to_gt(dev_priv)); } /* Minimal basic recovery for KMS */ @@ -1126,7 +1149,7 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); i915_gem_suspend_late(dev_priv); - intel_gt_driver_remove(&dev_priv->gt); + intel_gt_driver_remove(to_gt(dev_priv)); dev_priv->uabi_engines = RB_ROOT; /* Flush any outstanding unpin_work. */ @@ -1137,9 +1160,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - intel_gt_driver_release(&dev_priv->gt); + intel_gt_driver_release(to_gt(dev_priv)); - intel_uc_cleanup_firmwares(&dev_priv->gt.uc); + intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 77490cb5ff9c..7f80ad247bc8 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -13,7 +13,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *i915 = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev->dev); - const struct sseu_dev_info *sseu = &i915->gt.info.sseu; + const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu; drm_i915_getparam_t *param = data; int value = 0; @@ -82,8 +82,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, break; case I915_PARAM_HAS_GPU_RESET: value = i915->params.enable_hangcheck && - intel_has_gpu_reset(&i915->gt); - if (value && intel_has_reset_engine(&i915->gt)) + intel_has_gpu_reset(to_gt(i915)); + if (value && intel_has_reset_engine(to_gt(i915))) value = 2; break; case I915_PARAM_HAS_RESOURCE_STREAMER: @@ -96,7 +96,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = sseu->min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - value = intel_huc_check_status(&i915->gt.uc.huc); + value = intel_huc_check_status(&to_gt(i915)->uc.huc); if (value < 0) return value; break; @@ -158,7 +158,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = i915->gt.clock_frequency; + value = to_gt(i915)->clock_frequency; break; case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2a2d7643b551..5ae812d60abe 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -48,8 +48,9 @@ #include "i915_gpu_error.h" #include "i915_memcpy.h" #include "i915_scatterlist.h" +#include "i915_vma_snapshot.h" -#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) +#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) static void __sg_set_buf(struct scatterlist *sg, @@ -275,16 +276,16 @@ static bool compress_start(struct i915_vma_compress *c) static void *compress_next_page(struct i915_vma_compress *c, struct i915_vma_coredump *dst) { - void *page; + void *page_addr; + struct page *page; - if (dst->page_count >= dst->num_pages) - return ERR_PTR(-ENOSPC); - - page = pool_alloc(&c->pool, ALLOW_FAIL); - if (!page) + page_addr = pool_alloc(&c->pool, ALLOW_FAIL); + if (!page_addr) return ERR_PTR(-ENOMEM); - return dst->pages[dst->page_count++] = page; + page = virt_to_page(page_addr); + list_add_tail(&page->lru, &dst->page_list); + return page_addr; } static int compress_page(struct i915_vma_compress *c, @@ -397,7 +398,7 @@ static int compress_page(struct i915_vma_compress *c, if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); - dst->pages[dst->page_count++] = ptr; + list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list); cond_resched(); return 0; @@ -504,7 +505,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct i915_gem_context_coredump *ctx) { - const u32 period = m->i915->gt.clock_period_ns; + const u32 period = to_gt(m->i915)->clock_period_ns; err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", header, ctx->comm, ctx->pid, ctx->sched_attr.priority, @@ -614,7 +615,7 @@ static void print_error_vma(struct drm_i915_error_state_buf *m, const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; - int page; + struct page *page; if (!vma) return; @@ -628,16 +629,17 @@ static void print_error_vma(struct drm_i915_error_state_buf *m, err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); err_compression_marker(m); - for (page = 0; page < vma->page_count; page++) { + list_for_each_entry(page, &vma->page_list, lru) { int i, len; + const u32 *addr = page_address(page); len = PAGE_SIZE; - if (page == vma->page_count - 1) + if (page == list_last_entry(&vma->page_list, typeof(*page), lru)) len -= vma->unused; len = ascii85_encode_len(len); for (i = 0; i < len; i++) - err_puts(m, ascii85_encode(vma->pages[page][i], out)); + err_puts(m, ascii85_encode(addr[i], out)); } err_puts(m, "\n"); } @@ -946,10 +948,12 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma) { while (vma) { struct i915_vma_coredump *next = vma->next; - int page; + struct page *page, *n; - for (page = 0; page < vma->page_count; page++) - free_page((unsigned long)vma->pages[page]); + list_for_each_entry_safe(page, n, &vma->page_list, lru) { + list_del_init(&page->lru); + __free_page(page); + } kfree(vma); vma = next; @@ -1009,25 +1013,21 @@ void __i915_gpu_coredump_free(struct kref *error_ref) static struct i915_vma_coredump * i915_vma_coredump_create(const struct intel_gt *gt, - const struct i915_vma *vma, - const char *name, + const struct i915_vma_snapshot *vsnap, struct i915_vma_compress *compress) { struct i915_ggtt *ggtt = gt->ggtt; const u64 slot = ggtt->error_capture.start; struct i915_vma_coredump *dst; - unsigned long num_pages; struct sgt_iter iter; int ret; might_sleep(); - if (!vma || !vma->pages || !compress) + if (!vsnap || !vsnap->pages || !compress) return NULL; - num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; - num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */ - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL); + dst = kmalloc(sizeof(*dst), ALLOW_FAIL); if (!dst) return NULL; @@ -1036,14 +1036,13 @@ i915_vma_coredump_create(const struct intel_gt *gt, return NULL; } - strcpy(dst->name, name); + INIT_LIST_HEAD(&dst->page_list); + strcpy(dst->name, vsnap->name); dst->next = NULL; - dst->gtt_offset = vma->node.start; - dst->gtt_size = vma->node.size; - dst->gtt_page_sizes = vma->page_sizes.gtt; - dst->num_pages = num_pages; - dst->page_count = 0; + dst->gtt_offset = vsnap->gtt_offset; + dst->gtt_size = vsnap->gtt_size; + dst->gtt_page_sizes = vsnap->page_sizes; dst->unused = 0; ret = -EINVAL; @@ -1051,7 +1050,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, void __iomem *s; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vsnap->pages) { mutex_lock(&ggtt->error_mutex); ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); @@ -1069,11 +1068,11 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (__i915_gem_object_is_lmem(vma->obj)) { - struct intel_memory_region *mem = vma->obj->mm.region; + } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) { + struct intel_memory_region *mem = vsnap->mr; dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vsnap->pages) { void __iomem *s; s = io_mapping_map_wc(&mem->iomap, @@ -1089,7 +1088,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, } else { struct page *page; - for_each_sgt_page(page, iter, vma->pages) { + for_each_sgt_page(page, iter, vsnap->pages) { void *s; drm_clflush_pages(&page, 1); @@ -1106,8 +1105,13 @@ i915_vma_coredump_create(const struct intel_gt *gt, } if (ret || compress_flush(compress, dst)) { - while (dst->page_count--) - pool_free(&compress->pool, dst->pages[dst->page_count]); + struct page *page, *n; + + list_for_each_entry_safe_reverse(page, n, &dst->page_list, lru) { + list_del_init(&page->lru); + pool_free(&compress->pool, page_address(page)); + } + kfree(dst); dst = NULL; } @@ -1320,38 +1324,72 @@ static bool record_context(struct i915_gem_context_coredump *e, struct intel_engine_capture_vma { struct intel_engine_capture_vma *next; - struct i915_vma *vma; + struct i915_vma_snapshot *vsnap; char name[16]; + bool lockdep_cookie; }; static struct intel_engine_capture_vma * -capture_vma(struct intel_engine_capture_vma *next, - struct i915_vma *vma, - const char *name, - gfp_t gfp) +capture_vma_snapshot(struct intel_engine_capture_vma *next, + struct i915_vma_snapshot *vsnap, + gfp_t gfp) { struct intel_engine_capture_vma *c; - if (!vma) + if (!i915_vma_snapshot_present(vsnap)) return next; c = kmalloc(sizeof(*c), gfp); if (!c) return next; - if (!i915_active_acquire_if_busy(&vma->active)) { + if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) { kfree(c); return next; } - strcpy(c->name, name); - c->vma = vma; /* reference held while active */ + strcpy(c->name, vsnap->name); + c->vsnap = vsnap; + i915_vma_snapshot_get(vsnap); c->next = next; return c; } static struct intel_engine_capture_vma * +capture_vma(struct intel_engine_capture_vma *next, + struct i915_vma *vma, + const char *name, + gfp_t gfp) +{ + struct i915_vma_snapshot *vsnap; + + if (!vma) + return next; + + /* + * If the vma isn't pinned, then the vma should be snapshotted + * to a struct i915_vma_snapshot at command submission time. + * Not here. + */ + GEM_WARN_ON(!i915_vma_is_pinned(vma)); + if (!i915_vma_is_pinned(vma)) + return next; + + vsnap = i915_vma_snapshot_alloc(gfp); + if (!vsnap) + return next; + + i915_vma_snapshot_init(vsnap, vma, name); + next = capture_vma_snapshot(next, vsnap, gfp); + + /* FIXME: Replace on async unbind. */ + i915_vma_snapshot_put(vsnap); + + return next; +} + +static struct intel_engine_capture_vma * capture_user(struct intel_engine_capture_vma *capture, const struct i915_request *rq, gfp_t gfp) @@ -1359,7 +1397,7 @@ capture_user(struct intel_engine_capture_vma *capture, struct i915_capture_list *c; for (c = rq->capture_list; c; c = c->next) - capture = capture_vma(capture, c->vma, "user", gfp); + capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp); return capture; } @@ -1373,6 +1411,33 @@ static void add_vma(struct intel_engine_coredump *ee, } } +static struct i915_vma_coredump * +create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma, + const char *name, struct i915_vma_compress *compress) +{ + struct i915_vma_coredump *ret; + struct i915_vma_snapshot tmp; + + if (!vma) + return NULL; + + GEM_WARN_ON(!i915_vma_is_pinned(vma)); + i915_vma_snapshot_init_onstack(&tmp, vma, name); + ret = i915_vma_coredump_create(gt, &tmp, compress); + i915_vma_snapshot_put_onstack(&tmp); + + return ret; +} + +static void add_vma_coredump(struct intel_engine_coredump *ee, + const struct intel_gt *gt, + struct i915_vma *vma, + const char *name, + struct i915_vma_compress *compress) +{ + add_vma(ee, create_vma_coredump(gt, vma, name, compress)); +} + struct intel_engine_coredump * intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) { @@ -1406,7 +1471,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee, * as the simplest method to avoid being overwritten * by userspace. */ - vma = capture_vma(vma, rq->batch, "batch", gfp); + vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp); vma = capture_user(vma, rq, gfp); vma = capture_vma(vma, rq->ring->vma, "ring", gfp); vma = capture_vma(vma, rq->context->state, "HW context", gfp); @@ -1427,30 +1492,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, while (capture) { struct intel_engine_capture_vma *this = capture; - struct i915_vma *vma = this->vma; + struct i915_vma_snapshot *vsnap = this->vsnap; add_vma(ee, i915_vma_coredump_create(engine->gt, - vma, this->name, - compress)); + vsnap, compress)); - i915_active_release(&vma->active); + i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie); + i915_vma_snapshot_put(vsnap); capture = this->next; kfree(this); } - add_vma(ee, - i915_vma_coredump_create(engine->gt, - engine->status_page.vma, - "HW Status", - compress)); + add_vma_coredump(ee, engine->gt, engine->status_page.vma, + "HW Status", compress); - add_vma(ee, - i915_vma_coredump_create(engine->gt, - engine->wa_ctx.vma, - "WA context", - compress)); + add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma, + "WA context", compress); } static struct intel_engine_coredump * @@ -1486,17 +1545,25 @@ capture_engine(struct intel_engine_cs *engine, } } if (rq) - capture = intel_engine_coredump_add_request(ee, rq, - ATOMIC_MAYFAIL); + rq = i915_request_get_rcu(rq); + + if (!rq) + goto no_request_capture; + + capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); if (!capture) { -no_request_capture: - kfree(ee); - return NULL; + i915_request_put(rq); + goto no_request_capture; } intel_engine_coredump_add_vma(ee, capture, compress); + i915_request_put(rq); return ee; + +no_request_capture: + kfree(ee); + return NULL; } static void @@ -1550,10 +1617,8 @@ gt_record_uc(struct intel_gt_coredump *gt, */ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); - error_uc->guc_log = - i915_vma_coredump_create(gt->_gt, - uc->guc.log.vma, "GuC log buffer", - compress); + error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, + "GuC log buffer", compress); return error_uc; } @@ -1750,10 +1815,7 @@ static void capture_gen(struct i915_gpu_coredump *error) error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); error->suspended = i915->runtime_pm.suspended; - error->iommu = -1; -#ifdef CONFIG_INTEL_IOMMU - error->iommu = intel_iommu_gfx_mapped; -#endif + error->iommu = intel_vtd_active(i915); error->reset_count = i915_reset_count(&i915->gpu_error); error->suspend_count = i915->suspend_count; @@ -1784,7 +1846,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) error->time = ktime_get_real(); error->boottime = ktime_get_boottime(); - error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); + error->uptime = ktime_sub(ktime_get(), to_gt(i915)->last_init_time); error->capture = jiffies; capture_gen(error); @@ -1839,8 +1901,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt, kfree(compress); } -struct i915_gpu_coredump * -i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +static struct i915_gpu_coredump * +__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) { struct drm_i915_private *i915 = gt->i915; struct i915_gpu_coredump *error; @@ -1881,6 +1943,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) return error; } +struct i915_gpu_coredump * +i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +{ + static DEFINE_MUTEX(capture_mutex); + int ret = mutex_lock_interruptible(&capture_mutex); + struct i915_gpu_coredump *dump; + + if (ret) + return ERR_PTR(ret); + + dump = __i915_gpu_coredump(gt, engine_mask); + mutex_unlock(&capture_mutex); + + return dump; +} + void i915_error_state_store(struct i915_gpu_coredump *error) { struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b98d8cdbe4f2..5aedf5129814 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -39,10 +39,8 @@ struct i915_vma_coredump { u64 gtt_size; u32 gtt_page_sizes; - int num_pages; - int page_count; int unused; - u32 *pages[]; + struct list_head page_list; }; struct i915_request_coredump { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a4b0a0c45b13..21f75b069fa8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -35,6 +35,7 @@ #include <drm/drm_drv.h> #include "display/intel_de.h" +#include "display/intel_display_trace.h" #include "display/intel_display_types.h" #include "display/intel_fifo_underrun.h" #include "display/intel_hotplug.h" @@ -49,7 +50,6 @@ #include "i915_drv.h" #include "i915_irq.h" -#include "i915_trace.h" #include "intel_pm.h" /** @@ -224,7 +224,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) static void intel_handle_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); drm_crtc_handle_vblank(&crtc->base); } @@ -1040,7 +1040,7 @@ static void ivb_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), l3_parity.error_work); - struct intel_gt *gt = &dev_priv->gt; + struct intel_gt *gt = to_gt(dev_priv); u32 error_status, row, bank, subbank; char *parity_event[6]; u32 misccpctl; @@ -1318,7 +1318,7 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, u32 crc2, u32 crc3, u32 crc4) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe); struct intel_pipe_crc *pipe_crc = &crtc->pipe_crc; u32 crcs[5] = { crc0, crc1, crc2, crc3, crc4 }; @@ -1357,7 +1357,7 @@ display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, static void flip_done_handler(struct drm_i915_private *i915, enum pipe pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(i915, pipe); + struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe); struct drm_crtc_state *crtc_state = crtc->base.state; struct drm_pending_vblank_event *e = crtc_state->event; struct drm_device *dev = &i915->drm; @@ -1718,9 +1718,9 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); if (gt_iir) - gen6_gt_irq_handler(&dev_priv->gt, gt_iir); + gen6_gt_irq_handler(to_gt(dev_priv), gt_iir); if (pm_iir) - gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); + gen6_rps_irq_handler(&to_gt(dev_priv)->rps, pm_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -1777,7 +1777,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) ier = intel_uncore_read(&dev_priv->uncore, VLV_IER); intel_uncore_write(&dev_priv->uncore, VLV_IER, 0); - gen8_gt_irq_handler(&dev_priv->gt, master_ctl); + gen8_gt_irq_handler(to_gt(dev_priv), master_ctl); if (iir & I915_DISPLAY_PORT_INTERRUPT) hotplug_status = i9xx_hpd_irq_ack(dev_priv); @@ -2108,7 +2108,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, } if (DISPLAY_VER(dev_priv) == 5 && de_iir & DE_PCU_EVENT) - gen5_rps_irq_handler(&dev_priv->gt.rps); + gen5_rps_irq_handler(&to_gt(dev_priv)->rps); } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -2189,9 +2189,9 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) if (gt_iir) { raw_reg_write(regs, GTIIR, gt_iir); if (GRAPHICS_VER(i915) >= 6) - gen6_gt_irq_handler(&i915->gt, gt_iir); + gen6_gt_irq_handler(to_gt(i915), gt_iir); else - gen5_gt_irq_handler(&i915->gt, gt_iir); + gen5_gt_irq_handler(to_gt(i915), gt_iir); ret = IRQ_HANDLED; } @@ -2209,7 +2209,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) u32 pm_iir = raw_reg_read(regs, GEN6_PMIIR); if (pm_iir) { raw_reg_write(regs, GEN6_PMIIR, pm_iir); - gen6_rps_irq_handler(&i915->gt.rps, pm_iir); + gen6_rps_irq_handler(&to_gt(i915)->rps, pm_iir); ret = IRQ_HANDLED; } } @@ -2635,7 +2635,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) } /* Find, queue (onto bottom-halves), then clear each source */ - gen8_gt_irq_handler(&dev_priv->gt, master_ctl); + gen8_gt_irq_handler(to_gt(dev_priv), master_ctl); /* IRQs are synced during runtime_suspend, we don't require a wakeref */ if (master_ctl & ~GEN8_GT_IRQS) { @@ -2715,7 +2715,7 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) { struct drm_i915_private *i915 = arg; void __iomem * const regs = i915->uncore.regs; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); u32 master_ctl; u32 gu_misc_iir; @@ -2771,8 +2771,8 @@ static inline void dg1_master_intr_enable(void __iomem * const regs) static irqreturn_t dg1_irq_handler(int irq, void *arg) { struct drm_i915_private * const i915 = arg; - struct intel_gt *gt = &i915->gt; - void __iomem * const regs = i915->uncore.regs; + struct intel_gt *gt = to_gt(i915); + void __iomem * const regs = gt->uncore->regs; u32 master_tile_ctl, master_ctl; u32 gu_misc_iir; @@ -3075,7 +3075,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); } - gen5_gt_irq_reset(&dev_priv->gt); + gen5_gt_irq_reset(to_gt(dev_priv)); ibx_irq_reset(dev_priv); } @@ -3085,7 +3085,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, 0); intel_uncore_posting_read(&dev_priv->uncore, VLV_MASTER_IER); - gen5_gt_irq_reset(&dev_priv->gt); + gen5_gt_irq_reset(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -3119,7 +3119,7 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) gen8_master_intr_disable(dev_priv->uncore.regs); - gen8_gt_irq_reset(&dev_priv->gt); + gen8_gt_irq_reset(to_gt(dev_priv)); gen8_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN8_PCU_); @@ -3173,11 +3173,12 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) static void gen11_irq_reset(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = to_gt(dev_priv); + struct intel_uncore *uncore = gt->uncore; gen11_master_intr_disable(dev_priv->uncore.regs); - gen11_gt_irq_reset(&dev_priv->gt); + gen11_gt_irq_reset(gt); gen11_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); @@ -3186,11 +3187,12 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) static void dg1_irq_reset(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = to_gt(dev_priv); + struct intel_uncore *uncore = gt->uncore; dg1_master_intr_disable(dev_priv->uncore.regs); - gen11_gt_irq_reset(&dev_priv->gt); + gen11_gt_irq_reset(gt); gen11_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); @@ -3250,7 +3252,7 @@ static void cherryview_irq_reset(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, GEN8_MASTER_IRQ, 0); intel_uncore_posting_read(&dev_priv->uncore, GEN8_MASTER_IRQ); - gen8_gt_irq_reset(&dev_priv->gt); + gen8_gt_irq_reset(to_gt(dev_priv)); GEN3_IRQ_RESET(uncore, GEN8_PCU_); @@ -3707,7 +3709,7 @@ static void ilk_irq_postinstall(struct drm_i915_private *dev_priv) ibx_irq_postinstall(dev_priv); - gen5_gt_irq_postinstall(&dev_priv->gt); + gen5_gt_irq_postinstall(to_gt(dev_priv)); GEN3_IRQ_INIT(uncore, DE, dev_priv->irq_mask, display_mask | extra_mask); @@ -3744,7 +3746,7 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv) { - gen5_gt_irq_postinstall(&dev_priv->gt); + gen5_gt_irq_postinstall(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -3850,7 +3852,7 @@ static void gen8_irq_postinstall(struct drm_i915_private *dev_priv) else if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_postinstall(dev_priv); - gen8_gt_irq_postinstall(&dev_priv->gt); + gen8_gt_irq_postinstall(to_gt(dev_priv)); gen8_de_irq_postinstall(dev_priv); gen8_master_intr_enable(dev_priv->uncore.regs); @@ -3869,13 +3871,14 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv) static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = to_gt(dev_priv); + struct intel_uncore *uncore = gt->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) icp_irq_postinstall(dev_priv); - gen11_gt_irq_postinstall(&dev_priv->gt); + gen11_gt_irq_postinstall(gt); gen11_de_irq_postinstall(dev_priv); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); @@ -3886,10 +3889,11 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt = to_gt(dev_priv); + struct intel_uncore *uncore = gt->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; - gen11_gt_irq_postinstall(&dev_priv->gt); + gen11_gt_irq_postinstall(gt); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); @@ -3900,13 +3904,13 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) GEN11_DISPLAY_IRQ_ENABLE); } - dg1_master_intr_enable(dev_priv->uncore.regs); - intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR); + dg1_master_intr_enable(uncore->regs); + intel_uncore_posting_read(uncore, DG1_MSTR_TILE_INTR); } static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) { - gen8_gt_irq_postinstall(&dev_priv->gt); + gen8_gt_irq_postinstall(to_gt(dev_priv)); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -4069,7 +4073,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir); + intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4177,7 +4181,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir); + intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4322,11 +4326,11 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_cs_irq(dev_priv->gt.engine[RCS0], + intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_cs_irq(dev_priv->gt.engine[VCS0], + intel_engine_cs_irq(to_gt(dev_priv)->engine[VCS0], iir >> 25); if (iir & I915_MASTER_ERROR_INTERRUPT) @@ -4377,7 +4381,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) /* pre-gen11 the guc irqs bits are in the upper 16 bits of the pm reg */ if (HAS_GT_UC(dev_priv) && GRAPHICS_VER(dev_priv) < 11) - dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16; + to_gt(dev_priv)->pm_guc_events = GUC_INTR_GUC2HOST << 16; if (!HAS_DISPLAY(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 666808cb3a32..7998bc74ab49 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -27,6 +27,7 @@ #include "i915_drv.h" +#include "i915_mm.h" struct remap_pfn { struct mm_struct *mm; @@ -37,17 +38,6 @@ struct remap_pfn { resource_size_t iobase; }; -static int remap_pfn(pte_t *pte, unsigned long addr, void *data) -{ - struct remap_pfn *r = data; - - /* Special PTE are not associated with any struct page */ - set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); - r->pfn++; - - return 0; -} - #define use_dma(io) ((io) != -1) static inline unsigned long sgt_pfn(const struct remap_pfn *r) @@ -77,6 +67,20 @@ static int remap_sg(pte_t *pte, unsigned long addr, void *data) return 0; } +#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + +#if IS_ENABLED(CONFIG_X86) +static int remap_pfn(pte_t *pte, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + + return 0; +} + /** * remap_io_mapping - remap an IO mapping to userspace * @vma: user vma to map to @@ -94,7 +98,6 @@ int remap_io_mapping(struct vm_area_struct *vma, struct remap_pfn r; int err; -#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ @@ -111,6 +114,7 @@ int remap_io_mapping(struct vm_area_struct *vma, return 0; } +#endif /** * remap_io_sg - remap an IO mapping to userspace diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h new file mode 100644 index 000000000000..76f1d53bdf34 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mm.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MM_H__ +#define __I915_MM_H__ + +#include <linux/types.h> + +struct vm_area_struct; +struct io_mapping; +struct scatterlist; + +#if IS_ENABLED(CONFIG_X86) +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); +#else +static inline +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + pr_err("Architecture has no %s() and shouldn't be calling this function\n", __func__); + WARN_ON_ONCE(1); + return 0; +} +#endif + +int remap_io_sg(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct scatterlist *sgl, resource_size_t iobase); + +#endif /* __I915_MM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index c7507266aa83..f6bcd2f89257 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -4,7 +4,7 @@ * Copyright © 2021 Intel Corporation */ -#include <linux/console.h> +#include <drm/drm_drv.h> #include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" @@ -31,7 +31,7 @@ static int i915_check_nomodeset(void) if (i915_modparams.modeset == 0) use_kms = false; - if (vgacon_text_force() && i915_modparams.modeset == -1) + if (drm_firmware_drivers_only() && i915_modparams.modeset == -1) use_kms = false; if (!use_kms) { diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index e07f4cfea63a..525ae832aa9a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -140,6 +140,9 @@ i915_param_named_unsafe(invert_brightness, int, 0400, i915_param_named(disable_display, bool, 0400, "Disable display (default: false)"); +i915_param_named(memtest, bool, 0400, + "Perform a read/write test of all device memory on module load (default: off)"); + i915_param_named(mmio_debug, int, 0400, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8d725b64592d..c9d53ff910a0 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -64,6 +64,7 @@ struct drm_printer; param(char *, guc_firmware_path, NULL, 0400) \ param(char *, huc_firmware_path, NULL, 0400) \ param(char *, dmc_firmware_path, NULL, 0400) \ + param(bool, memtest, false, 0400) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ param(int, edp_vswing, 0, 0400) \ param(unsigned int, reset, 3, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 52749b8f4f6b..261294df535c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -22,8 +22,6 @@ * */ -#include <linux/vga_switcheroo.h> - #include <drm/drm_drv.h> #include <drm/i915_pciids.h> @@ -33,8 +31,8 @@ #define PLATFORM(x) .platform = (x) #define GEN(x) \ - .graphics_ver = (x), \ - .media_ver = (x), \ + .graphics.ver = (x), \ + .media.ver = (x), \ .display.ver = (x) #define I845_PIPE_OFFSETS \ @@ -164,8 +162,8 @@ #define I830_FEATURES \ GEN(2), \ .is_mobile = 1, \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_overlay = 1, \ .display.cursor_needs_physical = 1, \ .display.overlay_needs_physical = 1, \ @@ -185,8 +183,8 @@ #define I845_FEATURES \ GEN(2), \ - .pipe_mask = BIT(PIPE_A), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A), \ + .display.pipe_mask = BIT(PIPE_A), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A), \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ @@ -227,8 +225,8 @@ static const struct intel_device_info i865g_info = { #define GEN3_FEATURES \ GEN(3), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .platform_engine_mask = BIT(RCS0), \ @@ -317,8 +315,8 @@ static const struct intel_device_info pnv_m_info = { #define GEN4_FEATURES \ GEN(4), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ @@ -370,8 +368,8 @@ static const struct intel_device_info gm45_info = { #define GEN5_FEATURES \ GEN(5), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_hotplug = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ @@ -400,8 +398,8 @@ static const struct intel_device_info ilk_m_info = { #define GEN6_FEATURES \ GEN(6), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -451,8 +449,8 @@ static const struct intel_device_info snb_m_gt2_info = { #define GEN7_FEATURES \ GEN(7), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -506,8 +504,8 @@ static const struct intel_device_info ivb_q_info = { GEN7_FEATURES, PLATFORM(INTEL_IVYBRIDGE), .gt = 2, - .pipe_mask = 0, /* legal, last one wins */ - .cpu_transcoder_mask = 0, + .display.pipe_mask = 0, /* legal, last one wins */ + .display.cpu_transcoder_mask = 0, .has_l3_dpf = 1, }; @@ -515,8 +513,8 @@ static const struct intel_device_info vlv_info = { PLATFORM(INTEL_VALLEYVIEW), GEN(7), .is_lp = 1, - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, .has_rc6 = 1, .has_reset_engine = true, @@ -540,7 +538,7 @@ static const struct intel_device_info vlv_info = { #define G75_FEATURES \ GEN7_FEATURES, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \ .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ @@ -610,8 +608,8 @@ static const struct intel_device_info bdw_gt3_info = { static const struct intel_device_info chv_info = { PLATFORM(INTEL_CHERRYVIEW), GEN(8), - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), .display.has_hotplug = 1, .is_lp = 1, .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), @@ -688,8 +686,8 @@ static const struct intel_device_info skl_gt4_info = { .dbuf.slice_mask = BIT(DBUF_S1), \ .display.has_hotplug = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ .has_64bit_reloc = 1, \ @@ -797,8 +795,8 @@ static const struct intel_device_info cml_gt2_info = { #define GEN11_FEATURES \ GEN9_FEATURES, \ GEN11_DEFAULT_PAGE_SIZES, \ - .abox_mask = BIT(0), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + .display.abox_mask = BIT(0), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), \ .pipe_offsets = { \ @@ -849,9 +847,9 @@ static const struct intel_device_info jsl_info = { #define GEN12_FEATURES \ GEN11_FEATURES, \ GEN(12), \ - .abox_mask = GENMASK(2, 1), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + .display.abox_mask = GENMASK(2, 1), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_D) | \ BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), \ .pipe_offsets = { \ @@ -886,9 +884,9 @@ static const struct intel_device_info tgl_info = { static const struct intel_device_info rkl_info = { GEN12_FEATURES, PLATFORM(INTEL_ROCKETLAKE), - .abox_mask = BIT(0), - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + .display.abox_mask = BIT(0), + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), .display.has_hti = 1, .display.has_psr_hw_tracking = 0, @@ -906,9 +904,9 @@ static const struct intel_device_info rkl_info = { static const struct intel_device_info dg1_info = { GEN12_FEATURES, DGFX_FEATURES, - .graphics_rel = 10, + .graphics.rel = 10, PLATFORM(INTEL_DG1), - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | @@ -920,7 +918,7 @@ static const struct intel_device_info dg1_info = { static const struct intel_device_info adl_s_info = { GEN12_FEATURES, PLATFORM(INTEL_ALDERLAKE_S), - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = @@ -937,8 +935,11 @@ static const struct intel_device_info adl_s_info = { } #define XE_LPD_FEATURES \ - .abox_mask = GENMASK(1, 0), \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 0 }, \ + .display.abox_mask = GENMASK(1, 0), \ + .color = { .degamma_lut_size = 128, .gamma_lut_size = 1024, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ + DRM_COLOR_LUT_EQUAL_CHANNELS, \ + }, \ .dbuf.size = 4096, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ BIT(DBUF_S4), \ @@ -954,7 +955,7 @@ static const struct intel_device_info adl_s_info = { .display.has_ipc = 1, \ .display.has_psr = 1, \ .display.ver = 13, \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ + .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ .pipe_offsets = { \ [TRANSCODER_A] = PIPE_A_OFFSET, \ [TRANSCODER_B] = PIPE_B_OFFSET, \ @@ -977,8 +978,7 @@ static const struct intel_device_info adl_p_info = { GEN12_FEATURES, XE_LPD_FEATURES, PLATFORM(INTEL_ALDERLAKE_P), - .require_force_probe = 1, - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C) | BIT(TRANSCODER_D) | BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), .display.has_cdclk_crawl = 1, @@ -998,8 +998,8 @@ static const struct intel_device_info adl_p_info = { I915_GTT_PAGE_SIZE_2M #define XE_HP_FEATURES \ - .graphics_ver = 12, \ - .graphics_rel = 50, \ + .graphics.ver = 12, \ + .graphics.rel = 50, \ XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ .has_64bit_reloc = 1, \ @@ -1017,8 +1017,8 @@ static const struct intel_device_info adl_p_info = { .ppgtt_type = INTEL_PPGTT_FULL #define XE_HPM_FEATURES \ - .media_ver = 12, \ - .media_rel = 50 + .media.ver = 12, \ + .media.rel = 50 __maybe_unused static const struct intel_device_info xehpsdv_info = { @@ -1027,7 +1027,7 @@ static const struct intel_device_info xehpsdv_info = { DGFX_FEATURES, PLATFORM(INTEL_XEHPSDV), .display = { }, - .pipe_mask = 0, + .has_64k_pages = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | @@ -1042,15 +1042,16 @@ static const struct intel_device_info dg2_info = { XE_HPM_FEATURES, XE_LPD_FEATURES, DGFX_FEATURES, - .graphics_rel = 55, - .media_rel = 55, + .graphics.rel = 55, + .media.rel = 55, PLATFORM(INTEL_DG2), + .has_64k_pages = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | BIT(VCS0) | BIT(VCS2), .require_force_probe = 1, - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | + .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C) | BIT(TRANSCODER_D), }; @@ -1131,6 +1132,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_ADLS_IDS(&adl_s_info), INTEL_ADLP_IDS(&adl_p_info), INTEL_DG1_IDS(&dg1_info), + INTEL_RPLS_IDS(&adl_s_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -1203,11 +1205,8 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (PCI_FUNC(pdev->devfn)) return -ENODEV; - /* - * apple-gmux is needed on dual GPU MacBook Pro - * to probe the panel if we're the inactive GPU. - */ - if (vga_switcheroo_client_probe_defer(pdev)) + /* Detect if we need to wait for other drivers early on */ + if (intel_modeset_probe_defer(pdev)) return -EPROBE_DEFER; err = i915_driver_probe(pdev, ent); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2f01b8c0284c..170bba913c30 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4443,7 +4443,7 @@ void i915_perf_init(struct drm_i915_private *i915) mutex_init(&perf->lock); /* Choose a representative limit */ - oa_sample_rate_hard_limit = i915->gt.clock_frequency / 2; + oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2; mutex_init(&perf->metrics_lock); idr_init_base(&perf->metrics_idr, 1); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 0b488d49694c..ea655161793e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -210,8 +210,8 @@ static void init_rc6(struct i915_pmu *pmu) struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); intel_wakeref_t wakeref; - with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) { + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915)); pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = pmu->sample[__I915_SAMPLE_RC6].cur; pmu->sleep_last = ktime_get_raw(); @@ -222,7 +222,7 @@ static void park_rc6(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915)); pmu->sleep_last = ktime_get_raw(); } @@ -419,7 +419,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) struct drm_i915_private *i915 = container_of(hrtimer, struct drm_i915_private, pmu.timer); struct i915_pmu *pmu = &i915->pmu; - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); unsigned int period_ns; ktime_t now; @@ -476,7 +476,7 @@ engine_event_status(struct intel_engine_cs *engine, static int config_status(struct drm_i915_private *i915, u64 config) { - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); switch (config) { case I915_PMU_ACTUAL_FREQUENCY: @@ -601,10 +601,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event) val = READ_ONCE(pmu->irq_count); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(&i915->gt); + val = get_rc6(to_gt(i915)); break; case I915_PMU_SOFTWARE_GT_AWAKE_TIME: - val = ktime_to_ns(intel_gt_get_awake_time(&i915->gt)); + val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915))); break; } } diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 51b368be0fc4..2dfbc22857a3 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -31,7 +31,7 @@ static int copy_query_item(void *query_hdr, size_t query_sz, static int query_topology_info(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) { - const struct sseu_dev_info *sseu = &dev_priv->gt.info.sseu; + const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; int ret; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 47fcd104868f..4c28dadf8d69 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -501,6 +501,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOBITS_PPGTT_CACHE64B (3 << 8) #define ECOBITS_PPGTT_CACHE4B (0 << 8) +#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54) +#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) +#define GLOBAL_INVALIDATION_MODE REG_BIT(2) + +#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c) +#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) +#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) +#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) + +#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) +#define FORCE_MISS_FTLB REG_BIT(3) + #define GAB_CTL _MMIO(0x24000) #define GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8) @@ -722,6 +734,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_OA_TLB_INV_CR _MMIO(0xceec) +#define GEN12_SQCM _MMIO(0x8724) +#define EN_32B_ACCESS REG_BIT(30) + /* Gen12 OAR unit */ #define GEN12_OAR_OACONTROL _MMIO(0x2960) #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 @@ -773,6 +788,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define EU_PERF_CNTL5 _MMIO(0xe55c) #define EU_PERF_CNTL6 _MMIO(0xe65c) +#define RT_CTRL _MMIO(0xe530) +#define DIS_NULL_QUERY REG_BIT(10) + /* * OA Boolean state */ @@ -2247,6 +2265,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define SNPS_PHY_MPLLB_DP2_MODE REG_BIT(9) #define SNPS_PHY_MPLLB_WORD_DIV2_EN REG_BIT(8) #define SNPS_PHY_MPLLB_TX_CLK_DIV REG_GENMASK(7, 5) +#define SNPS_PHY_MPLLB_SHIM_DIV32_CLK_SEL REG_BIT(0) #define SNPS_PHY_MPLLB_FRACN1(phy) _MMIO_SNPS(phy, 0x168008) #define SNPS_PHY_MPLLB_FRACN_EN REG_BIT(31) @@ -2665,6 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ +#define GUCPMTIMESTAMP _MMIO(0xC3E8) + /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) #define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) @@ -2775,6 +2796,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10) #define IECPUNIT_CLKGATE_DIS REG_BIT(22) +#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) +#define ALNUNIT_CLKGATE_DIS REG_BIT(13) + #define ERROR_GEN6 _MMIO(0x40a0) #define GEN7_ERR_INT _MMIO(0x44040) #define ERR_INT_POISON (1 << 31) @@ -2873,6 +2897,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) #define GEN11_ENABLE_32_PLANE_MODE (1 << 7) +#define SCCGCTL94DC _MMIO(0x94dc) +#define CG3DDISURB REG_BIT(14) + +#define MLTICTXCTL _MMIO(0xb170) +#define TDONRENDER REG_BIT(2) + +#define L3SQCREG1_CCS0 _MMIO(0xb200) +#define FLUSHALLNONCOH REG_BIT(5) + /* WaClearTdlStateAckDirtyBits */ #define GEN8_STATE_ACK _MMIO(0x20F0) #define GEN9_STATE_ACK_SLICE1 _MMIO(0x20F8) @@ -3109,7 +3142,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) #define GEN10_CACHE_MODE_SS _MMIO(0xe420) -#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) +#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) @@ -4288,21 +4322,62 @@ enum { /* * GEN10 clock gating regs */ + +#define UNSLCGCTL9440 _MMIO(0x9440) +#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) +#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) +#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) +#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24) +#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23) +#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22) +#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21) +#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17) +#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16) +#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15) +#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) +#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) + +#define UNSLCGCTL9444 _MMIO(0x9444) +#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) +#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) +#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) +#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) +#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) +#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) +#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) +#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) +#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) +#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) +#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) +#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) +#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) +#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) +#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) +#define LTCDD_CLKGATE_DIS REG_BIT(10) + #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) +#define NODEDSS_CLKGATE_DIS REG_BIT(12) #define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) -#define GWUNIT_CLKGATE_DIS (1 << 16) +#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) +#define GWUNIT_CLKGATE_DIS REG_BIT(16) #define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) +#define SSMCGCTL9530 _MMIO(0x9530) +#define RTFUNIT_CLKGATE_DIS REG_BIT(18) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS REG_BIT(20) +#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ +#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ +#define GAMEDIA_CLKGATE_DIS REG_BIT(11) #define HSUNIT_CLKGATE_DIS REG_BIT(8) #define VSUNIT_CLKGATE_DIS REG_BIT(3) @@ -6892,7 +6967,7 @@ enum { #define DVS_SOURCE_KEY (1 << 22) #define DVS_RGB_ORDER_XBGR (1 << 20) #define DVS_YUV_FORMAT_BT709 (1 << 18) -#define DVS_YUV_BYTE_ORDER_MASK (3 << 16) +#define DVS_YUV_ORDER_MASK (3 << 16) #define DVS_YUV_ORDER_YUYV (0 << 16) #define DVS_YUV_ORDER_UYVY (1 << 16) #define DVS_YUV_ORDER_YVYU (2 << 16) @@ -6971,7 +7046,7 @@ enum { #define SPRITE_RGB_ORDER_RGBX (1 << 20) /* only for 888 and 161616 */ #define SPRITE_YUV_TO_RGB_CSC_DISABLE (1 << 19) #define SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709 (1 << 18) /* 0 is BT601 */ -#define SPRITE_YUV_BYTE_ORDER_MASK (3 << 16) +#define SPRITE_YUV_ORDER_MASK (3 << 16) #define SPRITE_YUV_ORDER_YUYV (0 << 16) #define SPRITE_YUV_ORDER_UYVY (1 << 16) #define SPRITE_YUV_ORDER_YVYU (2 << 16) @@ -7056,7 +7131,7 @@ enum { #define SP_ALPHA_PREMULTIPLY (1 << 23) /* CHV pipe B */ #define SP_SOURCE_KEY (1 << 22) #define SP_YUV_FORMAT_BT709 (1 << 18) -#define SP_YUV_BYTE_ORDER_MASK (3 << 16) +#define SP_YUV_ORDER_MASK (3 << 16) #define SP_YUV_ORDER_YUYV (0 << 16) #define SP_YUV_ORDER_UYVY (1 << 16) #define SP_YUV_ORDER_YVYU (2 << 16) @@ -7197,10 +7272,10 @@ enum { #define PLANE_CTL_YUV420_Y_PLANE (1 << 19) #define PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709 (1 << 18) #define PLANE_CTL_YUV422_ORDER_MASK (0x3 << 16) -#define PLANE_CTL_YUV422_YUYV (0 << 16) -#define PLANE_CTL_YUV422_UYVY (1 << 16) -#define PLANE_CTL_YUV422_YVYU (2 << 16) -#define PLANE_CTL_YUV422_VYUY (3 << 16) +#define PLANE_CTL_YUV422_ORDER_YUYV (0 << 16) +#define PLANE_CTL_YUV422_ORDER_UYVY (1 << 16) +#define PLANE_CTL_YUV422_ORDER_YVYU (2 << 16) +#define PLANE_CTL_YUV422_ORDER_VYUY (3 << 16) #define PLANE_CTL_RENDER_DECOMPRESSION_ENABLE (1 << 15) #define PLANE_CTL_TRICKLE_FEED_DISABLE (1 << 14) #define PLANE_CTL_CLEAR_COLOR_DISABLE (1 << 13) /* TGL+ */ @@ -7254,10 +7329,10 @@ enum { #define _PLANE_CUS_CTL_1_A 0x701c8 #define _PLANE_CUS_CTL_2_A 0x702c8 #define PLANE_CUS_ENABLE (1 << 31) -#define PLANE_CUS_PLANE_4_RKL (0 << 30) -#define PLANE_CUS_PLANE_5_RKL (1 << 30) -#define PLANE_CUS_PLANE_6 (0 << 30) -#define PLANE_CUS_PLANE_7 (1 << 30) +#define PLANE_CUS_Y_PLANE_4_RKL (0 << 30) +#define PLANE_CUS_Y_PLANE_5_RKL (1 << 30) +#define PLANE_CUS_Y_PLANE_6_ICL (0 << 30) +#define PLANE_CUS_Y_PLANE_7_ICL (1 << 30) #define PLANE_CUS_HPHASE_SIGN_NEGATIVE (1 << 19) #define PLANE_CUS_HPHASE_0 (0 << 16) #define PLANE_CUS_HPHASE_0_25 (1 << 16) @@ -7289,12 +7364,12 @@ enum { #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 -#define _PLANE_CC_VAL_1_B 0x711b4 -#define _PLANE_CC_VAL_2_B 0x712b4 -#define _PLANE_CC_VAL_1(pipe) _PIPE(pipe, _PLANE_CC_VAL_1_A, _PLANE_CC_VAL_1_B) -#define _PLANE_CC_VAL_2(pipe) _PIPE(pipe, _PLANE_CC_VAL_2_A, _PLANE_CC_VAL_2_B) -#define PLANE_CC_VAL(pipe, plane) \ - _MMIO_PLANE(plane, _PLANE_CC_VAL_1(pipe), _PLANE_CC_VAL_2(pipe)) +#define _PLANE_CC_VAL_1_B 0x711b4 +#define _PLANE_CC_VAL_2_B 0x712b4 +#define _PLANE_CC_VAL_1(pipe, dw) (_PIPE(pipe, _PLANE_CC_VAL_1_A, _PLANE_CC_VAL_1_B) + (dw) * 4) +#define _PLANE_CC_VAL_2(pipe, dw) (_PIPE(pipe, _PLANE_CC_VAL_2_A, _PLANE_CC_VAL_2_B) + (dw) * 4) +#define PLANE_CC_VAL(pipe, plane, dw) \ + _MMIO_PLANE((plane), _PLANE_CC_VAL_1((pipe), (dw)), _PLANE_CC_VAL_2((pipe), (dw))) /* Input CSC Register Definitions */ #define _PLANE_INPUT_CSC_RY_GY_1_A 0x701E0 @@ -8367,6 +8442,9 @@ enum { #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11) +#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON _MMIO(0x20EC) +#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0) + #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) @@ -8390,9 +8468,10 @@ enum { #define GEN8_ERRDETBCTRL (1 << 9) #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) - #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) - #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) - #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) +#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) +#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) +#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) +#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) #define HIZ_CHICKEN _MMIO(0x7018) # define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15) @@ -8446,6 +8525,12 @@ enum { #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) #define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22) +#define GEN11_L3SQCREG5 _MMIO(0xb158) +#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) + +#define XEHP_L3SCQREG7 _MMIO(0xb188) +#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) + /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define ICL_HDC_MODE _MMIO(0xE5F4) @@ -8456,6 +8541,12 @@ enum { #define HDC_FORCE_NON_COHERENT (1 << 4) #define HDC_BARRIER_PERFORMANCE_DISABLE (1 << 10) +#define GEN12_HDC_CHICKEN0 _MMIO(0xE5F0) +#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) + +#define SARB_CHICKEN1 _MMIO(0xe90c) +#define COMP_CKN_IN REG_GENMASK(30, 29) + #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) /* GEN9 chicken */ @@ -8483,8 +8574,13 @@ enum { _PIPEB_CHICKEN) #define UNDERRUN_RECOVERY_DISABLE_ADLP REG_BIT(30) #define UNDERRUN_RECOVERY_ENABLE_DG2 REG_BIT(30) -#define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15) -#define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU REG_BIT(15) +#define DG2_RENDER_CCSTAG_4_3_EN REG_BIT(12) +#define PER_PIXEL_ALPHA_BYPASS_EN REG_BIT(7) + +#define VFLSKPD _MMIO(0x62a8) +#define DIS_OVER_FETCH_CACHE REG_BIT(1) +#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) #define FF_MODE2 _MMIO(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) @@ -9309,6 +9405,9 @@ enum { #define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1 << 14) #define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28) +#define UNSLCGCTL9430 _MMIO(0x9430) +#define MSQDUNIT_CLKGATE_DIS REG_BIT(3) + #define GEN6_GFXPAUSE _MMIO(0xA000) #define GEN6_RPNSWREQ _MMIO(0xA008) #define GEN6_TURBO_DISABLE (1 << 31) @@ -9318,6 +9417,7 @@ enum { #define GEN6_OFFSET(x) ((x) << 19) #define GEN6_AGGRESSIVE_TURBO (0 << 15) #define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23 +#define GEN9_IGNORE_SLICE_RATIO (0 << 0) #define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C) #define GEN6_RC_CONTROL _MMIO(0xA090) @@ -9353,6 +9453,9 @@ enum { #define GEN6_RP_UP_BUSY_CONT (0x4 << 3) #define GEN6_RP_DOWN_IDLE_AVG (0x2 << 0) #define GEN6_RP_DOWN_IDLE_CONT (0x1 << 0) +#define GEN6_RPSWCTL_SHIFT 9 +#define GEN9_RPSWCTL_ENABLE (0x2 << GEN6_RPSWCTL_SHIFT) +#define GEN9_RPSWCTL_DISABLE (0x0 << GEN6_RPSWCTL_SHIFT) #define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C) #define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030) #define GEN6_RP_CUR_UP_EI _MMIO(0xA050) @@ -9624,24 +9727,39 @@ enum { #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) #define GEN8_ROW_CHICKEN _MMIO(0xe4f0) -#define FLOW_CONTROL_ENABLE (1 << 15) -#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8) -#define STALL_DOP_GATING_DISABLE (1 << 5) -#define THROTTLE_12_5 (7 << 2) -#define DISABLE_EARLY_EOT (1 << 1) +#define FLOW_CONTROL_ENABLE REG_BIT(15) +#define UGM_BACKUP_MODE REG_BIT(13) +#define MDQ_ARBITRATION_MODE REG_BIT(12) +#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8) +#define STALL_DOP_GATING_DISABLE REG_BIT(5) +#define THROTTLE_12_5 REG_GENMASK(4, 2) +#define DISABLE_EARLY_EOT REG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) +#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) +#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8) +#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) +#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4) +#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) +#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) +#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) +#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) + #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define DOP_CLOCK_GATING_DISABLE (1 << 0) #define PUSH_CONSTANT_DEREF_DISABLE (1 << 8) #define GEN11_TDL_CLOCK_GATING_FIX_DISABLE (1 << 1) -#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) -#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) -#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) +#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) +#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) +#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) +#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) +#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) @@ -9656,9 +9774,10 @@ enum { #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) -#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1 << 8) -#define GEN9_ENABLE_YV12_BUGFIX (1 << 4) -#define GEN9_ENABLE_GPGPU_PREEMPTION (1 << 2) +#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) +#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8) +#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4) +#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2) /* Audio */ #define G4X_AUD_VID_DID _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x62020) @@ -10541,6 +10660,14 @@ enum skl_power_gate { #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1 << 16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) +/* CDCLK_SQUASH_CTL */ +#define CDCLK_SQUASH_CTL _MMIO(0x46008) +#define CDCLK_SQUASH_ENABLE REG_BIT(31) +#define CDCLK_SQUASH_WINDOW_SIZE_MASK REG_GENMASK(27, 24) +#define CDCLK_SQUASH_WINDOW_SIZE(x) REG_FIELD_PREP(CDCLK_SQUASH_WINDOW_SIZE_MASK, (x)) +#define CDCLK_SQUASH_WAVEFORM_MASK REG_GENMASK(15, 0) +#define CDCLK_SQUASH_WAVEFORM(x) REG_FIELD_PREP(CDCLK_SQUASH_WAVEFORM_MASK, (x)) + /* LCPLL_CTL */ #define LCPLL1_CTL _MMIO(0x46010) #define LCPLL2_CTL _MMIO(0x46014) @@ -12484,11 +12611,19 @@ enum skl_power_gate { #define PMFLUSH_GAPL3UNBLOCK (1 << 21) #define PMFLUSHDONE_LNEBLK (1 << 22) +#define XEHP_L3NODEARBCFG _MMIO(0xb0b4) +#define XEHP_LNESPARE REG_BIT(19) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define GEN12_GSMBASE _MMIO(0x108100) #define GEN12_DSMBASE _MMIO(0x1080C0) +#define XEHP_CLOCK_GATE_DIS _MMIO(0x101014) +#define SGSI_SIDECLK_DIS REG_BIT(17) +#define SGGI_DIS REG_BIT(15) +#define SGR_DIS REG_BIT(13) + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ @@ -12865,4 +13000,7 @@ enum skl_power_gate { #define CLKGATE_DIS_MISC _MMIO(0x46534) #define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21) +#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731C) +#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 42cd17357771..76cf5ac91e94 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" @@ -41,6 +42,7 @@ #include "gt/intel_rps.h" #include "i915_active.h" +#include "i915_deps.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_pm.h" @@ -113,6 +115,10 @@ static void i915_fence_release(struct dma_fence *fence) GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI); + i915_request_free_capture_list(fetch_and_zero(&rq->capture_list)); + if (i915_vma_snapshot_present(&rq->batch_snapshot)) + i915_vma_snapshot_put_onstack(&rq->batch_snapshot); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. @@ -186,19 +192,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq) __notify_execute_cb(rq, irq_work_imm); } -static void free_capture_list(struct i915_request *request) -{ - struct i915_capture_list *capture; - - capture = fetch_and_zero(&request->capture_list); - while (capture) { - struct i915_capture_list *next = capture->next; - - kfree(capture); - capture = next; - } -} - static void __i915_request_fill(struct i915_request *rq, u8 val) { void *vaddr = rq->ring->vaddr; @@ -303,6 +296,38 @@ static void __rq_cancel_watchdog(struct i915_request *rq) i915_request_put(rq); } +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/** + * i915_request_free_capture_list - Free a capture list + * @capture: Pointer to the first list item or NULL + * + */ +void i915_request_free_capture_list(struct i915_capture_list *capture) +{ + while (capture) { + struct i915_capture_list *next = capture->next; + + i915_vma_snapshot_put(capture->vma_snapshot); + kfree(capture); + capture = next; + } +} + +#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list) + +#define clear_capture_list(_rq) ((_rq)->capture_list = NULL) + +#else + +#define i915_request_free_capture_list(_a) do {} while (0) + +#define assert_capture_list_is_null(_a) do {} while (0) + +#define clear_capture_list(_rq) do {} while (0) + +#endif + bool i915_request_retire(struct i915_request *rq) { if (!__i915_request_is_complete(rq)) @@ -339,7 +364,7 @@ bool i915_request_retire(struct i915_request *rq) } if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) - atomic_dec(&rq->engine->gt->rps.num_waiters); + intel_rps_dec_waiters(&rq->engine->gt->rps); /* * We only loosely track inflight requests across preemption, @@ -359,7 +384,6 @@ bool i915_request_retire(struct i915_request *rq) intel_context_exit(rq->context); intel_context_unpin(rq->context); - free_capture_list(rq); i915_sched_node_fini(&rq->sched); i915_request_put(rq); @@ -719,7 +743,7 @@ void i915_request_cancel(struct i915_request *rq, int error) intel_context_cancel_request(rq->context, rq); } -static int __i915_sw_fence_call +static int submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *request = @@ -755,7 +779,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static int __i915_sw_fence_call +static int semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); @@ -829,11 +853,18 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); - rq->capture_list = NULL; + clear_capture_list(rq); + rq->batch_snapshot.present = false; init_llist_head(&rq->execute_cb); } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#define clear_batch_ptr(_rq) ((_rq)->batch = NULL) +#else +#define clear_batch_ptr(_a) do {} while (0) +#endif + struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { @@ -925,10 +956,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) i915_sched_node_reinit(&rq->sched); /* No zalloc, everything must be cleared after use */ - rq->batch = NULL; + clear_batch_ptr(rq); __rq_init_watchdog(rq); - GEM_BUG_ON(rq->capture_list); + assert_capture_list_is_null(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); + GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot)); /* * Reserve space in the ring buffer for all the commands required to @@ -1513,6 +1545,27 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) } /** + * i915_request_await_deps - set this request to (async) wait upon a struct + * i915_deps dma_fence collection + * @rq: request we are wishing to use + * @deps: The struct i915_deps containing the dependencies. + * + * Returns 0 if successful, negative error code on error. + */ +int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps) +{ + int i, err; + + for (i = 0; i < deps->num_deps; ++i) { + err = i915_request_await_dma_fence(rq, deps->fences[i]); + if (err) + return err; + } + + return 0; +} + +/** * i915_request_await_object - set this request to (async) wait upon a bo * @to: request we are wishing to use * @obj: object which may be in use on another ring. diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 3c6e8acd1457..170ee78c2858 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -40,19 +40,27 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" +#include "i915_vma_snapshot.h" #include <uapi/drm/i915_drm.h> struct drm_file; struct drm_i915_gem_object; struct drm_printer; +struct i915_deps; struct i915_request; +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct i915_capture_list { + struct i915_vma_snapshot *vma_snapshot; struct i915_capture_list *next; - struct i915_vma *vma; }; +void i915_request_free_capture_list(struct i915_capture_list *capture); +#else +#define i915_request_free_capture_list(_a) do {} while (0) +#endif + #define RQ_TRACE(rq, fmt, ...) do { \ const struct i915_request *rq__ = (rq); \ ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ @@ -289,10 +297,12 @@ struct i915_request { /** Preallocate space in the ring for the emitting the request */ u32 reserved_space; - /** Batch buffer related to this request if any (used for - * error state dump only). - */ - struct i915_vma *batch; + /** Batch buffer pointer for selftest internal use. */ + I915_SELFTEST_DECLARE(struct i915_vma *batch); + + struct i915_vma_snapshot batch_snapshot; + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) /** * Additional buffers requested by userspace to be captured upon * a GPU hang. The vma/obj on this list are protected by their @@ -300,6 +310,7 @@ struct i915_request { * on the active_list (of their final request). */ struct i915_capture_list *capture_list; +#endif /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; @@ -401,6 +412,7 @@ int i915_request_await_object(struct i915_request *to, bool write); int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); +int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps); int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence); @@ -647,7 +659,8 @@ i915_request_timeline(const struct i915_request *rq) { /* Valid only while the request is being constructed (or retired). */ return rcu_dereference_protected(rq->timeline, - lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) || + test_bit(CONTEXT_IS_PARKING, &rq->context->flags)); } static inline struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 4a6712dca838..41f2adb6a583 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st) return true; } +static void i915_refct_sgt_release(struct kref *ref) +{ + struct i915_refct_sgt *rsgt = + container_of(ref, typeof(*rsgt), kref); + + sg_free_table(&rsgt->table); + kfree(rsgt); +} + +static const struct i915_refct_sgt_ops rsgt_ops = { + .release = i915_refct_sgt_release +}; + +/** + * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops + * @rsgt: The struct i915_refct_sgt to initialize. + * size: The size of the underlying memory buffer. + */ +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) +{ + __i915_refct_sgt_init(rsgt, size, &rsgt_ops); +} + /** - * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node + * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct + * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. * @@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st) * taking a maximum segment length into account, splitting into segments * if necessary. * - * Return: A pointer to a kmalloced struct sg_table on success, negative + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative * error code cast to an error pointer on failure. */ -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, - u64 region_start) +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, + u64 region_start) { const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ u64 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; + struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); + if (!rsgt) return ERR_PTR(-ENOMEM); + i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); + st = &rsgt->table; if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), GFP_KERNEL)) { - kfree(st); + i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); } @@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, sg_mark_end(sg); i915_sg_trim(st); - return st; + return rsgt; } /** - * i915_sg_from_buddy_resource - Create an sg_table from a struct + * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. @@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, * taking a maximum segment length into account, splitting into segments * if necessary. * - * Return: A pointer to a kmalloced struct sg_table on success, negative + * Return: A pointer to a kmalloced struct i915_refct_sgts on success, negative * error code cast to an error pointer on failure. */ -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, - u64 region_start) +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, + u64 region_start) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; @@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, struct i915_buddy_mm *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct i915_buddy_block *block; + struct i915_refct_sgt *rsgt; struct scatterlist *sg; struct sg_table *st; resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); + if (!rsgt) return ERR_PTR(-ENOMEM); + i915_refct_sgt_init(rsgt, size); + st = &rsgt->table; if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) { - kfree(st); + i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); } @@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, sg_mark_end(sg); i915_sg_trim(st); - return st; + return rsgt; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index b8bd5925b03f..12c6a1684081 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void) bool i915_sg_trim(struct sg_table *orig_st); -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, - u64 region_start); +/** + * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt + */ +struct i915_refct_sgt_ops { + /** + * release() - Free the memory of the struct i915_refct_sgt + * @ref: struct kref that is embedded in the struct i915_refct_sgt + */ + void (*release)(struct kref *ref); +}; + +/** + * struct i915_refct_sgt - A refcounted scatter-gather table + * @kref: struct kref for refcounting + * @table: struct sg_table holding the scatter-gather table itself. Note that + * @table->sgl = NULL can be used to determine whether a scatter-gather table + * is present or not. + * @size: The size in bytes of the underlying memory buffer + * @ops: The operations structure. + */ +struct i915_refct_sgt { + struct kref kref; + struct sg_table table; + size_t size; + const struct i915_refct_sgt_ops *ops; +}; + +/** + * i915_refct_sgt_put - Put a refcounted sg-table + * @rsgt the struct i915_refct_sgt to put. + */ +static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt) +{ + if (rsgt) + kref_put(&rsgt->kref, rsgt->ops->release); +} + +/** + * i915_refct_sgt_get - Get a refcounted sg-table + * @rsgt the struct i915_refct_sgt to get. + */ +static inline struct i915_refct_sgt * +i915_refct_sgt_get(struct i915_refct_sgt *rsgt) +{ + kref_get(&rsgt->kref); + return rsgt; +} + +/** + * __i915_refct_sgt_init - Initialize a refcounted sg-list with a custom + * operations structure + * @rsgt The struct i915_refct_sgt to initialize. + * @size: Size in bytes of the underlying memory buffer. + * @ops: A customized operations structure in case the refcounted sg-list + * is embedded into another structure. + */ +static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, + size_t size, + const struct i915_refct_sgt_ops *ops) +{ + kref_init(&rsgt->kref); + rsgt->table.sgl = NULL; + rsgt->size = size; + rsgt->ops = ops; +} + +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); + +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, + u64 region_start); -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, - u64 region_start); +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, + u64 region_start); #endif diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7ea0dbf81530..2a74a9a1cafe 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -18,7 +18,9 @@ #define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG static DEFINE_SPINLOCK(i915_sw_fence_lock); +#endif #define WQ_FLAG_BITS \ BITS_PER_TYPE(typeof_member(struct wait_queue_entry, flags)) @@ -34,7 +36,7 @@ enum { static void *i915_sw_fence_debug_hint(void *addr) { - return (void *)(((struct i915_sw_fence *)addr)->flags & I915_SW_FENCE_MASK); + return (void *)(((struct i915_sw_fence *)addr)->fn); } #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -126,10 +128,7 @@ static inline void debug_fence_assert(struct i915_sw_fence *fence) static int __i915_sw_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - i915_sw_fence_notify_t fn; - - fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK); - return fn(fence, state); + return fence->fn(fence, state); } #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -242,10 +241,13 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, const char *name, struct lock_class_key *key) { - BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK); + BUG_ON(!fn); __init_waitqueue_head(&fence->wait, name, key); - fence->flags = (unsigned long)fn; + fence->fn = fn; +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG + fence->flags = 0; +#endif i915_sw_fence_reinit(fence); } @@ -257,7 +259,6 @@ void i915_sw_fence_reinit(struct i915_sw_fence *fence) atomic_set(&fence->pending, 1); fence->error = 0; - I915_SW_FENCE_BUG_ON(!fence->flags); I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head)); } @@ -279,6 +280,7 @@ static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, return 0; } +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence, const struct i915_sw_fence * const signaler) { @@ -322,9 +324,6 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, unsigned long flags; bool err; - if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) - return false; - spin_lock_irqsave(&i915_sw_fence_lock, flags); err = __i915_sw_fence_check_if_after(fence, signaler); __i915_sw_fence_clear_checked_bit(fence); @@ -332,6 +331,13 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, return err; } +#else +static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, + const struct i915_sw_fence * const signaler) +{ + return false; +} +#endif static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *signaler, diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 30a863353ee6..a7c603bc1b01 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -17,26 +17,27 @@ struct completion; struct dma_resv; +struct i915_sw_fence; + +enum i915_sw_fence_notify { + FENCE_COMPLETE, + FENCE_FREE +}; + +typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, + enum i915_sw_fence_notify state); struct i915_sw_fence { wait_queue_head_t wait; + i915_sw_fence_notify_t fn; +#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG unsigned long flags; +#endif atomic_t pending; int error; }; #define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */ -#define I915_SW_FENCE_PRIVATE_BIT 1 /* available for use by owner */ -#define I915_SW_FENCE_MASK (~3) - -enum i915_sw_fence_notify { - FENCE_COMPLETE, - FENCE_FREE -}; - -typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, - enum i915_sw_fence_notify state); -#define __i915_sw_fence_call __aligned(4) void __i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn, diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index 5b33ef23d54c..d2e56b387993 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -23,7 +23,7 @@ static void fence_work(struct work_struct *work) dma_fence_put(&f->dma); } -static int __i915_sw_fence_call +static int fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct dma_fence_work *f = container_of(fence, typeof(*f), chain); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1804f4142740..fae4d1f4f275 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -52,7 +52,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, u64 res = 0; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg); + res = intel_rc6_residency_us(&to_gt(dev_priv)->rc6, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -260,7 +260,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &i915->gt.rps; + struct intel_rps *rps = &to_gt(i915)->rps; return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps)); } @@ -269,7 +269,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &i915->gt.rps; + struct intel_rps *rps = &to_gt(i915)->rps; return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps)); } @@ -277,9 +277,9 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &i915->gt.rps; + struct intel_rps *rps = &to_gt(i915)->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq)); + return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -287,8 +287,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; - bool boost = false; + struct intel_rps *rps = &to_gt(dev_priv)->rps; ssize_t ret; u32 val; @@ -296,28 +295,16 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (ret) return ret; - /* Validate against (static) hardware limits */ - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || val > rps->max_freq) - return -EINVAL; - - mutex_lock(&rps->lock); - if (val != rps->boost_freq) { - rps->boost_freq = val; - boost = atomic_read(&rps->num_waiters); - } - mutex_unlock(&rps->lock); - if (boost) - schedule_work(&rps->work); + ret = intel_rps_set_boost_frequency(rps, val); - return count; + return ret ?: count; } static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &to_gt(dev_priv)->rps; return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq)); } @@ -325,7 +312,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_gt *gt = &dev_priv->gt; + struct intel_gt *gt = to_gt(dev_priv); struct intel_rps *rps = >->rps; return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps)); @@ -336,7 +323,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_gt *gt = &dev_priv->gt; + struct intel_gt *gt = to_gt(dev_priv); struct intel_rps *rps = >->rps; ssize_t ret; u32 val; @@ -353,7 +340,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct intel_rps *rps = >->rps; return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps)); @@ -364,7 +351,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &i915->gt.rps; + struct intel_rps *rps = &to_gt(i915)->rps; ssize_t ret; u32 val; @@ -394,7 +381,7 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &to_gt(dev_priv)->rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6b8fb6ffe8da..37b5c9e9d260 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 + #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _I915_TRACE_H_ @@ -8,582 +12,11 @@ #include <drm/drm_drv.h> -#include "display/intel_crtc.h" -#include "display/intel_display_types.h" #include "gt/intel_engine.h" #include "i915_drv.h" #include "i915_irq.h" -#undef TRACE_SYSTEM -#define TRACE_SYSTEM i915 -#define TRACE_INCLUDE_FILE i915_trace - -/* watermark/fifo updates */ - -TRACE_EVENT(intel_pipe_enable, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __array(u32, frame, 3) - __array(u32, scanline, 3) - __field(enum pipe, pipe) - ), - TP_fast_assign( - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc *it__; - for_each_intel_crtc(&dev_priv->drm, it__) { - __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); - __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); - } - __entry->pipe = crtc->pipe; - ), - - TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", - pipe_name(__entry->pipe), - __entry->frame[PIPE_A], __entry->scanline[PIPE_A], - __entry->frame[PIPE_B], __entry->scanline[PIPE_B], - __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) -); - -TRACE_EVENT(intel_pipe_disable, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __array(u32, frame, 3) - __array(u32, scanline, 3) - __field(enum pipe, pipe) - ), - - TP_fast_assign( - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc *it__; - for_each_intel_crtc(&dev_priv->drm, it__) { - __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); - __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); - } - __entry->pipe = crtc->pipe; - ), - - TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", - pipe_name(__entry->pipe), - __entry->frame[PIPE_A], __entry->scanline[PIPE_A], - __entry->frame[PIPE_B], __entry->scanline[PIPE_B], - __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) -); - -TRACE_EVENT(intel_pipe_crc, - TP_PROTO(struct intel_crtc *crtc, const u32 *crcs), - TP_ARGS(crtc, crcs), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __array(u32, crcs, 5) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - memcpy(__entry->crcs, crcs, sizeof(__entry->crcs)); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u crc=%08x %08x %08x %08x %08x", - pipe_name(__entry->pipe), __entry->frame, __entry->scanline, - __entry->crcs[0], __entry->crcs[1], __entry->crcs[2], - __entry->crcs[3], __entry->crcs[4]) -); - -TRACE_EVENT(intel_cpu_fifo_underrun, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - __entry->pipe = pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), - __entry->frame, __entry->scanline) -); - -TRACE_EVENT(intel_pch_fifo_underrun, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pch_transcoder), - TP_ARGS(dev_priv, pch_transcoder), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - enum pipe pipe = pch_transcoder; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - __entry->pipe = pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pch transcoder %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), - __entry->frame, __entry->scanline) -); - -TRACE_EVENT(intel_memory_cxsr, - TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), - TP_ARGS(dev_priv, old, new), - - TP_STRUCT__entry( - __array(u32, frame, 3) - __array(u32, scanline, 3) - __field(bool, old) - __field(bool, new) - ), - - TP_fast_assign( - struct intel_crtc *crtc; - for_each_intel_crtc(&dev_priv->drm, crtc) { - __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc); - __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc); - } - __entry->old = old; - __entry->new = new; - ), - - TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", - onoff(__entry->old), onoff(__entry->new), - __entry->frame[PIPE_A], __entry->scanline[PIPE_A], - __entry->frame[PIPE_B], __entry->scanline[PIPE_B], - __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) -); - -TRACE_EVENT(g4x_wm, - TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm), - TP_ARGS(crtc, wm), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __field(u16, primary) - __field(u16, sprite) - __field(u16, cursor) - __field(u16, sr_plane) - __field(u16, sr_cursor) - __field(u16, sr_fbc) - __field(u16, hpll_plane) - __field(u16, hpll_cursor) - __field(u16, hpll_fbc) - __field(bool, cxsr) - __field(bool, hpll) - __field(bool, fbc) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; - __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; - __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; - __entry->sr_plane = wm->sr.plane; - __entry->sr_cursor = wm->sr.cursor; - __entry->sr_fbc = wm->sr.fbc; - __entry->hpll_plane = wm->hpll.plane; - __entry->hpll_cursor = wm->hpll.cursor; - __entry->hpll_fbc = wm->hpll.fbc; - __entry->cxsr = wm->cxsr; - __entry->hpll = wm->hpll_en; - __entry->fbc = wm->fbc_en; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", - pipe_name(__entry->pipe), __entry->frame, __entry->scanline, - __entry->primary, __entry->sprite, __entry->cursor, - yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc, - yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc, - yesno(__entry->fbc)) -); - -TRACE_EVENT(vlv_wm, - TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), - TP_ARGS(crtc, wm), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __field(u32, level) - __field(u32, cxsr) - __field(u32, primary) - __field(u32, sprite0) - __field(u32, sprite1) - __field(u32, cursor) - __field(u32, sr_plane) - __field(u32, sr_cursor) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - __entry->level = wm->level; - __entry->cxsr = wm->cxsr; - __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; - __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; - __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; - __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; - __entry->sr_plane = wm->sr.plane; - __entry->sr_cursor = wm->sr.cursor; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline, __entry->level, __entry->cxsr, - __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, - __entry->sr_plane, __entry->sr_cursor) -); - -TRACE_EVENT(vlv_fifo_size, - TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), - TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __field(u32, sprite0_start) - __field(u32, sprite1_start) - __field(u32, fifo_size) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - __entry->sprite0_start = sprite0_start; - __entry->sprite1_start = sprite1_start; - __entry->fifo_size = fifo_size; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline, __entry->sprite0_start, - __entry->sprite1_start, __entry->fifo_size) -); - -/* plane updates */ - -TRACE_EVENT(intel_plane_update_noarm, - TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), - TP_ARGS(plane, crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __array(int, src, 4) - __array(int, dst, 4) - __string(name, plane->name) - ), - - TP_fast_assign( - __assign_str(name, plane->name); - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); - memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); - ), - - TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, - pipe_name(__entry->pipe), __get_str(name), - __entry->frame, __entry->scanline, - DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), - DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) -); - -TRACE_EVENT(intel_plane_update_arm, - TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), - TP_ARGS(plane, crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __array(int, src, 4) - __array(int, dst, 4) - __string(name, plane->name) - ), - - TP_fast_assign( - __assign_str(name, plane->name); - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); - memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); - ), - - TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, - pipe_name(__entry->pipe), __get_str(name), - __entry->frame, __entry->scanline, - DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), - DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) -); - -TRACE_EVENT(intel_plane_disable_arm, - TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), - TP_ARGS(plane, crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __string(name, plane->name) - ), - - TP_fast_assign( - __assign_str(name, plane->name); - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __get_str(name), - __entry->frame, __entry->scanline) -); - -/* fbc */ - -TRACE_EVENT(intel_fbc_activate, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, __entry->scanline) -); - -TRACE_EVENT(intel_fbc_deactivate, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, __entry->scanline) -); - -TRACE_EVENT(intel_fbc_nuke, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, __entry->scanline) -); - -/* pipe updates */ - -TRACE_EVENT(intel_crtc_vblank_work_start, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline) -); - -TRACE_EVENT(intel_crtc_vblank_work_end, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline) -); - -TRACE_EVENT(intel_pipe_update_start, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __field(u32, min) - __field(u32, max) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = intel_crtc_get_vblank_counter(crtc); - __entry->scanline = intel_get_crtc_scanline(crtc); - __entry->min = crtc->debug.min_vbl; - __entry->max = crtc->debug.max_vbl; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline, __entry->min, __entry->max) -); - -TRACE_EVENT(intel_pipe_update_vblank_evaded, - TP_PROTO(struct intel_crtc *crtc), - TP_ARGS(crtc), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - __field(u32, min) - __field(u32, max) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = crtc->debug.start_vbl_count; - __entry->scanline = crtc->debug.scanline_start; - __entry->min = crtc->debug.min_vbl; - __entry->max = crtc->debug.max_vbl; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline, __entry->min, __entry->max) -); - -TRACE_EVENT(intel_pipe_update_end, - TP_PROTO(struct intel_crtc *crtc, u32 frame, int scanline_end), - TP_ARGS(crtc, frame, scanline_end), - - TP_STRUCT__entry( - __field(enum pipe, pipe) - __field(u32, frame) - __field(u32, scanline) - ), - - TP_fast_assign( - __entry->pipe = crtc->pipe; - __entry->frame = frame; - __entry->scanline = scanline_end; - ), - - TP_printk("pipe %c, frame=%u, scanline=%u", - pipe_name(__entry->pipe), __entry->frame, - __entry->scanline) -); - -/* frontbuffer tracking */ - -TRACE_EVENT(intel_frontbuffer_invalidate, - TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), - TP_ARGS(frontbuffer_bits, origin), - - TP_STRUCT__entry( - __field(unsigned int, frontbuffer_bits) - __field(unsigned int, origin) - ), - - TP_fast_assign( - __entry->frontbuffer_bits = frontbuffer_bits; - __entry->origin = origin; - ), - - TP_printk("frontbuffer_bits=0x%08x, origin=%u", - __entry->frontbuffer_bits, __entry->origin) -); - -TRACE_EVENT(intel_frontbuffer_flush, - TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), - TP_ARGS(frontbuffer_bits, origin), - - TP_STRUCT__entry( - __field(unsigned int, frontbuffer_bits) - __field(unsigned int, origin) - ), - - TP_fast_assign( - __entry->frontbuffer_bits = frontbuffer_bits; - __entry->origin = origin; - ), - - TP_printk("frontbuffer_bits=0x%08x, origin=%u", - __entry->frontbuffer_bits, __entry->origin) -); - /* object tracking */ TRACE_EVENT(i915_gem_object_create, @@ -1331,5 +764,7 @@ DEFINE_EVENT(i915_context, i915_context_free, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#define TRACE_INCLUDE_FILE i915_trace #include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bef795e265a6..29a858c53bdd 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -40,12 +40,12 @@ static struct kmem_cache *slab_vmas; -struct i915_vma *i915_vma_alloc(void) +static struct i915_vma *i915_vma_alloc(void) { return kmem_cache_zalloc(slab_vmas, GFP_KERNEL); } -void i915_vma_free(struct i915_vma *vma) +static void i915_vma_free(struct i915_vma *vma) { return kmem_cache_free(slab_vmas, vma); } @@ -109,11 +109,9 @@ vma_create(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; - vma->resv = obj->base.resv; vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; @@ -346,7 +344,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) fence = dma_fence_get_rcu_safe(&vma->active.excl.fence); rcu_read_unlock(); if (fence) { - err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT); + err = dma_fence_wait(fence, true); dma_fence_put(fence); } } @@ -354,6 +352,28 @@ int i915_vma_wait_for_bind(struct i915_vma *vma) return err; } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +static int i915_vma_verify_bind_complete(struct i915_vma *vma) +{ + struct dma_fence *fence = i915_active_fence_get(&vma->active.excl); + int err; + + if (!fence) + return 0; + + if (dma_fence_is_signaled(fence)) + err = fence->error; + else + err = -EBUSY; + + dma_fence_put(fence); + + return err; +} +#else +#define i915_vma_verify_bind_complete(_vma) 0 +#endif + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map @@ -373,6 +393,7 @@ int i915_vma_bind(struct i915_vma *vma, u32 bind_flags; u32 vma_flags; + lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->size > vma->node.size); @@ -394,7 +415,7 @@ int i915_vma_bind(struct i915_vma *vma, if (bind_flags == 0) return 0; - GEM_BUG_ON(!vma->pages); + GEM_BUG_ON(!atomic_read(&vma->pages_count)); trace_i915_vma_bind(vma, bind_flags); if (work && bind_flags & vma->vm->bind_async_flags) { @@ -423,11 +444,16 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ + __i915_gem_object_pin_pages(vma->obj); + work->pinned = i915_gem_object_get(vma->obj); + } else { if (vma->obj) { - __i915_gem_object_pin_pages(vma->obj); - work->pinned = i915_gem_object_get(vma->obj); + int ret; + + ret = i915_gem_object_wait_moving_fence(vma->obj, true); + if (ret) + return ret; } - } else { vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } @@ -449,6 +475,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); + GEM_BUG_ON(i915_vma_verify_bind_complete(vma)); ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { @@ -667,7 +694,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } color = 0; - if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + if (i915_vm_has_cache_coloring(vma->vm)) color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { @@ -789,40 +816,356 @@ unpinned: return pinned; } -static int vma_get_pages(struct i915_vma *vma) +static struct scatterlist * +rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int src_stride, unsigned int dst_stride, + struct sg_table *st, struct scatterlist *sg) { - int err = 0; - bool pinned_pages = false; + unsigned int column, row; + unsigned int src_idx; - if (atomic_add_unless(&vma->pages_count, 1, 0)) - return 0; + for (column = 0; column < width; column++) { + unsigned int left; - if (vma->obj) { - err = i915_gem_object_pin_pages(vma->obj); - if (err) - return err; - pinned_pages = true; + src_idx = src_stride * (height - 1) + column + offset; + for (row = 0; row < height; row++) { + st->nents++; + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); + sg_dma_address(sg) = + i915_gem_object_get_dma_address(obj, src_idx); + sg_dma_len(sg) = I915_GTT_PAGE_SIZE; + sg = sg_next(sg); + src_idx -= src_stride; + } + + left = (dst_stride - height) * I915_GTT_PAGE_SIZE; + + if (!left) + continue; + + st->nents++; + + /* + * The DE ignores the PTEs for the padding tiles, the sg entry + * here is just a conenience to indicate how many padding PTEs + * to insert at this spot. + */ + sg_set_page(sg, NULL, left, 0); + sg_dma_address(sg) = 0; + sg_dma_len(sg) = left; + sg = sg_next(sg); + } + + return sg; +} + +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_rotation_info_size(rot_info); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) + sg = rotate_pages(obj, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].src_stride, + rot_info->plane[i].dst_stride, + st, sg); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, + rot_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static struct scatterlist * +remap_pages(struct drm_i915_gem_object *obj, + unsigned int offset, unsigned int alignment_pad, + unsigned int width, unsigned int height, + unsigned int src_stride, unsigned int dst_stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int row; + + if (!width || !height) + return sg; + + if (alignment_pad) { + st->nents++; + + /* + * The DE ignores the PTEs for the padding tiles, the sg entry + * here is just a convenience to indicate how many padding PTEs + * to insert at this spot. + */ + sg_set_page(sg, NULL, alignment_pad * 4096, 0); + sg_dma_address(sg) = 0; + sg_dma_len(sg) = alignment_pad * 4096; + sg = sg_next(sg); } - /* Allocations ahoy! */ - if (mutex_lock_interruptible(&vma->pages_mutex)) { - err = -EINTR; - goto unpin; + for (row = 0; row < height; row++) { + unsigned int left = width * I915_GTT_PAGE_SIZE; + + while (left) { + dma_addr_t addr; + unsigned int length; + + /* + * We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + + addr = i915_gem_object_get_dma_address_len(obj, offset, &length); + + length = min(left, length); + + st->nents++; + + sg_set_page(sg, NULL, length, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = length; + sg = sg_next(sg); + + offset += length / I915_GTT_PAGE_SIZE; + left -= length; + } + + offset += src_stride - width; + + left = (dst_stride - width) * I915_GTT_PAGE_SIZE; + + if (!left) + continue; + + st->nents++; + + /* + * The DE ignores the PTEs for the padding tiles, the sg entry + * here is just a conenience to indicate how many padding PTEs + * to insert at this spot. + */ + sg_set_page(sg, NULL, left, 0); + sg_dma_address(sg) = 0; + sg_dma_len(sg) = left; + sg = sg_next(sg); } - if (!atomic_read(&vma->pages_count)) { - err = vma->ops->set_pages(vma); - if (err) - goto unlock; - pinned_pages = false; + return sg; +} + +static noinline struct sg_table * +intel_remap_pages(struct intel_remapped_info *rem_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_remapped_info_size(rem_info); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int gtt_offset = 0; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { + unsigned int alignment_pad = 0; + + if (rem_info->plane_alignment) + alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset; + + sg = remap_pages(obj, + rem_info->plane[i].offset, alignment_pad, + rem_info->plane[i].width, rem_info->plane[i].height, + rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride, + st, sg); + + gtt_offset += alignment_pad + + rem_info->plane[i].dst_stride * rem_info->plane[i].height; + } + + i915_sg_trim(st); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, + rem_info->plane[0].height, size); + + return ERR_PTR(ret); +} + +static noinline struct sg_table * +intel_partial_pages(const struct i915_ggtt_view *view, + struct drm_i915_gem_object *obj) +{ + struct sg_table *st; + struct scatterlist *sg, *iter; + unsigned int count = view->partial.size; + unsigned int offset; + int ret = -ENOMEM; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, count, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); + GEM_BUG_ON(!iter); + + sg = st->sgl; + st->nents = 0; + do { + unsigned int len; + + len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), + count << PAGE_SHIFT); + sg_set_page(sg, NULL, len, 0); + sg_dma_address(sg) = + sg_dma_address(iter) + (offset << PAGE_SHIFT); + sg_dma_len(sg) = len; + + st->nents++; + count -= len >> PAGE_SHIFT; + if (count == 0) { + sg_mark_end(sg); + i915_sg_trim(st); /* Drop any unused tail entries. */ + + return st; + } + + sg = __sg_next(sg); + iter = __sg_next(iter); + offset = 0; + } while (1); + +err_sg_alloc: + kfree(st); +err_st_alloc: + return ERR_PTR(ret); +} + +static int +__i915_vma_get_pages(struct i915_vma *vma) +{ + struct sg_table *pages; + int ret; + + /* + * The vma->pages are only valid within the lifespan of the borrowed + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so + * must be the vma->pages. A simple rule is that vma->pages must only + * be accessed when the obj->mm.pages are pinned. + */ + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + + switch (vma->ggtt_view.type) { + default: + GEM_BUG_ON(vma->ggtt_view.type); + fallthrough; + case I915_GGTT_VIEW_NORMAL: + pages = vma->obj->mm.pages; + break; + + case I915_GGTT_VIEW_ROTATED: + pages = + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_REMAPPED: + pages = + intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: + pages = intel_partial_pages(&vma->ggtt_view, vma->obj); + break; } + + ret = 0; + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + pages = NULL; + drm_err(&vma->vm->i915->drm, + "Failed to get pages for VMA view type %u (%d)!\n", + vma->ggtt_view.type, ret); + } + + vma->pages = pages; + + return ret; +} + +I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma) +{ + int err; + + if (atomic_add_unless(&vma->pages_count, 1, 0)) + return 0; + + err = i915_gem_object_pin_pages(vma->obj); + if (err) + return err; + + err = __i915_vma_get_pages(vma); + if (err) + goto err_unpin; + + vma->page_sizes = vma->obj->mm.page_sizes; atomic_inc(&vma->pages_count); -unlock: - mutex_unlock(&vma->pages_mutex); -unpin: - if (pinned_pages) - __i915_gem_object_unpin_pages(vma->obj); + return 0; + +err_unpin: + __i915_gem_object_unpin_pages(vma->obj); return err; } @@ -830,18 +1173,31 @@ unpin: static void __vma_put_pages(struct i915_vma *vma, unsigned int count) { /* We allocate under vma_get_pages, so beware the shrinker */ - mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); + struct sg_table *pages = READ_ONCE(vma->pages); + GEM_BUG_ON(atomic_read(&vma->pages_count) < count); + if (atomic_sub_return(count, &vma->pages_count) == 0) { - vma->ops->clear_pages(vma); - GEM_BUG_ON(vma->pages); - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); + /* + * The atomic_sub_return is a read barrier for the READ_ONCE of + * vma->pages above. + * + * READ_ONCE is safe because this is either called from the same + * function (i915_vma_pin_ww), or guarded by vma->vm->mutex. + * + * TODO: We're leaving vma->pages dangling, until vma->obj->resv + * lock is required. + */ + if (pages != vma->obj->mm.pages) { + sg_free_table(pages); + kfree(pages); + } + + i915_gem_object_unpin_pages(vma->obj); } - mutex_unlock(&vma->pages_mutex); } -static void vma_put_pages(struct i915_vma *vma) +I915_SELFTEST_EXPORT void i915_vma_put_pages(struct i915_vma *vma) { if (atomic_add_unless(&vma->pages_count, -1, 1)) return; @@ -867,14 +1223,13 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags) { struct i915_vma_work *work = NULL; + struct dma_fence *moving = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; -#ifdef CONFIG_PROVE_LOCKING - if (debug_locks && !WARN_ON(!ww) && vma->resv) - assert_vma_held(vma); -#endif + assert_vma_held(vma); + GEM_BUG_ON(!ww); BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); @@ -885,14 +1240,15 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) return 0; - err = vma_get_pages(vma); + err = i915_vma_get_pages(vma); if (err) return err; if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); - if (flags & vma->vm->bind_async_flags) { + moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; + if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww); if (err) @@ -906,6 +1262,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, work->vm = i915_vm_get(vma->vm); + dma_fence_work_chain(&work->base, moving); + /* Allocate enough page directories to used PTE */ if (vma->vm->allocate_va_range) { err = i915_vm_alloc_pt_stash(vma->vm, @@ -980,7 +1338,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, - vma->obj ? vma->obj->cache_level : 0, + vma->obj->cache_level, flags, work); if (err) goto err_remove; @@ -1010,7 +1368,11 @@ err_fence: err_rpm: if (wakeref) intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); - vma_put_pages(vma); + + if (moving) + dma_fence_put(moving); + + i915_vma_put_pages(vma); return err; } @@ -1025,23 +1387,15 @@ static void flush_idle_contexts(struct intel_gt *gt) intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } -int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, - u32 align, unsigned int flags) +static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags) { struct i915_address_space *vm = vma->vm; int err; - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - -#ifdef CONFIG_LOCKDEP - WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv)); -#endif - do { - if (ww) - err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); - else - err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); + err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); + if (err != -ENOSPC) { if (!err) { err = i915_vma_wait_for_bind(vma); @@ -1060,6 +1414,30 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } while (1); } +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags) +{ + struct i915_gem_ww_ctx _ww; + int err; + + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + + if (ww) + return __i915_ggtt_pin(vma, ww, align, flags); + +#ifdef CONFIG_LOCKDEP + WARN_ON(dma_resv_held(vma->obj->base.resv)); +#endif + + for_i915_gem_ww(&_ww, err, true) { + err = i915_gem_object_lock(vma->obj, &_ww); + if (!err) + err = __i915_ggtt_pin(vma, &_ww, align, flags); + } + + return err; +} + static void __vma_close(struct i915_vma *vma, struct intel_gt *gt) { /* @@ -1113,6 +1491,7 @@ void i915_vma_reopen(struct i915_vma *vma) void i915_vma_release(struct kref *ref) { struct i915_vma *vma = container_of(ref, typeof(*vma), ref); + struct drm_i915_gem_object *obj = vma->obj; if (drm_mm_node_allocated(&vma->node)) { mutex_lock(&vma->vm->mutex); @@ -1123,15 +1502,11 @@ void i915_vma_release(struct kref *ref) } GEM_BUG_ON(i915_vma_is_active(vma)); - if (vma->obj) { - struct drm_i915_gem_object *obj = vma->obj; - - spin_lock(&obj->vma.lock); - list_del(&vma->obj_link); - if (!RB_EMPTY_NODE(&vma->obj_node)) - rb_erase(&vma->obj_node, &obj->vma.tree); - spin_unlock(&obj->vma.lock); - } + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + if (!RB_EMPTY_NODE(&vma->obj_node)) + rb_erase(&vma->obj_node, &obj->vma.tree); + spin_unlock(&obj->vma.lock); __i915_vma_remove_closed(vma); i915_vm_put(vma->vm); @@ -1217,7 +1592,7 @@ __i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) return __i915_request_await_exclusive(rq, &vma->active); } -int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) +static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) { int err; @@ -1256,19 +1631,19 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) { - dma_resv_add_excl_fence(vma->resv, fence); + dma_resv_add_excl_fence(vma->obj->base.resv, fence); obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; } } else { if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_shared(vma->resv, 1); + err = dma_resv_reserve_shared(vma->obj->base.resv, 1); if (unlikely(err)) return err; } if (fence) { - dma_resv_add_shared_fence(vma->resv, fence); + dma_resv_add_shared_fence(vma->obj->base.resv, fence); obj->write_domain = 0; } } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 648dbe744c96..32719431b3df 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,8 +55,6 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) /* do not reserve memory to prevent deadlocks */ #define __EXEC_OBJECT_NO_RESERVE BIT(31) -int __must_check __i915_vma_move_to_active(struct i915_vma *vma, - struct i915_request *rq); int __must_check _i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, struct dma_fence *fence, @@ -234,16 +232,16 @@ static inline void __i915_vma_put(struct i915_vma *vma) kref_put(&vma->ref, i915_vma_release); } -#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv) +#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv) static inline void i915_vma_lock(struct i915_vma *vma) { - dma_resv_lock(vma->resv, NULL); + dma_resv_lock(vma->obj->base.resv, NULL); } static inline void i915_vma_unlock(struct i915_vma *vma) { - dma_resv_unlock(vma->resv); + dma_resv_unlock(vma->obj->base.resv); } int __must_check @@ -418,9 +416,6 @@ static inline void i915_vma_clear_scanout(struct i915_vma *vma) list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) -struct i915_vma *i915_vma_alloc(void); -void i915_vma_free(struct i915_vma *vma); - struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma *vma); void i915_vma_make_purgeable(struct i915_vma *vma); @@ -436,4 +431,7 @@ static inline int i915_vma_sync(struct i915_vma *vma) void i915_vma_module_exit(void); int i915_vma_module_init(void); +I915_SELFTEST_DECLARE(int i915_vma_get_pages(struct i915_vma *vma)); +I915_SELFTEST_DECLARE(void i915_vma_put_pages(struct i915_vma *vma)); + #endif diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c new file mode 100644 index 000000000000..2949ceea9884 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_vma_snapshot.h" +#include "i915_vma_types.h" +#include "i915_vma.h" + +/** + * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from + * a struct i915_vma. + * @vsnap: The i915_vma_snapshot to init. + * @vma: A struct i915_vma used to initialize @vsnap. + * @name: Name associated with the snapshot. The character pointer needs to + * stay alive over the lifitime of the shapsot + */ +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name) +{ + if (!i915_vma_is_pinned(vma)) + assert_object_held(vma->obj); + + vsnap->name = name; + vsnap->size = vma->size; + vsnap->obj_size = vma->obj->base.size; + vsnap->gtt_offset = vma->node.start; + vsnap->gtt_size = vma->node.size; + vsnap->page_sizes = vma->page_sizes.gtt; + vsnap->pages = vma->pages; + vsnap->pages_rsgt = NULL; + vsnap->mr = NULL; + if (vma->obj->mm.rsgt) + vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt); + vsnap->mr = vma->obj->mm.region; + kref_init(&vsnap->kref); + vsnap->vma_resource = &vma->active; + vsnap->onstack = false; + vsnap->present = true; +} + +/** + * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from + * a struct i915_vma, but avoid kfreeing it on last put. + * @vsnap: The i915_vma_snapshot to init. + * @vma: A struct i915_vma used to initialize @vsnap. + * @name: Name associated with the snapshot. The character pointer needs to + * stay alive over the lifitime of the shapsot + */ +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name) +{ + i915_vma_snapshot_init(vsnap, vma, name); + vsnap->onstack = true; +} + +static void vma_snapshot_release(struct kref *ref) +{ + struct i915_vma_snapshot *vsnap = + container_of(ref, typeof(*vsnap), kref); + + vsnap->present = false; + if (vsnap->pages_rsgt) + i915_refct_sgt_put(vsnap->pages_rsgt); + if (!vsnap->onstack) + kfree(vsnap); +} + +/** + * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference + * @vsnap: The pointer reference + */ +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap) +{ + kref_put(&vsnap->kref, vma_snapshot_release); +} + +/** + * i915_vma_snapshot_put_onstack - Put an onstcak i915_vma_snapshot pointer + * reference and varify that the structure is released + * @vsnap: The pointer reference + * + * This function is intended to be paired with a i915_vma_init_onstack() + * and should be called before exiting the scope that declared or + * freeing the structure that embedded @vsnap to verify that all references + * have been released. + */ +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap) +{ + if (!kref_put(&vsnap->kref, vma_snapshot_release)) + GEM_BUG_ON(1); +} + +/** + * i915_vma_snapshot_resource_pin - Temporarily block the memory the + * vma snapshot is pointing to from being released. + * @vsnap: The vma snapshot. + * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs + * to be passed to the paired i915_vma_snapshot_resource_unpin. + * + * This function will temporarily try to hold up a fence or similar structure + * and will therefore enter a fence signaling critical section. + * + * Return: true if we succeeded in blocking the memory from being released, + * false otherwise. + */ +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, + bool *lockdep_cookie) +{ + bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource); + + if (pinned) + *lockdep_cookie = dma_fence_begin_signalling(); + + return pinned; +} + +/** + * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from + * being released. + * @vsnap: The vma snapshot. + * @lockdep_cookie: Cookie returned from matching i915_vma_resource_pin(). + * + * Might leave a fence signalling critical section and signal a fence. + */ +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, + bool lockdep_cookie) +{ + dma_fence_end_signalling(lockdep_cookie); + + return i915_active_release(vsnap->vma_resource); +} diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h new file mode 100644 index 000000000000..940581df4622 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma_snapshot.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_VMA_SNAPSHOT_H_ +#define _I915_VMA_SNAPSHOT_H_ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <linux/types.h> + +struct i915_active; +struct i915_refct_sgt; +struct i915_vma; +struct intel_memory_region; +struct sg_table; + +/** + * DOC: Simple utilities for snapshotting GPU vma metadata, later used for + * error capture. Vi use a separate header for this to avoid issues due to + * recursive header includes. + */ + +/** + * struct i915_vma_snapshot - Snapshot of vma metadata. + * @size: The vma size in bytes. + * @obj_size: The size of the underlying object in bytes. + * @gtt_offset: The gtt offset the vma is bound to. + * @gtt_size: The size in bytes allocated for the vma in the GTT. + * @pages: The struct sg_table pointing to the pages bound. + * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any. + * @mr: The memory region pointed for the pages bound. + * @kref: Reference for this structure. + * @vma_resource: FIXME: A means to keep the unbind fence from signaling. + * Temporarily while we have only sync unbinds, and still use the vma + * active, we use that. With async unbinding we need a signaling refcount + * for the unbind fence. + * @page_sizes: The vma GTT page sizes information. + * @onstack: Whether the structure shouldn't be freed on final put. + * @present: Whether the structure is present and initialized. + */ +struct i915_vma_snapshot { + const char *name; + size_t size; + size_t obj_size; + size_t gtt_offset; + size_t gtt_size; + struct sg_table *pages; + struct i915_refct_sgt *pages_rsgt; + struct intel_memory_region *mr; + struct kref kref; + struct i915_active *vma_resource; + u32 page_sizes; + bool onstack:1; + bool present:1; +}; + +void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name); + +void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap, + struct i915_vma *vma, + const char *name); + +void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap); + +void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap); + +bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap, + bool *lockdep_cookie); + +void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap, + bool lockdep_cookie); + +/** + * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot + * @gfp: Allocation mode. + * + * Return: A pointer to a struct i915_vma_snapshot if successful. + * NULL otherwise. + */ +static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp) +{ + return kmalloc(sizeof(struct i915_vma_snapshot), gfp); +} + +/** + * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot + * + * Return: A pointer to a struct i915_vma_snapshot. + */ +static inline struct i915_vma_snapshot * +i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap) +{ + kref_get(&vsnap->kref); + return vsnap; +} + +/** + * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is + * present and initialized. + * + * Return: true if present and initialized; false otherwise. + */ +static inline bool +i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap) +{ + return vsnap && vsnap->present; +} + +#endif diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 4ee6e54799f4..ca575e129ced 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -187,7 +187,6 @@ struct i915_vma { const struct i915_vma_ops *ops; struct drm_i915_gem_object *obj; - struct dma_resv *resv; /** Alias of obj->resv */ struct sg_table *pages; void __iomem *iomap; @@ -271,7 +270,6 @@ struct i915_vma { #define I915_VMA_PAGES_BIAS 24 #define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) atomic_t pages_count; /* number of active binds to the pages */ - struct mutex pages_mutex; /* protect acquire/release of backing pages */ /** * Support different GGTT views into the same object. diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 305facedd284..04fd266d70e2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -83,33 +83,26 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -static const char *iommu_name(void) -{ - const char *msg = "n/a"; - -#ifdef CONFIG_INTEL_IOMMU - msg = enableddisabled(intel_iommu_gfx_mapped); -#endif - - return msg; -} - void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - if (info->graphics_rel) - drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel); + if (info->graphics.rel) + drm_printf(p, "graphics version: %u.%02u\n", info->graphics.ver, + info->graphics.rel); else - drm_printf(p, "graphics version: %u\n", info->graphics_ver); + drm_printf(p, "graphics version: %u\n", info->graphics.ver); - if (info->media_rel) - drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel); + if (info->media.rel) + drm_printf(p, "media version: %u.%02u\n", info->media.ver, info->media.rel); else - drm_printf(p, "media version: %u\n", info->media_ver); + drm_printf(p, "media version: %u\n", info->media.ver); + + if (info->display.rel) + drm_printf(p, "display version: %u.%02u\n", info->display.ver, info->display.rel); + else + drm_printf(p, "display version: %u\n", info->display.ver); - drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); - drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); drm_printf(p, "page-sizes: %x\n", info->page_sizes); drm_printf(p, "platform: %s\n", intel_platform_name(info->platform)); @@ -177,6 +170,10 @@ static const u16 subplatform_portf_ids[] = { INTEL_ICL_PORT_F_IDS(0), }; +static const u16 subplatform_rpls_ids[] = { + INTEL_RPLS_IDS(0), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -213,6 +210,9 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_portf_ids, ARRAY_SIZE(subplatform_portf_ids))) { mask = BIT(INTEL_SUBPLATFORM_PORTF); + } else if (find_devid(devid, subplatform_rpls_ids, + ARRAY_SIZE(subplatform_rpls_ids))) { + mask = BIT(INTEL_SUBPLATFORM_RPL_S); } if (IS_TIGERLAKE(i915)) { @@ -326,33 +326,33 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { drm_info(&dev_priv->drm, "Display fused off, disabling\n"); - info->pipe_mask = 0; - info->cpu_transcoder_mask = 0; + info->display.pipe_mask = 0; + info->display.cpu_transcoder_mask = 0; } else if (fuse_strap & IVB_PIPE_C_DISABLE) { drm_info(&dev_priv->drm, "PipeC fused off\n"); - info->pipe_mask &= ~BIT(PIPE_C); - info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); + info->display.pipe_mask &= ~BIT(PIPE_C); + info->display.cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) { u32 dfsm = intel_de_read(dev_priv, SKL_DFSM); if (dfsm & SKL_DFSM_PIPE_A_DISABLE) { - info->pipe_mask &= ~BIT(PIPE_A); - info->cpu_transcoder_mask &= ~BIT(TRANSCODER_A); + info->display.pipe_mask &= ~BIT(PIPE_A); + info->display.cpu_transcoder_mask &= ~BIT(TRANSCODER_A); } if (dfsm & SKL_DFSM_PIPE_B_DISABLE) { - info->pipe_mask &= ~BIT(PIPE_B); - info->cpu_transcoder_mask &= ~BIT(TRANSCODER_B); + info->display.pipe_mask &= ~BIT(PIPE_B); + info->display.cpu_transcoder_mask &= ~BIT(TRANSCODER_B); } if (dfsm & SKL_DFSM_PIPE_C_DISABLE) { - info->pipe_mask &= ~BIT(PIPE_C); - info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); + info->display.pipe_mask &= ~BIT(PIPE_C); + info->display.cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } if (DISPLAY_VER(dev_priv) >= 12 && (dfsm & TGL_DFSM_PIPE_D_DISABLE)) { - info->pipe_mask &= ~BIT(PIPE_D); - info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D); + info->display.pipe_mask &= ~BIT(PIPE_D); + info->display.cpu_transcoder_mask &= ~BIT(TRANSCODER_D); } if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) @@ -369,7 +369,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->display.has_dsc = 0; } - if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active()) { + if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active(dev_priv)) { drm_info(&dev_priv->drm, "Disabling ppGTT for VT-d support\n"); info->ppgtt_type = INTEL_PPGTT_NONE; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 8e6f48d1eb7b..78597d382445 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -110,6 +110,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_G10 0 #define INTEL_SUBPLATFORM_G11 1 +/* ADL-S */ +#define INTEL_SUBPLATFORM_RPL_S 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, @@ -123,6 +126,7 @@ enum intel_ppgtt_type { func(is_dgfx); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ + func(has_64k_pages); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ func(has_global_mocs); \ @@ -166,11 +170,14 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); +struct ip_version { + u8 ver; + u8 rel; +}; + struct intel_device_info { - u8 graphics_ver; - u8 graphics_rel; - u8 media_ver; - u8 media_rel; + struct ip_version graphics; + struct ip_version media; intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ @@ -189,17 +196,17 @@ struct intel_device_info { u8 gt; /* GT number, 0 if undefined */ - u8 pipe_mask; - u8 cpu_transcoder_mask; - - u8 abox_mask; - #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG struct { u8 ver; + u8 rel; + + u8 pipe_mask; + u8 cpu_transcoder_mask; + u8 abox_mask; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_DISPLAY_FOR_EACH_FLAG(DEFINE_FLAG); diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 4e70c1a9ef2e..cf6e98962d82 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -109,7 +109,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; } - if (intel_uc_wants_guc_submission(&dev_priv->gt.uc)) { + if (intel_uc_wants_guc_submission(&to_gt(dev_priv)->uc)) { drm_err(&dev_priv->drm, "i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); return -EIO; diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e7f7e6627750..c70d7e286a51 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -3,6 +3,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/prandom.h> + #include "intel_memory_region.h" #include "i915_drv.h" #include "i915_ttm_buddy_manager.h" @@ -29,6 +31,110 @@ static const struct { }, }; +static int __iopagetest(struct intel_memory_region *mem, + u8 __iomem *va, int pagesize, + u8 value, resource_size_t offset, + const void *caller) +{ + int byte = prandom_u32_max(pagesize); + u8 result[3]; + + memset_io(va, value, pagesize); /* or GPF! */ + wmb(); + + result[0] = ioread8(va); + result[1] = ioread8(va + byte); + result[2] = ioread8(va + pagesize - 1); + if (memchr_inv(result, value, sizeof(result))) { + dev_err(mem->i915->drm.dev, + "Failed to read back from memory region:%pR at [%pa + %pa] for %ps; wrote %x, read (%x, %x, %x)\n", + &mem->region, &mem->io_start, &offset, caller, + value, result[0], result[1], result[2]); + return -EINVAL; + } + + return 0; +} + +static int iopagetest(struct intel_memory_region *mem, + resource_size_t offset, + const void *caller) +{ + const u8 val[] = { 0x0, 0xa5, 0xc3, 0xf0 }; + void __iomem *va; + int err; + int i; + + va = ioremap_wc(mem->io_start + offset, PAGE_SIZE); + if (!va) { + dev_err(mem->i915->drm.dev, + "Failed to ioremap memory region [%pa + %pa] for %ps\n", + &mem->io_start, &offset, caller); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(val); i++) { + err = __iopagetest(mem, va, PAGE_SIZE, val[i], offset, caller); + if (err) + break; + + err = __iopagetest(mem, va, PAGE_SIZE, ~val[i], offset, caller); + if (err) + break; + } + + iounmap(va); + return err; +} + +static resource_size_t random_page(resource_size_t last) +{ + /* Limited to low 44b (16TiB), but should suffice for a spot check */ + return prandom_u32_max(last >> PAGE_SHIFT) << PAGE_SHIFT; +} + +static int iomemtest(struct intel_memory_region *mem, + bool test_all, + const void *caller) +{ + resource_size_t last = resource_size(&mem->region) - PAGE_SIZE; + resource_size_t page; + int err; + + /* + * Quick test to check read/write access to the iomap (backing store). + * + * Write a byte, read it back. If the iomapping fails, we expect + * a GPF preventing further execution. If the backing store does not + * exist, the read back will return garbage. We check a couple of pages, + * the first and last of the specified region to confirm the backing + * store + iomap does cover the entire memory region; and we check + * a random offset within as a quick spot check for bad memory. + */ + + if (test_all) { + for (page = 0; page <= last; page += PAGE_SIZE) { + err = iopagetest(mem, page, caller); + if (err) + return err; + } + } else { + err = iopagetest(mem, 0, caller); + if (err) + return err; + + err = iopagetest(mem, last, caller); + if (err) + return err; + + err = iopagetest(mem, random_page(last), caller); + if (err) + return err; + } + + return 0; +} + struct intel_memory_region * intel_memory_region_lookup(struct drm_i915_private *i915, u16 class, u16 instance) @@ -90,6 +196,21 @@ void intel_memory_region_debug(struct intel_memory_region *mr, &mr->total, &mr->avail); } +static int intel_memory_region_memtest(struct intel_memory_region *mem, + void *caller) +{ + struct drm_i915_private *i915 = mem->i915; + int err = 0; + + if (!mem->io_start) + return 0; + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) || i915->params.memtest) + err = iomemtest(mem, i915->params.memtest, caller); + + return err; +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, @@ -126,9 +247,15 @@ intel_memory_region_create(struct drm_i915_private *i915, goto err_free; } - kref_init(&mem->kref); + err = intel_memory_region_memtest(mem, (void *)_RET_IP_); + if (err) + goto err_release; + return mem; +err_release: + if (mem->ops->release) + mem->ops->release(mem); err_free: kfree(mem); return ERR_PTR(err); @@ -144,28 +271,17 @@ void intel_memory_region_set_name(struct intel_memory_region *mem, va_end(ap); } -static void __intel_memory_region_destroy(struct kref *kref) +void intel_memory_region_destroy(struct intel_memory_region *mem) { - struct intel_memory_region *mem = - container_of(kref, typeof(*mem), kref); + int ret = 0; if (mem->ops->release) - mem->ops->release(mem); + ret = mem->ops->release(mem); + GEM_WARN_ON(!list_empty_careful(&mem->objects.list)); mutex_destroy(&mem->objects.lock); - kfree(mem); -} - -struct intel_memory_region * -intel_memory_region_get(struct intel_memory_region *mem) -{ - kref_get(&mem->kref); - return mem; -} - -void intel_memory_region_put(struct intel_memory_region *mem) -{ - kref_put(&mem->kref, __intel_memory_region_destroy); + if (!ret) + kfree(mem); } /* Global memory region registration -- only slight layer inversions! */ @@ -234,7 +350,7 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915) fetch_and_zero(&i915->mm.regions[i]); if (region) - intel_memory_region_put(region); + intel_memory_region_destroy(region); } } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3feae3353d33..5625c9c38993 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -6,7 +6,6 @@ #ifndef __INTEL_MEMORY_REGION_H__ #define __INTEL_MEMORY_REGION_H__ -#include <linux/kref.h> #include <linux/ioport.h> #include <linux/mutex.h> #include <linux/io-mapping.h> @@ -51,7 +50,7 @@ struct intel_memory_region_ops { unsigned int flags; int (*init)(struct intel_memory_region *mem); - void (*release)(struct intel_memory_region *mem); + int (*release)(struct intel_memory_region *mem); int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, @@ -71,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct kref kref; - resource_size_t io_start; resource_size_t min_page_size; resource_size_t total; @@ -110,9 +107,7 @@ intel_memory_region_create(struct drm_i915_private *i915, u16 instance, const struct intel_memory_region_ops *ops); -struct intel_memory_region * -intel_memory_region_get(struct intel_memory_region *mem); -void intel_memory_region_put(struct intel_memory_region *mem); +void intel_memory_region_destroy(struct intel_memory_region *mem); int intel_memory_regions_hw_probe(struct drm_i915_private *i915); void intel_memory_regions_driver_release(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index d1d4b97b86f5..da8f82c2342f 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -129,6 +129,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) return PCH_JSP; case INTEL_PCH_ADP_DEVICE_ID_TYPE: case INTEL_PCH_ADP2_DEVICE_ID_TYPE: + case INTEL_PCH_ADP3_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Alder Lake PCH\n"); drm_WARN_ON(&dev_priv->drm, !IS_ALDERLAKE_S(dev_priv) && !IS_ALDERLAKE_P(dev_priv)); diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 7c0d83d292dc..6bff77521094 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -57,6 +57,7 @@ enum intel_pch { #define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80 #define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180 +#define INTEL_PCH_ADP3_DEVICE_ID_TYPE 0x7A00 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e9f2e307b98e..434b1f8b7fe3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -36,6 +36,7 @@ #include "display/intel_atomic_plane.h" #include "display/intel_bw.h" #include "display/intel_de.h" +#include "display/intel_display_trace.h" #include "display/intel_display_types.h" #include "display/intel_fb.h" #include "display/intel_fbc.h" @@ -47,7 +48,6 @@ #include "i915_drv.h" #include "i915_fixed.h" #include "i915_irq.h" -#include "i915_trace.h" #include "intel_pcode.h" #include "intel_pm.h" #include "vlv_sideband.h" @@ -98,7 +98,7 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) * "Plane N strech max must be programmed to 11b (x1) * when Async flips are enabled on that plane." */ - if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active()) + if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active(dev_priv)) intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), SKL_PLANE1_STRETCH_MAX_MASK, SKL_PLANE1_STRETCH_MAX_X1); } @@ -989,7 +989,7 @@ static void g4x_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) - trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + trace_g4x_wm(intel_crtc_for_pipe(dev_priv, pipe), wm); intel_uncore_write(&dev_priv->uncore, DSPFW1, FW_WM(wm->sr.plane, SR) | @@ -1021,7 +1021,7 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) { - trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + trace_vlv_wm(intel_crtc_for_pipe(dev_priv, pipe), wm); intel_uncore_write(&dev_priv->uncore, VLV_DDL(pipe), (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) | @@ -2336,6 +2336,20 @@ static void i965_update_wm(struct drm_i915_private *dev_priv) #undef FW_WM +static struct intel_crtc *intel_crtc_for_plane(struct drm_i915_private *i915, + enum i9xx_plane_id i9xx_plane) +{ + struct intel_plane *plane; + + for_each_intel_plane(&i915->drm, plane) { + if (plane->id == PLANE_PRIMARY && + plane->i9xx_plane == i9xx_plane) + return intel_crtc_for_pipe(i915, plane->pipe); + } + + return NULL; +} + static void i9xx_update_wm(struct drm_i915_private *dev_priv) { const struct intel_watermark_params *wm_info; @@ -2357,7 +2371,7 @@ static void i9xx_update_wm(struct drm_i915_private *dev_priv) fifo_size = i830_get_fifo_size(dev_priv, PLANE_A); else fifo_size = i9xx_get_fifo_size(dev_priv, PLANE_A); - crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A); + crtc = intel_crtc_for_plane(dev_priv, PLANE_A); if (intel_crtc_active(crtc)) { const struct drm_display_mode *pipe_mode = &crtc->config->hw.pipe_mode; @@ -2387,7 +2401,7 @@ static void i9xx_update_wm(struct drm_i915_private *dev_priv) fifo_size = i830_get_fifo_size(dev_priv, PLANE_B); else fifo_size = i9xx_get_fifo_size(dev_priv, PLANE_B); - crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B); + crtc = intel_crtc_for_plane(dev_priv, PLANE_B); if (intel_crtc_active(crtc)) { const struct drm_display_mode *pipe_mode = &crtc->config->hw.pipe_mode; @@ -3369,13 +3383,8 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv, } /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ - /* - * FIXME this is racy. FBC might get enabled later. - * What we should check here is whether FBC can be - * enabled sometime later. - */ - if (DISPLAY_VER(dev_priv) == 5 && !merged->fbc_wm_enabled && - intel_fbc_is_active(&dev_priv->fbc)) { + if (DISPLAY_VER(dev_priv) == 5 && HAS_FBC(dev_priv) && + dev_priv->params.enable_fbc && !merged->fbc_wm_enabled) { for (level = 2; level <= max_level; level++) { struct intel_wm_level *wm = &merged->wm[level]; @@ -6909,7 +6918,7 @@ void g4x_wm_sanitize(struct drm_i915_private *dev_priv) for_each_intel_plane(&dev_priv->drm, plane) { struct intel_crtc *crtc = - intel_get_crtc_for_pipe(dev_priv, plane->pipe); + intel_crtc_for_pipe(dev_priv, plane->pipe); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane_state *plane_state = @@ -7065,7 +7074,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) for_each_intel_plane(&dev_priv->drm, plane) { struct intel_crtc *crtc = - intel_get_crtc_for_pipe(dev_priv, plane->pipe); + intel_crtc_for_pipe(dev_priv, plane->pipe); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane_state *plane_state = @@ -7452,9 +7461,9 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) { - /* Wa_1409120013:tgl,rkl,adl-s,dg1 */ + /* Wa_1409120013:tgl,rkl,adl-s,dg1,dg2 */ if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || - IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) + IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv) || IS_DG2(dev_priv)) intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, DPFC_CHICKEN_COMP_DUMMY_PIXEL); @@ -7482,11 +7491,34 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0)) + if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } +static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* Wa_22010146351:xehpsdv */ + if (IS_XEHPSDV_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0)) + intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS); +} + +static void dg2_init_clock_gating(struct drm_i915_private *i915) +{ + /* Wa_22010954014:dg2_g10 */ + if (IS_DG2_G10(i915)) + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGSI_SIDECLK_DIS); + + /* + * Wa_14010733611:dg2_g10 + * Wa_22010146351:dg2_g10 + */ + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGR_DIS | SGGI_DIS); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -7530,12 +7562,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7897,6 +7929,8 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs = .init_clock_gating = platform##_init_clock_gating, \ } +CG_FUNCS(dg2); +CG_FUNCS(xehpsdv); CG_FUNCS(adlp); CG_FUNCS(dg1); CG_FUNCS(gen12lp); @@ -7933,7 +7967,11 @@ CG_FUNCS(nop); */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs; + else if (IS_XEHPSDV(dev_priv)) + dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs; + else if (IS_ALDERLAKE_P(dev_priv)) dev_priv->clock_gating_funcs = &adlp_clock_gating_funcs; else if (IS_DG1(dev_priv)) dev_priv->clock_gating_funcs = &dg1_clock_gating_funcs; diff --git a/drivers/gpu/drm/i915/intel_pm_types.h b/drivers/gpu/drm/i915/intel_pm_types.h new file mode 100644 index 000000000000..211632f58751 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_pm_types.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __INTEL_PM_TYPES_H__ +#define __INTEL_PM_TYPES_H__ + +#include <linux/types.h> + +#include "display/intel_display.h" + +enum intel_ddb_partitioning { + INTEL_DDB_PART_1_2, + INTEL_DDB_PART_5_6, /* IVB+ */ +}; + +struct ilk_wm_values { + u32 wm_pipe[3]; + u32 wm_lp[3]; + u32 wm_lp_spr[3]; + bool enable_fbc_wm; + enum intel_ddb_partitioning partitioning; +}; + +struct g4x_pipe_wm { + u16 plane[I915_MAX_PLANES]; + u16 fbc; +}; + +struct g4x_sr_wm { + u16 plane; + u16 cursor; + u16 fbc; +}; + +struct vlv_wm_ddl_values { + u8 plane[I915_MAX_PLANES]; +}; + +struct vlv_wm_values { + struct g4x_pipe_wm pipe[3]; + struct g4x_sr_wm sr; + struct vlv_wm_ddl_values ddl[3]; + u8 level; + bool cxsr; +}; + +struct g4x_wm_values { + struct g4x_pipe_wm pipe[2]; + struct g4x_sr_wm sr; + struct g4x_sr_wm hpll; + bool cxsr; + bool hpll_en; + bool fbc_en; +}; + +struct skl_ddb_entry { + u16 start, end; /* in number of blocks, 'end' is exclusive */ +}; + +static inline u16 skl_ddb_entry_size(const struct skl_ddb_entry *entry) +{ + return entry->end - entry->start; +} + +static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1, + const struct skl_ddb_entry *e2) +{ + if (e1->start == e2->start && e1->end == e2->end) + return true; + + return false; +} + +#endif /* __INTEL_PM_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 98c7339bf8ba..f2b888c16958 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -104,19 +104,50 @@ int intel_region_ttm_init(struct intel_memory_region *mem) * memory region, and if it was registered with the TTM device, * removes that registration. */ -void intel_region_ttm_fini(struct intel_memory_region *mem) +int intel_region_ttm_fini(struct intel_memory_region *mem) { - int ret; + struct ttm_resource_manager *man = mem->region_private; + int ret = -EBUSY; + int count; + + /* + * Put the region's move fences. This releases requests that + * may hold on to contexts and vms that may hold on to buffer + * objects placed in this region. + */ + if (man) + ttm_resource_manager_cleanup(man); + + /* Flush objects from region. */ + for (count = 0; count < 10; ++count) { + i915_gem_flush_free_objects(mem->i915); + + mutex_lock(&mem->objects.lock); + if (list_empty(&mem->objects.list)) + ret = 0; + mutex_unlock(&mem->objects.lock); + if (!ret) + break; + + msleep(20); + flush_delayed_work(&mem->i915->bdev.wq); + } + + /* If we leaked objects, Don't free the region causing use after free */ + if (ret || !man) + return ret; ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; + + return ret; } /** - * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource - * to an sg_table. + * intel_region_ttm_resource_to_rsgt - + * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. * @@ -126,17 +157,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct i915_refct_sgt * +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, + struct ttm_resource *res) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); } else { - return i915_sg_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 6f44075920f2..fdee5e7bd46c 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -20,10 +20,11 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv); int intel_region_ttm_init(struct intel_memory_region *mem); -void intel_region_ttm_fini(struct intel_memory_region *mem); +int intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct i915_refct_sgt * +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, + struct ttm_resource *res); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 6cf967631395..a4b16b9e2e55 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -23,7 +23,8 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x +#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x +#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x static const struct intel_step_info skl_revids[] = { [0x6] = { COMMON_STEP(G0) }, @@ -33,13 +34,13 @@ static const struct intel_step_info skl_revids[] = { }; static const struct intel_step_info kbl_revids[] = { - [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, - [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 }, - [4] = { .gt_step = STEP_F0, .display_step = STEP_C0 }, - [5] = { .gt_step = STEP_C0, .display_step = STEP_B1 }, - [6] = { .gt_step = STEP_D1, .display_step = STEP_B1 }, - [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, + [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 }, + [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 }, + [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 }, + [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 }, + [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 }, }; static const struct intel_step_info bxt_revids[] = { @@ -63,16 +64,16 @@ static const struct intel_step_info jsl_ehl_revids[] = { }; static const struct intel_step_info tgl_uy_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, - [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, - [3] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, + [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, + [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, + [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct intel_step_info tgl_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, - [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, + [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 }, }; static const struct intel_step_info rkl_revids[] = { @@ -87,38 +88,38 @@ static const struct intel_step_info dg1_revids[] = { }; static const struct intel_step_info adls_revids[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, - [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, }; static const struct intel_step_info adlp_revids[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, - [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, }; static const struct intel_step_info xehpsdv_revids[] = { - [0x0] = { .gt_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A1 }, - [0x4] = { .gt_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0) }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1) }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0) }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0) }, }; static const struct intel_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, - [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, - [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, }; static const struct intel_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, - [0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, - [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, }; void intel_step_init(struct drm_i915_private *i915) @@ -179,7 +180,7 @@ void intel_step_init(struct drm_i915_private *i915) if (!revids) return; - if (revid < size && revids[revid].gt_step != STEP_NONE) { + if (revid < size && revids[revid].graphics_step != STEP_NONE) { step = revids[revid]; } else { drm_warn(&i915->drm, "Unknown revid 0x%02x\n", revid); @@ -192,7 +193,7 @@ void intel_step_init(struct drm_i915_private *i915) * steppings in the array are not monotonically increasing, but * it's better than defaulting to 0. */ - while (revid < size && revids[revid].gt_step == STEP_NONE) + while (revid < size && revids[revid].graphics_step == STEP_NONE) revid++; if (revid < size) { @@ -201,12 +202,12 @@ void intel_step_init(struct drm_i915_private *i915) step = revids[revid]; } else { drm_dbg(&i915->drm, "Using future steppings\n"); - step.gt_step = STEP_FUTURE; + step.graphics_step = STEP_FUTURE; step.display_step = STEP_FUTURE; } } - if (drm_WARN_ON(&i915->drm, step.gt_step == STEP_NONE)) + if (drm_WARN_ON(&i915->drm, step.graphics_step == STEP_NONE)) return; RUNTIME_INFO(i915)->step = step; diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index f6641e2a3c77..d71a99bd5179 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -11,8 +11,9 @@ struct drm_i915_private; struct intel_step_info { - u8 gt_step; + u8 graphics_step; u8 display_step; + u8 media_step; }; #define STEP_ENUM_VAL(name) STEP_##name, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 722910d02b5f..fc25ebf1a593 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2020,7 +2020,7 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int uncore_mmio_setup(struct intel_uncore *uncore) +int intel_uncore_setup_mmio(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); @@ -2053,7 +2053,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore) return 0; } -static void uncore_mmio_cleanup(struct intel_uncore *uncore) +void intel_uncore_cleanup_mmio(struct intel_uncore *uncore) { struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev); @@ -2061,12 +2061,13 @@ static void uncore_mmio_cleanup(struct intel_uncore *uncore) } void intel_uncore_init_early(struct intel_uncore *uncore, - struct drm_i915_private *i915) + struct intel_gt *gt) { spin_lock_init(&uncore->lock); - uncore->i915 = i915; - uncore->rpm = &i915->runtime_pm; - uncore->debug = &i915->mmio_debug; + uncore->i915 = gt->i915; + uncore->gt = gt; + uncore->rpm = >->i915->runtime_pm; + uncore->debug = >->i915->mmio_debug; } static void uncore_raw_init(struct intel_uncore *uncore) @@ -2146,10 +2147,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) struct drm_i915_private *i915 = uncore->i915; int ret; - ret = uncore_mmio_setup(uncore); - if (ret) - return ret; - /* * The boot firmware initializes local memory and assesses its health. * If memory training fails, the punit will have been instructed to @@ -2170,7 +2167,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) } else { ret = uncore_forcewake_init(uncore); if (ret) - goto out_mmio_cleanup; + return ret; } /* make sure fw funcs are set if and only if we have fw*/ @@ -2192,11 +2189,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n"); return 0; - -out_mmio_cleanup: - uncore_mmio_cleanup(uncore); - - return ret; } /* @@ -2261,8 +2253,6 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore) intel_uncore_fw_domains_fini(uncore); iosf_mbi_punit_release(); } - - uncore_mmio_cleanup(uncore); } static const struct reg_whitelist { diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3248e4e2c540..210fe2a71612 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -129,6 +129,7 @@ struct intel_uncore { void __iomem *regs; struct drm_i915_private *i915; + struct intel_gt *gt; struct intel_runtime_pm *rpm; spinlock_t lock; /** lock is also taken in irq contexts. */ @@ -217,12 +218,14 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, - struct drm_i915_private *i915); + struct intel_gt *gt); +int intel_uncore_setup_mmio(struct intel_uncore *uncore); int intel_uncore_init_mmio(struct intel_uncore *uncore); void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, struct intel_gt *gt); bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore); bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore); +void intel_uncore_cleanup_mmio(struct intel_uncore *uncore); void intel_uncore_fini_mmio(struct intel_uncore *uncore); void intel_uncore_suspend(struct intel_uncore *uncore); void intel_uncore_resume_early(struct intel_uncore *uncore); diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 5e511bb891f9..f06d21005106 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -220,7 +220,7 @@ static bool __wopcm_regs_locked(struct intel_uncore *uncore, void intel_wopcm_init(struct intel_wopcm *wopcm) { struct drm_i915_private *i915 = wopcm_to_i915(wopcm); - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); u32 guc_fw_size = intel_uc_fw_get_upload_size(>->uc.guc.fw); u32 huc_fw_size = intel_uc_fw_get_upload_size(>->uc.huc.fw); u32 ctx_rsvd = context_reserved_size(i915); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index e2314ad9546d..15311eaed848 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -44,6 +44,11 @@ struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) return container_of(pxp, struct intel_gt, pxp); } +bool intel_pxp_is_enabled(const struct intel_pxp *pxp) +{ + return pxp->ce; +} + bool intel_pxp_is_active(const struct intel_pxp *pxp) { return pxp->arb_is_valid; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index aa262258d4d4..73847e535cab 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -6,17 +6,15 @@ #ifndef __INTEL_PXP_H__ #define __INTEL_PXP_H__ -#include "intel_pxp_types.h" +#include <linux/errno.h> +#include <linux/types.h> +struct intel_pxp; struct drm_i915_gem_object; -static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) -{ - return pxp->ce; -} - #ifdef CONFIG_DRM_I915_PXP struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); +bool intel_pxp_is_enabled(const struct intel_pxp *pxp); bool intel_pxp_is_active(const struct intel_pxp *pxp); void intel_pxp_init(struct intel_pxp *pxp); @@ -48,6 +46,11 @@ static inline int intel_pxp_start(struct intel_pxp *pxp) return -ENODEV; } +static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) +{ + return false; +} + static inline bool intel_pxp_is_active(const struct intel_pxp *pxp) { return false; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 23fd86de5a24..6a7d4e2ee138 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -7,26 +7,29 @@ #include "intel_pxp_irq.h" #include "intel_pxp_pm.h" #include "intel_pxp_session.h" +#include "i915_drv.h" -void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +void intel_pxp_suspend_prepare(struct intel_pxp *pxp) { if (!intel_pxp_is_enabled(pxp)) return; pxp->arb_is_valid = false; - /* - * Contexts using protected objects keep a runtime PM reference, so we - * can only runtime suspend when all of them have been either closed - * or banned. Therefore, there is no need to invalidate in that - * scenario. - */ - if (!runtime) - intel_pxp_invalidate(pxp); + intel_pxp_invalidate(pxp); +} - intel_pxp_fini_hw(pxp); +void intel_pxp_suspend(struct intel_pxp *pxp) +{ + intel_wakeref_t wakeref; - pxp->hw_state_invalidated = false; + if (!intel_pxp_is_enabled(pxp)) + return; + + with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) { + intel_pxp_fini_hw(pxp); + pxp->hw_state_invalidated = false; + } } void intel_pxp_resume(struct intel_pxp *pxp) @@ -44,3 +47,15 @@ void intel_pxp_resume(struct intel_pxp *pxp) intel_pxp_init_hw(pxp); } + +void intel_pxp_runtime_suspend(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return; + + pxp->arb_is_valid = false; + + intel_pxp_fini_hw(pxp); + + pxp->hw_state_invalidated = false; +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h index c89e97a0c3d0..16990a3f2f85 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h @@ -9,16 +9,29 @@ #include "intel_pxp_types.h" #ifdef CONFIG_DRM_I915_PXP -void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime); +void intel_pxp_suspend_prepare(struct intel_pxp *pxp); +void intel_pxp_suspend(struct intel_pxp *pxp); void intel_pxp_resume(struct intel_pxp *pxp); +void intel_pxp_runtime_suspend(struct intel_pxp *pxp); #else -static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime) +static inline void intel_pxp_suspend_prepare(struct intel_pxp *pxp) +{ +} + +static inline void intel_pxp_suspend(struct intel_pxp *pxp) { } static inline void intel_pxp_resume(struct intel_pxp *pxp) { } -#endif +static inline void intel_pxp_runtime_suspend(struct intel_pxp *pxp) +{ +} +#endif +static inline void intel_pxp_runtime_resume(struct intel_pxp *pxp) +{ + intel_pxp_resume(pxp); +} #endif /* __INTEL_PXP_PM_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index 5d169624ad60..195b2323ec00 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -16,7 +16,9 @@ static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) { - return &kdev_to_i915(i915_kdev)->gt.pxp; + struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); + + return &to_gt(i915)->pxp; } static int intel_pxp_tee_io_message(struct intel_pxp *pxp, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 73ef7d1754e1..7ce5f37ee12e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -7,9 +7,7 @@ #define __INTEL_PXP_TYPES_H__ #include <linux/completion.h> -#include <linux/list.h> #include <linux/mutex.h> -#include <linux/spinlock.h> #include <linux/types.h> #include <linux/workqueue.h> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 61bf4560d8af..2dac9be1de58 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -254,7 +254,7 @@ int i915_active_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_barrier), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 152d9ab135b1..b5576888cd78 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -248,7 +248,7 @@ int i915_gem_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_ww_ctx), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f99bb0113726..75b709c26dd3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -442,6 +442,7 @@ static int igt_evict_contexts(void *arg) /* Overfill the GGTT with context objects and so try to evict one. */ for_each_engine(engine, gt, id) { struct i915_sw_fence fence; + struct i915_request *last = NULL; count = 0; onstack_fence_init(&fence); @@ -479,6 +480,9 @@ static int igt_evict_contexts(void *arg) i915_request_add(rq); count++; + if (last) + i915_request_put(last); + last = i915_request_get(rq); err = 0; } while(1); onstack_fence_fini(&fence); @@ -486,6 +490,21 @@ static int igt_evict_contexts(void *arg) count, engine->name); if (err) break; + if (last) { + if (i915_request_wait(last, 0, HZ) < 0) { + err = -EIO; + i915_request_put(last); + pr_err("Failed waiting for last request (on %s)", + engine->name); + break; + } + i915_request_put(last); + } + err = intel_gt_wait_for_idle(engine->gt, HZ * 3); + if (err) { + pr_err("Failed to idle GT (on %s)", engine->name); + break; + } } mutex_lock(&ggtt->vm.mutex); @@ -526,7 +545,7 @@ int i915_gem_evict_mock_selftests(void) return -ENOMEM; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, &i915->gt); + err = i915_subtests(tests, to_gt(i915)); mock_destroy_device(i915); return err; @@ -538,8 +557,8 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_evict_contexts), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 46f4236039a9..575705c3bce9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -155,7 +155,7 @@ static int igt_ppgtt_alloc(void *arg) if (!HAS_PPGTT(dev_priv)) return 0; - ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); @@ -1053,7 +1053,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - ppgtt = i915_ppgtt_create(&dev_priv->gt, 0); + ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); goto out_free; @@ -1275,7 +1275,7 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); - GEM_BUG_ON(vma->pages); + GEM_BUG_ON(atomic_read(&vma->pages_count)); atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; @@ -1953,7 +1953,9 @@ static int igt_cs_tlb(void *arg) goto end; } - err = vma->ops->set_pages(vma); + i915_gem_object_lock(bbe, NULL); + err = i915_vma_get_pages(vma); + i915_gem_object_unlock(bbe); if (err) goto end; @@ -1994,7 +1996,7 @@ end_ww: i915_request_put(rq); } - vma->ops->clear_pages(vma); + i915_vma_put_pages(vma); err = context_sync(ce); if (err) { @@ -2009,7 +2011,9 @@ end_ww: goto end; } - err = vma->ops->set_pages(vma); + i915_gem_object_lock(act, NULL); + err = i915_vma_get_pages(vma); + i915_gem_object_unlock(act); if (err) goto end; @@ -2047,7 +2051,7 @@ end_ww: } end_spin(batch, count - 1); - vma->ops->clear_pages(vma); + i915_vma_put_pages(vma); err = context_sync(ce); if (err) { diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 9e9a6cb1d9e5..88db2e3d81d0 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -424,7 +424,7 @@ int i915_perf_live_selftests(struct drm_i915_private *i915) if (!perf->metrics_kobj || !perf->ops.enable_metric_set) return 0; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; err = alloc_empty_config(&i915->perf); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d67710d10615..92a859b34190 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -209,6 +209,10 @@ static int igt_request_rewind(void *arg) int err = -EINVAL; ctx[0] = mock_context(i915, "A"); + if (!ctx[0]) { + err = -ENOMEM; + goto err_ctx_0; + } ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -223,6 +227,10 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); + if (!ctx[1]) { + err = -ENOMEM; + goto err_ctx_1; + } ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -261,9 +269,11 @@ err: i915_request_put(vip); err_context_1: mock_context_close(ctx[1]); +err_ctx_1: i915_request_put(request); err_context_0: mock_context_close(ctx[0]); +err_ctx_0: mock_device_flush(i915); return err; } @@ -831,7 +841,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - intel_gt_chipset_flush(&i915->gt); + intel_gt_chipset_flush(to_gt(i915)); vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { @@ -972,7 +982,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, i915->gt.vm, NULL); + vma = i915_vma_instance(obj, to_gt(i915)->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1004,7 +1014,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - intel_gt_chipset_flush(&i915->gt); + intel_gt_chipset_flush(to_gt(i915)); return vma; @@ -1690,7 +1700,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_breadcrumbs_smoketest), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_subtests(tests, i915); @@ -2805,7 +2815,7 @@ static int p_sync0(void *arg) i915_request_add(rq); err = 0; - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) @@ -2876,7 +2886,7 @@ static int p_sync1(void *arg) i915_request_add(rq); err = 0; - if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + if (prev && i915_request_wait(prev, 0, HZ) < 0) err = -ETIME; i915_request_put(prev); prev = rq; @@ -3081,7 +3091,7 @@ int i915_request_perf_selftests(struct drm_i915_private *i915) SUBTEST(perf_parallel_engines), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 484759c9409c..2d6d7bd13c3c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -298,10 +298,10 @@ int __i915_live_setup(void *data) struct drm_i915_private *i915 = data; /* The selftests expect an idle system */ - if (intel_gt_pm_wait_for_idle(&i915->gt)) + if (intel_gt_pm_wait_for_idle(to_gt(i915))) return -EIO; - return intel_gt_terminally_wedged(&i915->gt); + return intel_gt_terminally_wedged(to_gt(i915)); } int __i915_live_teardown(int err, void *data) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index cbf45d85cbff..daa985e5a19b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -28,7 +28,7 @@ #include "../i915_selftest.h" -static int __i915_sw_fence_call +static int fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { switch (state) { diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 1f10fe36619b..5c5809dfe9b2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -691,7 +691,11 @@ static int igt_vma_rotate_remap(void *arg) } i915_vma_unpin(vma); - + err = i915_vma_unbind(vma); + if (err) { + pr_err("Unbinding returned %i\n", err); + goto out_object; + } cond_resched(); } } @@ -848,6 +852,11 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); nvma++; + err = i915_vma_unbind(vma); + if (err) { + pr_err("Unbinding returned %i\n", err); + goto out_object; + } cond_resched(); } @@ -882,6 +891,12 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Unbinding returned %i\n", err); + goto out_object; + } + count = 0; list_for_each_entry(vma, &obj->vma.list, obj_link) count++; diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index a6c71fca61aa..b84594601d30 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -14,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915) { - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); int ret = intel_gt_is_wedged(gt) ? -EIO : 0; cond_resched(); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 1c721542e277..72b58b66692a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -16,7 +16,7 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct intel_engine_cs *engine; enum intel_engine_id id; int err; @@ -57,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, &i915->gt, id) { + for_each_engine(engine, to_gt(i915), id) { if (t->reset_engine[id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 9f8590b868a9..a2838c65f8a5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -36,7 +36,7 @@ void igt_global_reset_unlock(struct intel_gt *gt) enum intel_engine_id id; for_each_engine(engine, gt, id) - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); clear_bit(I915_RESET_BACKOFF, >->reset.flags); wake_up_all(>->reset.queue); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 418caae84759..8255561ff853 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -225,7 +225,7 @@ static int igt_mock_reserve(void *arg) out_close: close_objects(mem, &objects); - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); out_free_order: kfree(order); return err; @@ -439,7 +439,7 @@ static int igt_mock_splintered_region(void *arg) out_close: close_objects(mem, &objects); out_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -507,7 +507,7 @@ static int igt_mock_max_segment(void *arg) out_close: close_objects(mem, &objects); out_put: - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); return err; } @@ -1196,7 +1196,7 @@ int intel_memory_region_mock_selftests(void) err = i915_subtests(tests, mem); - intel_memory_region_put(mem); + intel_memory_region_destroy(mem); out_unref: mock_destroy_device(i915); return err; @@ -1217,7 +1217,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) return 0; } - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); @@ -1229,7 +1229,7 @@ int intel_memory_region_perf_selftests(struct drm_i915_private *i915) SUBTEST(perf_memcpy), }; - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(to_gt(i915))) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index bc8128170a99..cdd196783535 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -344,5 +344,5 @@ int intel_uncore_live_selftests(struct drm_i915_private *i915) SUBTEST(live_forcewake_domains), }; - return intel_gt_live_subtests(tests, &i915->gt); + return intel_gt_live_subtests(tests, to_gt(i915)); } diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index 080b90b63d16..bf2752cc1e0b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -26,7 +26,7 @@ /* Small library of different fence types useful for writing tests */ -static int __i915_sw_fence_call +static int nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { return NOTIFY_DONE; @@ -41,12 +41,12 @@ void __onstack_fence_init(struct i915_sw_fence *fence, __init_waitqueue_head(&fence->wait, name, key); atomic_set(&fence->pending, 1); fence->error = 0; - fence->flags = (unsigned long)nop_fence_notify; + fence->fn = nop_fence_notify; } void onstack_fence_fini(struct i915_sw_fence *fence) { - if (!fence->flags) + if (!fence->fn) return; i915_sw_fence_commit(fence); @@ -89,7 +89,7 @@ struct heap_fence { }; }; -static int __i915_sw_fence_call +static int heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct heap_fence *h = container_of(fence, typeof(*h), fence); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 4f8180146888..8aa7b1d33865 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -45,7 +45,7 @@ void mock_device_flush(struct drm_i915_private *i915) { - struct intel_gt *gt = &i915->gt; + struct intel_gt *gt = to_gt(i915); struct intel_engine_cs *engine; enum intel_engine_id id; @@ -64,7 +64,7 @@ static void mock_device_release(struct drm_device *dev) goto out; mock_device_flush(i915); - intel_gt_driver_remove(&i915->gt); + intel_gt_driver_remove(to_gt(i915)); i915_gem_drain_workqueue(i915); i915_gem_drain_freed_objects(i915); @@ -73,7 +73,7 @@ static void mock_device_release(struct drm_device *dev) destroy_workqueue(i915->wq); intel_region_ttm_device_fini(i915); - intel_gt_driver_late_release(&i915->gt); + intel_gt_driver_late_release(to_gt(i915)); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); @@ -165,7 +165,7 @@ struct drm_i915_private *mock_gem_device(void) /* Using the global GTT may ask questions about KMS users, so prepare */ drm_mode_config_init(&i915->drm); - mkwrite_device_info(i915)->graphics_ver = -1; + mkwrite_device_info(i915)->graphics.ver = -1; mkwrite_device_info(i915)->page_sizes = I915_GTT_PAGE_SIZE_4K | @@ -175,12 +175,14 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->memory_regions = REGION_SMEM; intel_memory_regions_hw_probe(i915); - mock_uncore_init(&i915->uncore, i915); + spin_lock_init(&i915->gpu_error.lock); i915_gem_init__mm(i915); - intel_gt_init_early(&i915->gt, i915); - atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ - i915->gt.awake = -ENODEV; + intel_gt_init_early(to_gt(i915), i915); + __intel_gt_init_early(to_gt(i915), i915); + mock_uncore_init(&i915->uncore, i915); + atomic_inc(&to_gt(i915)->wakeref.count); /* disable; no hw support */ + to_gt(i915)->awake = -ENODEV; ret = intel_region_ttm_device_init(i915); if (ret) @@ -193,19 +195,19 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); mock_init_ggtt(i915, &i915->ggtt); - i915->gt.vm = i915_vm_get(&i915->ggtt.vm); + to_gt(i915)->vm = i915_vm_get(&i915->ggtt.vm); mkwrite_device_info(i915)->platform_engine_mask = BIT(0); - i915->gt.info.engine_mask = BIT(0); + to_gt(i915)->info.engine_mask = BIT(0); - i915->gt.engine[RCS0] = mock_engine(i915, "mock", RCS0); - if (!i915->gt.engine[RCS0]) + to_gt(i915)->engine[RCS0] = mock_engine(i915, "mock", RCS0); + if (!to_gt(i915)->engine[RCS0]) goto err_unlock; - if (mock_engine_init(i915->gt.engine[RCS0])) + if (mock_engine_init(to_gt(i915)->engine[RCS0])) goto err_context; - __clear_bit(I915_WEDGED, &i915->gt.reset.flags); + __clear_bit(I915_WEDGED, &to_gt(i915)->reset.flags); intel_engines_driver_register(i915); i915->do_release = true; @@ -214,13 +216,13 @@ struct drm_i915_private *mock_gem_device(void) return i915; err_context: - intel_gt_driver_remove(&i915->gt); + intel_gt_driver_remove(to_gt(i915)); err_unlock: destroy_workqueue(i915->wq); err_drv: intel_region_ttm_device_fini(i915); err_ttm: - intel_gt_driver_late_release(&i915->gt); + intel_gt_driver_late_release(to_gt(i915)); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); mock_destroy_device(i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index cc047ec594f9..1802baf80a17 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -70,7 +70,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) if (!ppgtt) return NULL; - ppgtt->vm.gt = &i915->gt; + ppgtt->vm.gt = to_gt(i915); ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.dma = i915->drm.dev; @@ -78,6 +78,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->vm.clear_range = mock_clear_range; ppgtt->vm.insert_page = mock_insert_page; @@ -86,8 +87,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.vma_ops.bind_vma = mock_bind_ppgtt; ppgtt->vm.vma_ops.unbind_vma = mock_unbind_ppgtt; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; return ppgtt; } @@ -109,7 +108,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) { memset(ggtt, 0, sizeof(*ggtt)); - ggtt->vm.gt = &i915->gt; + ggtt->vm.gt = to_gt(i915); ggtt->vm.i915 = i915; ggtt->vm.is_ggtt = true; @@ -118,6 +117,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.total = 4096 * PAGE_SIZE; ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.clear_range = mock_clear_range; ggtt->vm.insert_page = mock_insert_page; @@ -126,11 +126,9 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = mock_bind_ggtt; ggtt->vm.vma_ops.unbind_vma = mock_unbind_ggtt; - ggtt->vm.vma_ops.set_pages = ggtt_set_pages; - ggtt->vm.vma_ops.clear_pages = clear_pages; i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - i915->gt.ggtt = ggtt; + to_gt(i915)->ggtt = ggtt; } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 75793008c4ef..19bff8afcaaa 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -15,9 +15,9 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + i915_refct_sgt_put(obj->mm.rsgt); + obj->mm.rsgt = NULL; intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); - sg_free_table(pages); - kfree(pages); } static int mock_region_get_pages(struct drm_i915_gem_object *obj) @@ -36,12 +36,14 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) if (IS_ERR(obj->mm.res)) return PTR_ERR(obj->mm.res); - pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); - if (IS_ERR(pages)) { - err = PTR_ERR(pages); + obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, + obj->mm.res); + if (IS_ERR(obj->mm.rsgt)) { + err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; } + pages = &obj->mm.rsgt->table; __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; @@ -82,13 +84,16 @@ static int mock_object_init(struct intel_memory_region *mem, return 0; } -static void mock_region_fini(struct intel_memory_region *mem) +static int mock_region_fini(struct intel_memory_region *mem) { struct drm_i915_private *i915 = mem->i915; int instance = mem->instance; + int ret; - intel_region_ttm_fini(mem); + ret = intel_region_ttm_fini(mem); ida_free(&i915->selftest.mock_region_instances, instance); + + return ret; } static const struct intel_memory_region_ops mock_region_ops = { diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c index ca57e4008701..f2d6be5e1230 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.c +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -42,7 +42,7 @@ __nop_read(64) void mock_uncore_init(struct intel_uncore *uncore, struct drm_i915_private *i915) { - intel_uncore_init_early(uncore, i915); + intel_uncore_init_early(uncore, to_gt(i915)); ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop); diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig index b5fa0e45a839..bb9738c7c825 100644 --- a/drivers/gpu/drm/imx/Kconfig +++ b/drivers/gpu/drm/imx/Kconfig @@ -4,7 +4,7 @@ config DRM_IMX select DRM_KMS_HELPER select VIDEOMODE_HELPERS select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER + select DRM_KMS_HELPER depends on DRM && (ARCH_MXC || ARCH_MULTIPLATFORM || COMPILE_TEST) depends on IMX_IPUV3_CORE help diff --git a/drivers/gpu/drm/imx/dcss/Kconfig b/drivers/gpu/drm/imx/dcss/Kconfig index 2b17a964ff05..7374f1952762 100644 --- a/drivers/gpu/drm/imx/dcss/Kconfig +++ b/drivers/gpu/drm/imx/dcss/Kconfig @@ -1,7 +1,7 @@ config DRM_IMX_DCSS tristate "i.MX8MQ DCSS" select IMX_IRQSTEER - select DRM_KMS_CMA_HELPER + select DRM_KMS_HELPER select VIDEOMODE_HELPERS depends on DRM && ARCH_MXC && ARM64 help diff --git a/drivers/gpu/drm/ingenic/Kconfig b/drivers/gpu/drm/ingenic/Kconfig index 3b57f8be007c..001f59fb06d5 100644 --- a/drivers/gpu/drm/ingenic/Kconfig +++ b/drivers/gpu/drm/ingenic/Kconfig @@ -8,7 +8,6 @@ config DRM_INGENIC select DRM_BRIDGE select DRM_PANEL_BRIDGE select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE help diff --git a/drivers/gpu/drm/kmb/Kconfig b/drivers/gpu/drm/kmb/Kconfig index bc4cb5e1cd8a..5fdd43dad507 100644 --- a/drivers/gpu/drm/kmb/Kconfig +++ b/drivers/gpu/drm/kmb/Kconfig @@ -3,7 +3,6 @@ config DRM_KMB_DISPLAY depends on DRM depends on ARCH_KEEMBAY || COMPILE_TEST select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 36c990589427..02cef0cea657 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -4,6 +4,7 @@ #include <linux/regulator/consumer.h> #include <linux/reset.h> #include <linux/clk.h> +#include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/platform_device.h> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 2723d333c608..f9a9198ef198 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -221,7 +221,7 @@ struct drm_gem_object *lima_gem_create_object(struct drm_device *dev, size_t siz bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) - return NULL; + return ERR_PTR(-ENOMEM); mutex_init(&bo->lock); INIT_LIST_HEAD(&bo->va); diff --git a/drivers/gpu/drm/mcde/Kconfig b/drivers/gpu/drm/mcde/Kconfig index 71c689b573c9..d0bf1bc8da3f 100644 --- a/drivers/gpu/drm/mcde/Kconfig +++ b/drivers/gpu/drm/mcde/Kconfig @@ -10,7 +10,6 @@ config DRM_MCDE select DRM_BRIDGE select DRM_PANEL_BRIDGE select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE help diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c index 141cb36b9c07..3a53ebc4e172 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c @@ -205,9 +205,15 @@ static const struct mtk_disp_ccorr_data mt8183_ccorr_driver_data = { .matrix_bits = 10, }; +static const struct mtk_disp_ccorr_data mt8192_ccorr_driver_data = { + .matrix_bits = 11, +}; + static const struct of_device_id mtk_disp_ccorr_driver_dt_match[] = { { .compatible = "mediatek,mt8183-disp-ccorr", .data = &mt8183_ccorr_driver_data}, + { .compatible = "mediatek,mt8192-disp-ccorr", + .data = &mt8192_ccorr_driver_data}, {}, }; MODULE_DEVICE_TABLE(of, mtk_disp_ccorr_driver_dt_match); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 5326989d5206..2146299e5f52 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -456,6 +456,22 @@ static const struct mtk_disp_ovl_data mt8183_ovl_2l_driver_data = { .fmt_rgb565_is_0 = true, }; +static const struct mtk_disp_ovl_data mt8192_ovl_driver_data = { + .addr = DISP_REG_OVL_ADDR_MT8173, + .gmc_bits = 10, + .layer_nr = 4, + .fmt_rgb565_is_0 = true, + .smi_id_en = true, +}; + +static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = { + .addr = DISP_REG_OVL_ADDR_MT8173, + .gmc_bits = 10, + .layer_nr = 2, + .fmt_rgb565_is_0 = true, + .smi_id_en = true, +}; + static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = { { .compatible = "mediatek,mt2701-disp-ovl", .data = &mt2701_ovl_driver_data}, @@ -465,6 +481,10 @@ static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = { .data = &mt8183_ovl_driver_data}, { .compatible = "mediatek,mt8183-disp-ovl-2l", .data = &mt8183_ovl_2l_driver_data}, + { .compatible = "mediatek,mt8192-disp-ovl", + .data = &mt8192_ovl_driver_data}, + { .compatible = "mediatek,mt8192-disp-ovl-2l", + .data = &mt8192_ovl_2l_driver_data}, {}, }; MODULE_DEVICE_TABLE(of, mtk_disp_ovl_driver_dt_match); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 75d7f45579e2..d41a3970b944 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -353,6 +353,10 @@ static const struct mtk_disp_rdma_data mt8183_rdma_driver_data = { .fifo_size = 5 * SZ_1K, }; +static const struct mtk_disp_rdma_data mt8192_rdma_driver_data = { + .fifo_size = 5 * SZ_1K, +}; + static const struct of_device_id mtk_disp_rdma_driver_dt_match[] = { { .compatible = "mediatek,mt2701-disp-rdma", .data = &mt2701_rdma_driver_data}, @@ -360,6 +364,8 @@ static const struct of_device_id mtk_disp_rdma_driver_dt_match[] = { .data = &mt8173_rdma_driver_data}, { .compatible = "mediatek,mt8183-disp-rdma", .data = &mt8183_rdma_driver_data}, + { .compatible = "mediatek,mt8192-disp-rdma", + .data = &mt8192_rdma_driver_data}, {}, }; MODULE_DEVICE_TABLE(of, mtk_disp_rdma_driver_dt_match); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index a4e80e499674..d661edf7e0fe 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -4,6 +4,8 @@ */ #include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/mailbox_controller.h> #include <linux/pm_runtime.h> #include <linux/soc/mediatek/mtk-cmdq.h> #include <linux/soc/mediatek/mtk-mmsys.h> @@ -50,8 +52,10 @@ struct mtk_drm_crtc { bool pending_async_planes; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_client *cmdq_client; + struct cmdq_client cmdq_client; + struct cmdq_pkt cmdq_handle; u32 cmdq_event; + u32 cmdq_vblank_cnt; #endif struct device *mmsys_dev; @@ -104,12 +108,60 @@ static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) } } +#if IS_REACHABLE(CONFIG_MTK_CMDQ) +static int mtk_drm_cmdq_pkt_create(struct cmdq_client *client, struct cmdq_pkt *pkt, + size_t size) +{ + struct device *dev; + dma_addr_t dma_addr; + + pkt->va_base = kzalloc(size, GFP_KERNEL); + if (!pkt->va_base) { + kfree(pkt); + return -ENOMEM; + } + pkt->buf_size = size; + pkt->cl = (void *)client; + + dev = client->chan->mbox->dev; + dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size); + kfree(pkt->va_base); + kfree(pkt); + return -ENOMEM; + } + + pkt->pa_base = dma_addr; + + return 0; +} + +static void mtk_drm_cmdq_pkt_destroy(struct cmdq_pkt *pkt) +{ + struct cmdq_client *client = (struct cmdq_client *)pkt->cl; + + dma_unmap_single(client->chan->mbox->dev, pkt->pa_base, pkt->buf_size, + DMA_TO_DEVICE); + kfree(pkt->va_base); + kfree(pkt); +} +#endif + static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); mtk_mutex_put(mtk_crtc->mutex); +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle); + if (mtk_crtc->cmdq_client.chan) { + mbox_free_channel(mtk_crtc->cmdq_client.chan); + mtk_crtc->cmdq_client.chan = NULL; + } +#endif drm_crtc_cleanup(crtc); } @@ -222,9 +274,46 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, } #if IS_REACHABLE(CONFIG_MTK_CMDQ) -static void ddp_cmdq_cb(struct cmdq_cb_data data) +static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) { - cmdq_pkt_destroy(data.data); + struct cmdq_cb_data *data = mssg; + struct cmdq_client *cmdq_cl = container_of(cl, struct cmdq_client, client); + struct mtk_drm_crtc *mtk_crtc = container_of(cmdq_cl, struct mtk_drm_crtc, cmdq_client); + struct mtk_crtc_state *state; + unsigned int i; + + if (data->sta < 0) + return; + + state = to_mtk_crtc_state(mtk_crtc->base.state); + + state->pending_config = false; + + if (mtk_crtc->pending_planes) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + + plane_state->pending.config = false; + } + mtk_crtc->pending_planes = false; + } + + if (mtk_crtc->pending_async_planes) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + + plane_state->pending.async_config = false; + } + mtk_crtc->pending_async_planes = false; + } + + mtk_crtc->cmdq_vblank_cnt = 0; } #endif @@ -378,7 +467,8 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, state->pending_vrefresh, 0, cmdq_handle); - state->pending_config = false; + if (!cmdq_handle) + state->pending_config = false; } if (mtk_crtc->pending_planes) { @@ -398,9 +488,12 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - plane_state->pending.config = false; + if (!cmdq_handle) + plane_state->pending.config = false; } - mtk_crtc->pending_planes = false; + + if (!cmdq_handle) + mtk_crtc->pending_planes = false; } if (mtk_crtc->pending_async_planes) { @@ -420,9 +513,12 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - plane_state->pending.async_config = false; + if (!cmdq_handle) + plane_state->pending.async_config = false; } - mtk_crtc->pending_async_planes = false; + + if (!cmdq_handle) + mtk_crtc->pending_async_planes = false; } } @@ -430,7 +526,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, bool needs_vblank) { #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_pkt *cmdq_handle; + struct cmdq_pkt *cmdq_handle = &mtk_crtc->cmdq_handle; #endif struct drm_crtc *crtc = &mtk_crtc->base; struct mtk_drm_private *priv = crtc->dev->dev_private; @@ -468,14 +564,28 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, mtk_mutex_release(mtk_crtc->mutex); } #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (mtk_crtc->cmdq_client) { - mbox_flush(mtk_crtc->cmdq_client->chan, 2000); - cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); + if (mtk_crtc->cmdq_client.chan) { + mbox_flush(mtk_crtc->cmdq_client.chan, 2000); + cmdq_handle->cmd_buf_size = 0; cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); cmdq_pkt_finalize(cmdq_handle); - cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle); + dma_sync_single_for_device(mtk_crtc->cmdq_client.chan->mbox->dev, + cmdq_handle->pa_base, + cmdq_handle->cmd_buf_size, + DMA_TO_DEVICE); + /* + * CMDQ command should execute in next 3 vblank. + * One vblank interrupt before send message (occasionally) + * and one vblank interrupt after cmdq done, + * so it's timeout after 3 vblank interrupt. + * If it fail to execute in next 3 vblank, timeout happen. + */ + mtk_crtc->cmdq_vblank_cnt = 3; + + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); + mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); } #endif mtk_crtc->config_updating = false; @@ -489,12 +599,15 @@ static void mtk_crtc_ddp_irq(void *data) struct mtk_drm_private *priv = crtc->dev->dev_private; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (!priv->data->shadow_register && !mtk_crtc->cmdq_client) + if (!priv->data->shadow_register && !mtk_crtc->cmdq_client.chan) + mtk_crtc_ddp_config(crtc, NULL); + else if (mtk_crtc->cmdq_vblank_cnt > 0 && --mtk_crtc->cmdq_vblank_cnt == 0) + DRM_ERROR("mtk_crtc %d CMDQ execute command timeout!\n", + drm_crtc_index(&mtk_crtc->base)); #else if (!priv->data->shadow_register) -#endif mtk_crtc_ddp_config(crtc, NULL); - +#endif mtk_drm_finish_page_flip(mtk_crtc); } @@ -829,16 +942,20 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mutex_init(&mtk_crtc->hw_lock); #if IS_REACHABLE(CONFIG_MTK_CMDQ) - mtk_crtc->cmdq_client = - cmdq_mbox_create(mtk_crtc->mmsys_dev, - drm_crtc_index(&mtk_crtc->base)); - if (IS_ERR(mtk_crtc->cmdq_client)) { + mtk_crtc->cmdq_client.client.dev = mtk_crtc->mmsys_dev; + mtk_crtc->cmdq_client.client.tx_block = false; + mtk_crtc->cmdq_client.client.knows_txdone = true; + mtk_crtc->cmdq_client.client.rx_callback = ddp_cmdq_cb; + mtk_crtc->cmdq_client.chan = + mbox_request_channel(&mtk_crtc->cmdq_client.client, + drm_crtc_index(&mtk_crtc->base)); + if (IS_ERR(mtk_crtc->cmdq_client.chan)) { dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", drm_crtc_index(&mtk_crtc->base)); - mtk_crtc->cmdq_client = NULL; + mtk_crtc->cmdq_client.chan = NULL; } - if (mtk_crtc->cmdq_client) { + if (mtk_crtc->cmdq_client.chan) { ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), @@ -846,8 +963,18 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, if (ret) { dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", drm_crtc_index(&mtk_crtc->base)); - cmdq_mbox_destroy(mtk_crtc->cmdq_client); - mtk_crtc->cmdq_client = NULL; + mbox_free_channel(mtk_crtc->cmdq_client.chan); + mtk_crtc->cmdq_client.chan = NULL; + } else { + ret = mtk_drm_cmdq_pkt_create(&mtk_crtc->cmdq_client, + &mtk_crtc->cmdq_handle, + PAGE_SIZE); + if (ret) { + dev_dbg(dev, "mtk_crtc %d failed to create cmdq packet\n", + drm_crtc_index(&mtk_crtc->base)); + mbox_free_channel(mtk_crtc->cmdq_client.chan); + mtk_crtc->cmdq_client.chan = NULL; + } } } #endif diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 99cbf44463e4..b4b682bc1991 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -20,45 +20,39 @@ #include "mtk_drm_ddp_comp.h" #include "mtk_drm_crtc.h" -#define DISP_OD_EN 0x0000 -#define DISP_OD_INTEN 0x0008 -#define DISP_OD_INTSTA 0x000c -#define DISP_OD_CFG 0x0020 -#define DISP_OD_SIZE 0x0030 -#define DISP_DITHER_5 0x0114 -#define DISP_DITHER_7 0x011c -#define DISP_DITHER_15 0x013c -#define DISP_DITHER_16 0x0140 -#define DISP_REG_UFO_START 0x0000 - -#define DISP_DITHER_EN 0x0000 +#define DISP_REG_DITHER_EN 0x0000 #define DITHER_EN BIT(0) -#define DISP_DITHER_CFG 0x0020 +#define DISP_REG_DITHER_CFG 0x0020 #define DITHER_RELAY_MODE BIT(0) #define DITHER_ENGINE_EN BIT(1) -#define DISP_DITHER_SIZE 0x0030 - -#define LUT_10BIT_MASK 0x03ff - -#define OD_RELAYMODE BIT(0) - -#define UFO_BYPASS BIT(2) - #define DISP_DITHERING BIT(2) +#define DISP_REG_DITHER_SIZE 0x0030 +#define DISP_REG_DITHER_5 0x0114 +#define DISP_REG_DITHER_7 0x011c +#define DISP_REG_DITHER_15 0x013c #define DITHER_LSB_ERR_SHIFT_R(x) (((x) & 0x7) << 28) -#define DITHER_OVFLW_BIT_R(x) (((x) & 0x7) << 24) #define DITHER_ADD_LSHIFT_R(x) (((x) & 0x7) << 20) -#define DITHER_ADD_RSHIFT_R(x) (((x) & 0x7) << 16) #define DITHER_NEW_BIT_MODE BIT(0) +#define DISP_REG_DITHER_16 0x0140 #define DITHER_LSB_ERR_SHIFT_B(x) (((x) & 0x7) << 28) -#define DITHER_OVFLW_BIT_B(x) (((x) & 0x7) << 24) #define DITHER_ADD_LSHIFT_B(x) (((x) & 0x7) << 20) -#define DITHER_ADD_RSHIFT_B(x) (((x) & 0x7) << 16) #define DITHER_LSB_ERR_SHIFT_G(x) (((x) & 0x7) << 12) -#define DITHER_OVFLW_BIT_G(x) (((x) & 0x7) << 8) #define DITHER_ADD_LSHIFT_G(x) (((x) & 0x7) << 4) -#define DITHER_ADD_RSHIFT_G(x) (((x) & 0x7) << 0) + +#define DISP_REG_OD_EN 0x0000 +#define DISP_REG_OD_CFG 0x0020 +#define OD_RELAYMODE BIT(0) +#define DISP_REG_OD_SIZE 0x0030 + +#define DISP_REG_POSTMASK_EN 0x0000 +#define POSTMASK_EN BIT(0) +#define DISP_REG_POSTMASK_CFG 0x0020 +#define POSTMASK_RELAY_MODE BIT(0) +#define DISP_REG_POSTMASK_SIZE 0x0030 + +#define DISP_REG_UFO_START 0x0000 +#define UFO_BYPASS BIT(2) struct mtk_ddp_comp_dev { struct clk *clk; @@ -134,25 +128,52 @@ void mtk_dither_set_common(void __iomem *regs, struct cmdq_client_reg *cmdq_reg, return; if (bpc >= MTK_MIN_BPC) { - mtk_ddp_write(cmdq_pkt, 0, cmdq_reg, regs, DISP_DITHER_5); - mtk_ddp_write(cmdq_pkt, 0, cmdq_reg, regs, DISP_DITHER_7); + mtk_ddp_write(cmdq_pkt, 0, cmdq_reg, regs, DISP_REG_DITHER_5); + mtk_ddp_write(cmdq_pkt, 0, cmdq_reg, regs, DISP_REG_DITHER_7); mtk_ddp_write(cmdq_pkt, DITHER_LSB_ERR_SHIFT_R(MTK_MAX_BPC - bpc) | DITHER_ADD_LSHIFT_R(MTK_MAX_BPC - bpc) | DITHER_NEW_BIT_MODE, - cmdq_reg, regs, DISP_DITHER_15); + cmdq_reg, regs, DISP_REG_DITHER_15); mtk_ddp_write(cmdq_pkt, DITHER_LSB_ERR_SHIFT_B(MTK_MAX_BPC - bpc) | DITHER_ADD_LSHIFT_B(MTK_MAX_BPC - bpc) | DITHER_LSB_ERR_SHIFT_G(MTK_MAX_BPC - bpc) | DITHER_ADD_LSHIFT_G(MTK_MAX_BPC - bpc), - cmdq_reg, regs, DISP_DITHER_16); + cmdq_reg, regs, DISP_REG_DITHER_16); mtk_ddp_write(cmdq_pkt, dither_en, cmdq_reg, regs, cfg); } } +static void mtk_dither_config(struct device *dev, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) +{ + struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); + + mtk_ddp_write(cmdq_pkt, h << 16 | w, &priv->cmdq_reg, priv->regs, DISP_REG_DITHER_SIZE); + mtk_ddp_write(cmdq_pkt, DITHER_RELAY_MODE, &priv->cmdq_reg, priv->regs, + DISP_REG_DITHER_CFG); + mtk_dither_set_common(priv->regs, &priv->cmdq_reg, bpc, DISP_REG_DITHER_CFG, + DITHER_ENGINE_EN, cmdq_pkt); +} + +static void mtk_dither_start(struct device *dev) +{ + struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); + + writel(DITHER_EN, priv->regs + DISP_REG_DITHER_EN); +} + +static void mtk_dither_stop(struct device *dev) +{ + struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); + + writel_relaxed(0x0, priv->regs + DISP_REG_DITHER_EN); +} + static void mtk_dither_set(struct device *dev, unsigned int bpc, - unsigned int cfg, struct cmdq_pkt *cmdq_pkt) + unsigned int cfg, struct cmdq_pkt *cmdq_pkt) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); @@ -166,49 +187,49 @@ static void mtk_od_config(struct device *dev, unsigned int w, { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_OD_SIZE); - mtk_ddp_write(cmdq_pkt, OD_RELAYMODE, &priv->cmdq_reg, priv->regs, DISP_OD_CFG); - mtk_dither_set(dev, bpc, DISP_OD_CFG, cmdq_pkt); + mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_REG_OD_SIZE); + mtk_ddp_write(cmdq_pkt, OD_RELAYMODE, &priv->cmdq_reg, priv->regs, DISP_REG_OD_CFG); + mtk_dither_set(dev, bpc, DISP_REG_OD_CFG, cmdq_pkt); } static void mtk_od_start(struct device *dev) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - writel(1, priv->regs + DISP_OD_EN); + writel(1, priv->regs + DISP_REG_OD_EN); } -static void mtk_ufoe_start(struct device *dev) +static void mtk_postmask_config(struct device *dev, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - writel(UFO_BYPASS, priv->regs + DISP_REG_UFO_START); + mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, + DISP_REG_POSTMASK_SIZE); + mtk_ddp_write(cmdq_pkt, POSTMASK_RELAY_MODE, &priv->cmdq_reg, + priv->regs, DISP_REG_POSTMASK_CFG); } -static void mtk_dither_config(struct device *dev, unsigned int w, - unsigned int h, unsigned int vrefresh, - unsigned int bpc, struct cmdq_pkt *cmdq_pkt) +static void mtk_postmask_start(struct device *dev) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - mtk_ddp_write(cmdq_pkt, h << 16 | w, &priv->cmdq_reg, priv->regs, DISP_DITHER_SIZE); - mtk_ddp_write(cmdq_pkt, DITHER_RELAY_MODE, &priv->cmdq_reg, priv->regs, DISP_DITHER_CFG); - mtk_dither_set_common(priv->regs, &priv->cmdq_reg, bpc, DISP_DITHER_CFG, - DITHER_ENGINE_EN, cmdq_pkt); + writel(POSTMASK_EN, priv->regs + DISP_REG_POSTMASK_EN); } -static void mtk_dither_start(struct device *dev) +static void mtk_postmask_stop(struct device *dev) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - writel(DITHER_EN, priv->regs + DISP_DITHER_EN); + writel_relaxed(0x0, priv->regs + DISP_REG_POSTMASK_EN); } -static void mtk_dither_stop(struct device *dev) +static void mtk_ufoe_start(struct device *dev) { struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - writel_relaxed(0x0, priv->regs + DISP_DITHER_EN); + writel(UFO_BYPASS, priv->regs + DISP_REG_UFO_START); } static const struct mtk_ddp_comp_funcs ddp_aal = { @@ -286,6 +307,14 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = { .bgclr_in_off = mtk_ovl_bgclr_in_off, }; +static const struct mtk_ddp_comp_funcs ddp_postmask = { + .clk_enable = mtk_ddp_clk_enable, + .clk_disable = mtk_ddp_clk_disable, + .config = mtk_postmask_config, + .start = mtk_postmask_start, + .stop = mtk_postmask_stop, +}; + static const struct mtk_ddp_comp_funcs ddp_rdma = { .clk_enable = mtk_rdma_clk_enable, .clk_disable = mtk_rdma_clk_disable, @@ -305,22 +334,23 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = { }; static const char * const mtk_ddp_comp_stem[MTK_DDP_COMP_TYPE_MAX] = { + [MTK_DISP_AAL] = "aal", + [MTK_DISP_BLS] = "bls", + [MTK_DISP_CCORR] = "ccorr", + [MTK_DISP_COLOR] = "color", + [MTK_DISP_DITHER] = "dither", + [MTK_DISP_GAMMA] = "gamma", + [MTK_DISP_MUTEX] = "mutex", + [MTK_DISP_OD] = "od", [MTK_DISP_OVL] = "ovl", [MTK_DISP_OVL_2L] = "ovl-2l", + [MTK_DISP_POSTMASK] = "postmask", + [MTK_DISP_PWM] = "pwm", [MTK_DISP_RDMA] = "rdma", - [MTK_DISP_WDMA] = "wdma", - [MTK_DISP_COLOR] = "color", - [MTK_DISP_CCORR] = "ccorr", - [MTK_DISP_AAL] = "aal", - [MTK_DISP_GAMMA] = "gamma", - [MTK_DISP_DITHER] = "dither", [MTK_DISP_UFOE] = "ufoe", - [MTK_DSI] = "dsi", + [MTK_DISP_WDMA] = "wdma", [MTK_DPI] = "dpi", - [MTK_DISP_PWM] = "pwm", - [MTK_DISP_MUTEX] = "mutex", - [MTK_DISP_OD] = "od", - [MTK_DISP_BLS] = "bls", + [MTK_DSI] = "dsi", }; struct mtk_ddp_comp_match { @@ -330,35 +360,38 @@ struct mtk_ddp_comp_match { }; static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = { - [DDP_COMPONENT_AAL0] = { MTK_DISP_AAL, 0, &ddp_aal }, - [DDP_COMPONENT_AAL1] = { MTK_DISP_AAL, 1, &ddp_aal }, - [DDP_COMPONENT_BLS] = { MTK_DISP_BLS, 0, NULL }, - [DDP_COMPONENT_CCORR] = { MTK_DISP_CCORR, 0, &ddp_ccorr }, - [DDP_COMPONENT_COLOR0] = { MTK_DISP_COLOR, 0, &ddp_color }, - [DDP_COMPONENT_COLOR1] = { MTK_DISP_COLOR, 1, &ddp_color }, - [DDP_COMPONENT_DITHER] = { MTK_DISP_DITHER, 0, &ddp_dither }, - [DDP_COMPONENT_DPI0] = { MTK_DPI, 0, &ddp_dpi }, - [DDP_COMPONENT_DPI1] = { MTK_DPI, 1, &ddp_dpi }, - [DDP_COMPONENT_DSI0] = { MTK_DSI, 0, &ddp_dsi }, - [DDP_COMPONENT_DSI1] = { MTK_DSI, 1, &ddp_dsi }, - [DDP_COMPONENT_DSI2] = { MTK_DSI, 2, &ddp_dsi }, - [DDP_COMPONENT_DSI3] = { MTK_DSI, 3, &ddp_dsi }, - [DDP_COMPONENT_GAMMA] = { MTK_DISP_GAMMA, 0, &ddp_gamma }, - [DDP_COMPONENT_OD0] = { MTK_DISP_OD, 0, &ddp_od }, - [DDP_COMPONENT_OD1] = { MTK_DISP_OD, 1, &ddp_od }, - [DDP_COMPONENT_OVL0] = { MTK_DISP_OVL, 0, &ddp_ovl }, - [DDP_COMPONENT_OVL1] = { MTK_DISP_OVL, 1, &ddp_ovl }, - [DDP_COMPONENT_OVL_2L0] = { MTK_DISP_OVL_2L, 0, &ddp_ovl }, - [DDP_COMPONENT_OVL_2L1] = { MTK_DISP_OVL_2L, 1, &ddp_ovl }, - [DDP_COMPONENT_PWM0] = { MTK_DISP_PWM, 0, NULL }, - [DDP_COMPONENT_PWM1] = { MTK_DISP_PWM, 1, NULL }, - [DDP_COMPONENT_PWM2] = { MTK_DISP_PWM, 2, NULL }, - [DDP_COMPONENT_RDMA0] = { MTK_DISP_RDMA, 0, &ddp_rdma }, - [DDP_COMPONENT_RDMA1] = { MTK_DISP_RDMA, 1, &ddp_rdma }, - [DDP_COMPONENT_RDMA2] = { MTK_DISP_RDMA, 2, &ddp_rdma }, - [DDP_COMPONENT_UFOE] = { MTK_DISP_UFOE, 0, &ddp_ufoe }, - [DDP_COMPONENT_WDMA0] = { MTK_DISP_WDMA, 0, NULL }, - [DDP_COMPONENT_WDMA1] = { MTK_DISP_WDMA, 1, NULL }, + [DDP_COMPONENT_AAL0] = { MTK_DISP_AAL, 0, &ddp_aal }, + [DDP_COMPONENT_AAL1] = { MTK_DISP_AAL, 1, &ddp_aal }, + [DDP_COMPONENT_BLS] = { MTK_DISP_BLS, 0, NULL }, + [DDP_COMPONENT_CCORR] = { MTK_DISP_CCORR, 0, &ddp_ccorr }, + [DDP_COMPONENT_COLOR0] = { MTK_DISP_COLOR, 0, &ddp_color }, + [DDP_COMPONENT_COLOR1] = { MTK_DISP_COLOR, 1, &ddp_color }, + [DDP_COMPONENT_DITHER] = { MTK_DISP_DITHER, 0, &ddp_dither }, + [DDP_COMPONENT_DPI0] = { MTK_DPI, 0, &ddp_dpi }, + [DDP_COMPONENT_DPI1] = { MTK_DPI, 1, &ddp_dpi }, + [DDP_COMPONENT_DSI0] = { MTK_DSI, 0, &ddp_dsi }, + [DDP_COMPONENT_DSI1] = { MTK_DSI, 1, &ddp_dsi }, + [DDP_COMPONENT_DSI2] = { MTK_DSI, 2, &ddp_dsi }, + [DDP_COMPONENT_DSI3] = { MTK_DSI, 3, &ddp_dsi }, + [DDP_COMPONENT_GAMMA] = { MTK_DISP_GAMMA, 0, &ddp_gamma }, + [DDP_COMPONENT_OD0] = { MTK_DISP_OD, 0, &ddp_od }, + [DDP_COMPONENT_OD1] = { MTK_DISP_OD, 1, &ddp_od }, + [DDP_COMPONENT_OVL0] = { MTK_DISP_OVL, 0, &ddp_ovl }, + [DDP_COMPONENT_OVL1] = { MTK_DISP_OVL, 1, &ddp_ovl }, + [DDP_COMPONENT_OVL_2L0] = { MTK_DISP_OVL_2L, 0, &ddp_ovl }, + [DDP_COMPONENT_OVL_2L1] = { MTK_DISP_OVL_2L, 1, &ddp_ovl }, + [DDP_COMPONENT_OVL_2L2] = { MTK_DISP_OVL_2L, 2, &ddp_ovl }, + [DDP_COMPONENT_POSTMASK0] = { MTK_DISP_POSTMASK, 0, &ddp_postmask }, + [DDP_COMPONENT_PWM0] = { MTK_DISP_PWM, 0, NULL }, + [DDP_COMPONENT_PWM1] = { MTK_DISP_PWM, 1, NULL }, + [DDP_COMPONENT_PWM2] = { MTK_DISP_PWM, 2, NULL }, + [DDP_COMPONENT_RDMA0] = { MTK_DISP_RDMA, 0, &ddp_rdma }, + [DDP_COMPONENT_RDMA1] = { MTK_DISP_RDMA, 1, &ddp_rdma }, + [DDP_COMPONENT_RDMA2] = { MTK_DISP_RDMA, 2, &ddp_rdma }, + [DDP_COMPONENT_RDMA4] = { MTK_DISP_RDMA, 4, &ddp_rdma }, + [DDP_COMPONENT_UFOE] = { MTK_DISP_UFOE, 0, &ddp_ufoe }, + [DDP_COMPONENT_WDMA0] = { MTK_DISP_WDMA, 0, NULL }, + [DDP_COMPONENT_WDMA1] = { MTK_DISP_WDMA, 1, NULL }, }; static bool mtk_drm_find_comp_in_ddp(struct device *dev, @@ -475,12 +508,12 @@ int mtk_ddp_comp_init(struct device_node *node, struct mtk_ddp_comp *comp, type == MTK_DISP_CCORR || type == MTK_DISP_COLOR || type == MTK_DISP_GAMMA || - type == MTK_DPI || - type == MTK_DSI || type == MTK_DISP_OVL || type == MTK_DISP_OVL_2L || type == MTK_DISP_PWM || - type == MTK_DISP_RDMA) + type == MTK_DISP_RDMA || + type == MTK_DPI || + type == MTK_DSI) return 0; priv = devm_kzalloc(comp->dev, sizeof(*priv), GFP_KERNEL); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index bb914d976cf5..4c6a98662305 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -18,22 +18,23 @@ struct mtk_plane_state; struct drm_crtc_state; enum mtk_ddp_comp_type { - MTK_DISP_OVL, - MTK_DISP_OVL_2L, - MTK_DISP_RDMA, - MTK_DISP_WDMA, - MTK_DISP_COLOR, + MTK_DISP_AAL, + MTK_DISP_BLS, MTK_DISP_CCORR, + MTK_DISP_COLOR, MTK_DISP_DITHER, - MTK_DISP_AAL, MTK_DISP_GAMMA, - MTK_DISP_UFOE, - MTK_DSI, - MTK_DPI, - MTK_DISP_PWM, MTK_DISP_MUTEX, MTK_DISP_OD, - MTK_DISP_BLS, + MTK_DISP_OVL, + MTK_DISP_OVL_2L, + MTK_DISP_POSTMASK, + MTK_DISP_PWM, + MTK_DISP_RDMA, + MTK_DISP_UFOE, + MTK_DISP_WDMA, + MTK_DPI, + MTK_DSI, MTK_DDP_COMP_TYPE_MAX, }; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index aec39724ebeb..56ff8c57ef8f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -158,6 +158,25 @@ static const enum mtk_ddp_comp_id mt8183_mtk_ddp_ext[] = { DDP_COMPONENT_DPI0, }; +static const enum mtk_ddp_comp_id mt8192_mtk_ddp_main[] = { + DDP_COMPONENT_OVL0, + DDP_COMPONENT_OVL_2L0, + DDP_COMPONENT_RDMA0, + DDP_COMPONENT_COLOR0, + DDP_COMPONENT_CCORR, + DDP_COMPONENT_AAL0, + DDP_COMPONENT_GAMMA, + DDP_COMPONENT_POSTMASK0, + DDP_COMPONENT_DITHER, + DDP_COMPONENT_DSI0, +}; + +static const enum mtk_ddp_comp_id mt8192_mtk_ddp_ext[] = { + DDP_COMPONENT_OVL_2L2, + DDP_COMPONENT_RDMA4, + DDP_COMPONENT_DPI0, +}; + static const struct mtk_mmsys_driver_data mt2701_mmsys_driver_data = { .main_path = mt2701_mtk_ddp_main, .main_len = ARRAY_SIZE(mt2701_mtk_ddp_main), @@ -202,6 +221,13 @@ static const struct mtk_mmsys_driver_data mt8183_mmsys_driver_data = { .ext_len = ARRAY_SIZE(mt8183_mtk_ddp_ext), }; +static const struct mtk_mmsys_driver_data mt8192_mmsys_driver_data = { + .main_path = mt8192_mtk_ddp_main, + .main_len = ARRAY_SIZE(mt8192_mtk_ddp_main), + .ext_path = mt8192_mtk_ddp_ext, + .ext_len = ARRAY_SIZE(mt8192_mtk_ddp_ext), +}; + static int mtk_drm_kms_init(struct drm_device *drm) { struct mtk_drm_private *private = drm->dev_private; @@ -397,68 +423,36 @@ static const struct component_master_ops mtk_drm_ops = { }; static const struct of_device_id mtk_ddp_comp_dt_ids[] = { - { .compatible = "mediatek,mt2701-disp-ovl", - .data = (void *)MTK_DISP_OVL }, - { .compatible = "mediatek,mt8167-disp-ovl", - .data = (void *)MTK_DISP_OVL }, - { .compatible = "mediatek,mt8173-disp-ovl", - .data = (void *)MTK_DISP_OVL }, - { .compatible = "mediatek,mt8183-disp-ovl", - .data = (void *)MTK_DISP_OVL }, - { .compatible = "mediatek,mt8183-disp-ovl-2l", - .data = (void *)MTK_DISP_OVL_2L }, - { .compatible = "mediatek,mt2701-disp-rdma", - .data = (void *)MTK_DISP_RDMA }, - { .compatible = "mediatek,mt8167-disp-rdma", - .data = (void *)MTK_DISP_RDMA }, - { .compatible = "mediatek,mt8173-disp-rdma", - .data = (void *)MTK_DISP_RDMA }, - { .compatible = "mediatek,mt8183-disp-rdma", - .data = (void *)MTK_DISP_RDMA }, - { .compatible = "mediatek,mt8173-disp-wdma", - .data = (void *)MTK_DISP_WDMA }, + { .compatible = "mediatek,mt8167-disp-aal", + .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8173-disp-aal", + .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8183-disp-aal", + .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8192-disp-aal", + .data = (void *)MTK_DISP_AAL}, { .compatible = "mediatek,mt8167-disp-ccorr", .data = (void *)MTK_DISP_CCORR }, { .compatible = "mediatek,mt8183-disp-ccorr", .data = (void *)MTK_DISP_CCORR }, + { .compatible = "mediatek,mt8192-disp-ccorr", + .data = (void *)MTK_DISP_CCORR }, { .compatible = "mediatek,mt2701-disp-color", .data = (void *)MTK_DISP_COLOR }, { .compatible = "mediatek,mt8167-disp-color", .data = (void *)MTK_DISP_COLOR }, { .compatible = "mediatek,mt8173-disp-color", .data = (void *)MTK_DISP_COLOR }, - { .compatible = "mediatek,mt8167-disp-aal", - .data = (void *)MTK_DISP_AAL}, - { .compatible = "mediatek,mt8173-disp-aal", - .data = (void *)MTK_DISP_AAL}, - { .compatible = "mediatek,mt8183-disp-aal", - .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8167-disp-dither", + .data = (void *)MTK_DISP_DITHER }, + { .compatible = "mediatek,mt8183-disp-dither", + .data = (void *)MTK_DISP_DITHER }, { .compatible = "mediatek,mt8167-disp-gamma", .data = (void *)MTK_DISP_GAMMA, }, { .compatible = "mediatek,mt8173-disp-gamma", .data = (void *)MTK_DISP_GAMMA, }, { .compatible = "mediatek,mt8183-disp-gamma", .data = (void *)MTK_DISP_GAMMA, }, - { .compatible = "mediatek,mt8167-disp-dither", - .data = (void *)MTK_DISP_DITHER }, - { .compatible = "mediatek,mt8183-disp-dither", - .data = (void *)MTK_DISP_DITHER }, - { .compatible = "mediatek,mt8173-disp-ufoe", - .data = (void *)MTK_DISP_UFOE }, - { .compatible = "mediatek,mt2701-dsi", - .data = (void *)MTK_DSI }, - { .compatible = "mediatek,mt8167-dsi", - .data = (void *)MTK_DSI }, - { .compatible = "mediatek,mt8173-dsi", - .data = (void *)MTK_DSI }, - { .compatible = "mediatek,mt8183-dsi", - .data = (void *)MTK_DSI }, - { .compatible = "mediatek,mt2701-dpi", - .data = (void *)MTK_DPI }, - { .compatible = "mediatek,mt8173-dpi", - .data = (void *)MTK_DPI }, - { .compatible = "mediatek,mt8183-dpi", - .data = (void *)MTK_DPI }, { .compatible = "mediatek,mt2701-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt2712-disp-mutex", @@ -469,14 +463,60 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8183-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, + { .compatible = "mediatek,mt8192-disp-mutex", + .data = (void *)MTK_DISP_MUTEX }, + { .compatible = "mediatek,mt8173-disp-od", + .data = (void *)MTK_DISP_OD }, + { .compatible = "mediatek,mt2701-disp-ovl", + .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8167-disp-ovl", + .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8173-disp-ovl", + .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8183-disp-ovl", + .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8192-disp-ovl", + .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8183-disp-ovl-2l", + .data = (void *)MTK_DISP_OVL_2L }, + { .compatible = "mediatek,mt8192-disp-ovl-2l", + .data = (void *)MTK_DISP_OVL_2L }, + { .compatible = "mediatek,mt8192-disp-postmask", + .data = (void *)MTK_DISP_POSTMASK }, { .compatible = "mediatek,mt2701-disp-pwm", .data = (void *)MTK_DISP_BLS }, { .compatible = "mediatek,mt8167-disp-pwm", .data = (void *)MTK_DISP_PWM }, { .compatible = "mediatek,mt8173-disp-pwm", .data = (void *)MTK_DISP_PWM }, - { .compatible = "mediatek,mt8173-disp-od", - .data = (void *)MTK_DISP_OD }, + { .compatible = "mediatek,mt2701-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8167-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8173-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8183-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8192-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8173-disp-ufoe", + .data = (void *)MTK_DISP_UFOE }, + { .compatible = "mediatek,mt8173-disp-wdma", + .data = (void *)MTK_DISP_WDMA }, + { .compatible = "mediatek,mt2701-dpi", + .data = (void *)MTK_DPI }, + { .compatible = "mediatek,mt8167-dsi", + .data = (void *)MTK_DSI }, + { .compatible = "mediatek,mt8173-dpi", + .data = (void *)MTK_DPI }, + { .compatible = "mediatek,mt8183-dpi", + .data = (void *)MTK_DPI }, + { .compatible = "mediatek,mt2701-dsi", + .data = (void *)MTK_DSI }, + { .compatible = "mediatek,mt8173-dsi", + .data = (void *)MTK_DSI }, + { .compatible = "mediatek,mt8183-dsi", + .data = (void *)MTK_DSI }, { } }; @@ -493,6 +533,8 @@ static const struct of_device_id mtk_drm_of_ids[] = { .data = &mt8173_mmsys_driver_data}, { .compatible = "mediatek,mt8183-mmsys", .data = &mt8183_mmsys_driver_data}, + { .compatible = "mediatek,mt8192-mmsys", + .data = &mt8192_mmsys_driver_data}, { } }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); @@ -568,8 +610,8 @@ static int mtk_drm_probe(struct platform_device *pdev) comp_type == MTK_DISP_OVL || comp_type == MTK_DISP_OVL_2L || comp_type == MTK_DISP_RDMA || - comp_type == MTK_DSI || - comp_type == MTK_DPI) { + comp_type == MTK_DPI || + comp_type == MTK_DSI) { dev_info(dev, "Adding component match for %pOF\n", node); drm_of_component_match_add(dev, &match, compare_of, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 734a1fb052df..075747a6d4aa 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -44,9 +44,10 @@ static void mtk_plane_reset(struct drm_plane *plane) state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return; - plane->state = &state->base; } + __drm_atomic_helper_plane_reset(plane, &state->base); + state->base.plane = plane; state->pending.format = DRM_FORMAT_RGB565; } diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig index a4e1ed96e5e8..6c70fc3214af 100644 --- a/drivers/gpu/drm/meson/Kconfig +++ b/drivers/gpu/drm/meson/Kconfig @@ -4,7 +4,6 @@ config DRM_MESON depends on DRM && OF && (ARM || ARM64) depends on ARCH_MESON || COMPILE_TEST select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_DISPLAY_CONNECTOR select VIDEOMODE_HELPERS diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index 6b9243713b3c..740108a006ba 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -6,7 +6,6 @@ * Dave Airlie */ -#include <linux/console.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/vmalloc.h> @@ -378,7 +377,7 @@ static struct pci_driver mgag200_pci_driver = { static int __init mgag200_init(void) { - if (vgacon_text_force() && mgag200_modeset == -1) + if (drm_firmware_drivers_only() && mgag200_modeset == -1) return -EINVAL; if (mgag200_modeset == 0) diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 4a1420b05e97..086dacf2f26a 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -5,6 +5,7 @@ */ #include <linux/vmalloc.h> +#include <linux/sched/mm.h> #include "msm_drv.h" #include "msm_gem.h" diff --git a/drivers/gpu/drm/mxsfb/Kconfig b/drivers/gpu/drm/mxsfb/Kconfig index ee22cd25d3e3..987170e16ebd 100644 --- a/drivers/gpu/drm/mxsfb/Kconfig +++ b/drivers/gpu/drm/mxsfb/Kconfig @@ -10,7 +10,7 @@ config DRM_MXSFB depends on COMMON_CLK select DRM_MXS select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_PANEL select DRM_PANEL_BRIDGE help diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index e7efd9ede8e4..561309d447e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -22,7 +22,6 @@ * Authors: Ben Skeggs */ -#include <linux/console.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/pci.h> @@ -32,6 +31,7 @@ #include <drm/drm_aperture.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_drv.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_vblank.h> @@ -1358,7 +1358,7 @@ nouveau_drm_init(void) nouveau_display_options(); if (nouveau_modeset == -1) { - if (vgacon_text_force()) + if (drm_firmware_drivers_only()) nouveau_modeset = 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b51d690f375f..88d262ba648c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2627,6 +2627,27 @@ nv174_chipset = { }; static const struct nvkm_device_chip +nv176_chipset = { + .name = "GA106", + .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, + .devinit = { 0x00000001, ga100_devinit_new }, + .fb = { 0x00000001, ga102_fb_new }, + .gpio = { 0x00000001, ga102_gpio_new }, + .i2c = { 0x00000001, gm200_i2c_new }, + .imem = { 0x00000001, nv50_instmem_new }, + .mc = { 0x00000001, ga100_mc_new }, + .mmu = { 0x00000001, tu102_mmu_new }, + .pci = { 0x00000001, gp100_pci_new }, + .privring = { 0x00000001, gm200_privring_new }, + .timer = { 0x00000001, gk20a_timer_new }, + .top = { 0x00000001, ga100_top_new }, + .disp = { 0x00000001, ga102_disp_new }, + .dma = { 0x00000001, gv100_dma_new }, + .fifo = { 0x00000001, ga102_fifo_new }, +}; + +static const struct nvkm_device_chip nv177_chipset = { .name = "GA107", .bar = { 0x00000001, tu102_bar_new }, @@ -3072,6 +3093,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x168: device->chip = &nv168_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x174: device->chip = &nv174_chipset; break; + case 0x176: device->chip = &nv176_chipset; break; case 0x177: device->chip = &nv177_chipset; break; default: if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c index cdb1ead26d84..82b4c8e1457c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c @@ -207,11 +207,13 @@ int gm200_acr_wpr_parse(struct nvkm_acr *acr) { const struct wpr_header *hdr = (void *)acr->wpr_fw->data; + struct nvkm_acr_lsfw *lsfw; while (hdr->falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID) { wpr_header_dump(&acr->subdev, hdr); - if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id)) - return -ENOMEM; + lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id); + if (IS_ERR(lsfw)) + return PTR_ERR(lsfw); } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c index fb9132a39bb1..fd97a935a380 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c @@ -161,11 +161,13 @@ int gp102_acr_wpr_parse(struct nvkm_acr *acr) { const struct wpr_header_v1 *hdr = (void *)acr->wpr_fw->data; + struct nvkm_acr_lsfw *lsfw; while (hdr->falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) { wpr_header_v1_dump(&acr->subdev, hdr); - if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id)) - return -ENOMEM; + lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id); + if (IS_ERR(lsfw)) + return PTR_ERR(lsfw); } return 0; diff --git a/drivers/gpu/drm/omapdrm/Makefile b/drivers/gpu/drm/omapdrm/Makefile index 21e8277ff88f..710b4e0abcf0 100644 --- a/drivers/gpu/drm/omapdrm/Makefile +++ b/drivers/gpu/drm/omapdrm/Makefile @@ -9,6 +9,7 @@ omapdrm-y := omap_drv.o \ omap_debugfs.o \ omap_crtc.o \ omap_plane.o \ + omap_overlay.o \ omap_encoder.o \ omap_fb.o \ omap_gem.o \ diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index b440147ae28b..c4de142cc85b 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -92,6 +92,8 @@ struct dispc_features { u8 mgr_height_start; u16 mgr_width_max; u16 mgr_height_max; + u16 ovl_width_max; + u16 ovl_height_max; unsigned long max_lcd_pclk; unsigned long max_tv_pclk; unsigned int max_downscale; @@ -1279,8 +1281,8 @@ static u32 dispc_ovl_get_burst_size(struct dispc_device *dispc, return dispc->feat->burst_size_unit * 8; } -static bool dispc_ovl_color_mode_supported(struct dispc_device *dispc, - enum omap_plane_id plane, u32 fourcc) +bool dispc_ovl_color_mode_supported(struct dispc_device *dispc, + enum omap_plane_id plane, u32 fourcc) { const u32 *modes; unsigned int i; @@ -2487,6 +2489,11 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc, return 0; } +enum omap_overlay_caps dispc_ovl_get_caps(struct dispc_device *dispc, enum omap_plane_id plane) +{ + return dispc->feat->overlay_caps[plane]; +} + #define DIV_FRAC(dividend, divisor) \ ((dividend) * 100 / (divisor) - ((dividend) / (divisor) * 100)) @@ -2599,6 +2606,12 @@ static int dispc_ovl_calc_scaling(struct dispc_device *dispc, return 0; } +void dispc_ovl_get_max_size(struct dispc_device *dispc, u16 *width, u16 *height) +{ + *width = dispc->feat->ovl_width_max; + *height = dispc->feat->ovl_height_max; +} + static int dispc_ovl_setup_common(struct dispc_device *dispc, enum omap_plane_id plane, enum omap_overlay_caps caps, @@ -4240,6 +4253,8 @@ static const struct dispc_features omap24xx_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 66500000, .max_downscale = 2, /* @@ -4278,6 +4293,8 @@ static const struct dispc_features omap34xx_rev1_0_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 173000000, .max_tv_pclk = 59000000, .max_downscale = 4, @@ -4313,6 +4330,8 @@ static const struct dispc_features omap34xx_rev3_0_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 173000000, .max_tv_pclk = 59000000, .max_downscale = 4, @@ -4348,6 +4367,8 @@ static const struct dispc_features omap36xx_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 173000000, .max_tv_pclk = 59000000, .max_downscale = 4, @@ -4383,6 +4404,8 @@ static const struct dispc_features am43xx_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 173000000, .max_tv_pclk = 59000000, .max_downscale = 4, @@ -4418,6 +4441,8 @@ static const struct dispc_features omap44xx_dispc_feats = { .mgr_height_start = 26, .mgr_width_max = 2048, .mgr_height_max = 2048, + .ovl_width_max = 2048, + .ovl_height_max = 2048, .max_lcd_pclk = 170000000, .max_tv_pclk = 185625000, .max_downscale = 4, @@ -4457,6 +4482,8 @@ static const struct dispc_features omap54xx_dispc_feats = { .mgr_height_start = 27, .mgr_width_max = 4096, .mgr_height_max = 4096, + .ovl_width_max = 2048, + .ovl_height_max = 4096, .max_lcd_pclk = 170000000, .max_tv_pclk = 192000000, .max_downscale = 4, @@ -4842,7 +4869,7 @@ static int dispc_remove(struct platform_device *pdev) return 0; } -static int dispc_runtime_suspend(struct device *dev) +static __maybe_unused int dispc_runtime_suspend(struct device *dev) { struct dispc_device *dispc = dev_get_drvdata(dev); @@ -4857,7 +4884,7 @@ static int dispc_runtime_suspend(struct device *dev) return 0; } -static int dispc_runtime_resume(struct device *dev) +static __maybe_unused int dispc_runtime_resume(struct device *dev) { struct dispc_device *dispc = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index d730bf67fed9..a6845856cbce 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5058,7 +5058,7 @@ static int dsi_remove(struct platform_device *pdev) return 0; } -static int dsi_runtime_suspend(struct device *dev) +static __maybe_unused int dsi_runtime_suspend(struct device *dev) { struct dsi_data *dsi = dev_get_drvdata(dev); @@ -5071,7 +5071,7 @@ static int dsi_runtime_suspend(struct device *dev) return 0; } -static int dsi_runtime_resume(struct device *dev) +static __maybe_unused int dsi_runtime_resume(struct device *dev) { struct dsi_data *dsi = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 66db28bfe824..69b3e15b9356 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1569,7 +1569,7 @@ static void dss_shutdown(struct platform_device *pdev) DSSDBG("shutdown\n"); } -static int dss_runtime_suspend(struct device *dev) +static __maybe_unused int dss_runtime_suspend(struct device *dev) { struct dss_device *dss = dev_get_drvdata(dev); @@ -1581,7 +1581,7 @@ static int dss_runtime_suspend(struct device *dev) return 0; } -static int dss_runtime_resume(struct device *dev) +static __maybe_unused int dss_runtime_resume(struct device *dev) { struct dss_device *dss = dev_get_drvdata(dev); int r; diff --git a/drivers/gpu/drm/omapdrm/dss/dss.h b/drivers/gpu/drm/omapdrm/dss/dss.h index a547527bb2f3..4ff02fbc0e71 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.h +++ b/drivers/gpu/drm/omapdrm/dss/dss.h @@ -397,6 +397,11 @@ int dispc_get_num_mgrs(struct dispc_device *dispc); const u32 *dispc_ovl_get_color_modes(struct dispc_device *dispc, enum omap_plane_id plane); +void dispc_ovl_get_max_size(struct dispc_device *dispc, u16 *width, u16 *height); +bool dispc_ovl_color_mode_supported(struct dispc_device *dispc, + enum omap_plane_id plane, u32 fourcc); +enum omap_overlay_caps dispc_ovl_get_caps(struct dispc_device *dispc, enum omap_plane_id plane); + u32 dispc_read_irqstatus(struct dispc_device *dispc); void dispc_clear_irqstatus(struct dispc_device *dispc, u32 mask); void dispc_write_irqenable(struct dispc_device *dispc, u32 mask); diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c index 508fddd376cf..4480b69ab5a7 100644 --- a/drivers/gpu/drm/omapdrm/dss/venc.c +++ b/drivers/gpu/drm/omapdrm/dss/venc.c @@ -879,7 +879,7 @@ static int venc_remove(struct platform_device *pdev) return 0; } -static int venc_runtime_suspend(struct device *dev) +static __maybe_unused int venc_runtime_suspend(struct device *dev) { struct venc_device *venc = dev_get_drvdata(dev); @@ -889,7 +889,7 @@ static int venc_runtime_suspend(struct device *dev) return 0; } -static int venc_runtime_resume(struct device *dev) +static __maybe_unused int venc_runtime_resume(struct device *dev) { struct venc_device *venc = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index c05d3975cb31..2720a58ccd90 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -117,6 +117,102 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state) dispc_runtime_put(priv->dispc); } +static int drm_atomic_state_normalized_zpos_cmp(const void *a, const void *b) +{ + const struct drm_plane_state *sa = *(struct drm_plane_state **)a; + const struct drm_plane_state *sb = *(struct drm_plane_state **)b; + + if (sa->normalized_zpos != sb->normalized_zpos) + return sa->normalized_zpos - sb->normalized_zpos; + else + return sa->plane->base.id - sb->plane->base.id; +} + +/* + * This replaces the drm_atomic_normalize_zpos to handle the dual overlay case. + * + * Since both halves need to be 'appear' side by side the zpos is + * recalculated when dealing with dual overlay cases so that the other + * planes zpos is consistent. + */ +static int omap_atomic_update_normalize_zpos(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_state, *new_state; + struct drm_plane *plane; + int c, i, n, inc; + int total_planes = dev->mode_config.num_total_plane; + struct drm_plane_state **states; + int ret = 0; + + states = kmalloc_array(total_planes, sizeof(*states), GFP_KERNEL); + if (!states) + return -ENOMEM; + + for_each_oldnew_crtc_in_state(state, crtc, old_state, new_state, c) { + if (old_state->plane_mask == new_state->plane_mask && + !new_state->zpos_changed) + continue; + + /* Reset plane increment and index value for every crtc */ + n = 0; + + /* + * Normalization process might create new states for planes + * which normalized_zpos has to be recalculated. + */ + drm_for_each_plane_mask(plane, dev, new_state->plane_mask) { + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(new_state->state, + plane); + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); + goto done; + } + states[n++] = plane_state; + } + + sort(states, n, sizeof(*states), + drm_atomic_state_normalized_zpos_cmp, NULL); + + for (i = 0, inc = 0; i < n; i++) { + plane = states[i]->plane; + + states[i]->normalized_zpos = i + inc; + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] updated normalized zpos value %d\n", + plane->base.id, plane->name, + states[i]->normalized_zpos); + + if (is_omap_plane_dual_overlay(states[i])) + inc++; + } + new_state->zpos_changed = true; + } + +done: + kfree(states); + return ret; +} + +static int omap_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check(dev, state); + if (ret) + return ret; + + if (dev->mode_config.normalize_zpos) { + ret = omap_atomic_update_normalize_zpos(dev, state); + if (ret) + return ret; + } + + return 0; +} + static const struct drm_mode_config_helper_funcs omap_mode_config_helper_funcs = { .atomic_commit_tail = omap_atomic_commit_tail, }; @@ -124,10 +220,86 @@ static const struct drm_mode_config_helper_funcs omap_mode_config_helper_funcs = static const struct drm_mode_config_funcs omap_mode_config_funcs = { .fb_create = omap_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = omap_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; +/* Global/shared object state funcs */ + +/* + * This is a helper that returns the private state currently in operation. + * Note that this would return the "old_state" if called in the atomic check + * path, and the "new_state" after the atomic swap has been done. + */ +struct omap_global_state * +omap_get_existing_global_state(struct omap_drm_private *priv) +{ + return to_omap_global_state(priv->glob_obj.state); +} + +/* + * This acquires the modeset lock set aside for global state, creates + * a new duplicated private object state. + */ +struct omap_global_state *__must_check +omap_get_global_state(struct drm_atomic_state *s) +{ + struct omap_drm_private *priv = s->dev->dev_private; + struct drm_private_state *priv_state; + + priv_state = drm_atomic_get_private_obj_state(s, &priv->glob_obj); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); + + return to_omap_global_state(priv_state); +} + +static struct drm_private_state * +omap_global_duplicate_state(struct drm_private_obj *obj) +{ + struct omap_global_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + return &state->base; +} + +static void omap_global_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct omap_global_state *omap_state = to_omap_global_state(state); + + kfree(omap_state); +} + +static const struct drm_private_state_funcs omap_global_state_funcs = { + .atomic_duplicate_state = omap_global_duplicate_state, + .atomic_destroy_state = omap_global_destroy_state, +}; + +static int omap_global_obj_init(struct drm_device *dev) +{ + struct omap_drm_private *priv = dev->dev_private; + struct omap_global_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + drm_atomic_private_obj_init(dev, &priv->glob_obj, &state->base, + &omap_global_state_funcs); + return 0; +} + +static void omap_global_obj_fini(struct omap_drm_private *priv) +{ + drm_atomic_private_obj_fini(&priv->glob_obj); +} + static void omap_disconnect_pipelines(struct drm_device *ddev) { struct omap_drm_private *priv = ddev->dev_private; @@ -231,8 +403,6 @@ static int omap_modeset_init(struct drm_device *dev) if (!omapdss_stack_is_ready()) return -EPROBE_DEFER; - drm_mode_config_init(dev); - ret = omap_modeset_init_properties(dev); if (ret < 0) return ret; @@ -583,10 +753,20 @@ static int omapdrm_init(struct omap_drm_private *priv, struct device *dev) omap_gem_init(ddev); + drm_mode_config_init(ddev); + + ret = omap_global_obj_init(ddev); + if (ret) + goto err_gem_deinit; + + ret = omap_hwoverlays_init(priv); + if (ret) + goto err_free_priv_obj; + ret = omap_modeset_init(ddev); if (ret) { dev_err(priv->dev, "omap_modeset_init failed: ret=%d\n", ret); - goto err_gem_deinit; + goto err_free_overlays; } /* Initialize vblank handling, start with all CRTCs disabled. */ @@ -618,7 +798,12 @@ err_cleanup_helpers: omap_fbdev_fini(ddev); err_cleanup_modeset: omap_modeset_fini(ddev); +err_free_overlays: + omap_hwoverlays_destroy(priv); +err_free_priv_obj: + omap_global_obj_fini(priv); err_gem_deinit: + drm_mode_config_cleanup(ddev); omap_gem_deinit(ddev); destroy_workqueue(priv->wq); omap_disconnect_pipelines(ddev); @@ -642,6 +827,9 @@ static void omapdrm_cleanup(struct omap_drm_private *priv) drm_atomic_helper_shutdown(ddev); omap_modeset_fini(ddev); + omap_hwoverlays_destroy(priv); + omap_global_obj_fini(priv); + drm_mode_config_cleanup(ddev); omap_gem_deinit(ddev); destroy_workqueue(priv->wq); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 591d4c273f02..825960fd3ea9 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -14,6 +14,7 @@ #include "dss/omapdss.h" #include "dss/dss.h" +#include <drm/drm_atomic.h> #include <drm/drm_gem.h> #include <drm/omap_drm.h> @@ -24,6 +25,7 @@ #include "omap_gem.h" #include "omap_irq.h" #include "omap_plane.h" +#include "omap_overlay.h" #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /* verbose debug */ @@ -40,6 +42,19 @@ struct omap_drm_pipeline { unsigned int alias_id; }; +/* + * Global private object state for tracking resources that are shared across + * multiple kms objects (planes/crtcs/etc). + */ +#define to_omap_global_state(x) container_of(x, struct omap_global_state, base) + +struct omap_global_state { + struct drm_private_state base; + + /* global atomic state of assignment between overlays and planes */ + struct drm_plane *hwoverlay_to_plane[8]; +}; + struct omap_drm_private { struct drm_device *ddev; struct device *dev; @@ -57,6 +72,11 @@ struct omap_drm_private { unsigned int num_planes; struct drm_plane *planes[8]; + unsigned int num_ovls; + struct omap_hw_overlay *overlays[8]; + + struct drm_private_obj glob_obj; + struct drm_fb_helper *fbdev; struct workqueue_struct *wq; @@ -85,4 +105,8 @@ struct omap_drm_private { void omap_debugfs_init(struct drm_minor *minor); +struct omap_global_state * __must_check omap_get_global_state(struct drm_atomic_state *s); + +struct omap_global_state *omap_get_existing_global_state(struct omap_drm_private *priv); + #endif /* __OMAPDRM_DRV_H__ */ diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 190afc564914..895e66b08a81 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -131,7 +131,9 @@ static u32 drm_rotation_to_tiler(unsigned int drm_rot) /* update ovl info for scanout, handles cases of multi-planar fb's, etc. */ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, - struct drm_plane_state *state, struct omap_overlay_info *info) + struct drm_plane_state *state, + struct omap_overlay_info *info, + struct omap_overlay_info *r_info) { struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); const struct drm_format_info *format = omap_fb->format; @@ -218,6 +220,35 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, } else { info->p_uv_addr = 0; } + + if (r_info) { + info->width /= 2; + info->out_width /= 2; + + *r_info = *info; + + if (fb->format->is_yuv) { + if (info->width & 1) { + info->width++; + r_info->width--; + } + + if (info->out_width & 1) { + info->out_width++; + r_info->out_width--; + } + } + + r_info->pos_x = info->pos_x + info->out_width; + + r_info->paddr = get_linear_addr(fb, format, 0, + x + info->width, y); + if (fb->format->format == DRM_FORMAT_NV12) { + r_info->p_uv_addr = + get_linear_addr(fb, format, 1, + x + info->width, y); + } + } } /* pin, prepare for scanout: */ diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h index c0e19aed8220..b75f0b5ef1d8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.h +++ b/drivers/gpu/drm/omapdrm/omap_fb.h @@ -26,7 +26,9 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, int omap_framebuffer_pin(struct drm_framebuffer *fb); void omap_framebuffer_unpin(struct drm_framebuffer *fb); void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, - struct drm_plane_state *state, struct omap_overlay_info *info); + struct drm_plane_state *state, + struct omap_overlay_info *info, + struct omap_overlay_info *r_info); bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb); void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 38af6195d959..b0fa17409b66 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -789,7 +789,7 @@ int omap_gem_pin(struct drm_gem_object *obj, dma_addr_t *dma_addr) if (omap_obj->flags & OMAP_BO_TILED_MASK) { block = tiler_reserve_2d(fmt, omap_obj->width, - omap_obj->height, 0); + omap_obj->height, PAGE_SIZE); } else { block = tiler_reserve_1d(obj->size); } @@ -851,6 +851,11 @@ static void omap_gem_unpin_locked(struct drm_gem_object *obj) return; if (refcount_dec_and_test(&omap_obj->dma_addr_cnt)) { + if (omap_obj->sgt) { + sg_free_table(omap_obj->sgt); + kfree(omap_obj->sgt); + omap_obj->sgt = NULL; + } ret = tiler_unpin(omap_obj->block); if (ret) { dev_err(obj->dev->dev, @@ -963,6 +968,78 @@ int omap_gem_put_pages(struct drm_gem_object *obj) return 0; } +struct sg_table *omap_gem_get_sg(struct drm_gem_object *obj) +{ + struct omap_gem_object *omap_obj = to_omap_bo(obj); + dma_addr_t addr; + struct sg_table *sgt; + struct scatterlist *sg; + unsigned int count, len, stride, i; + int ret; + + ret = omap_gem_pin(obj, &addr); + if (ret) + return ERR_PTR(ret); + + mutex_lock(&omap_obj->lock); + + sgt = omap_obj->sgt; + if (sgt) + goto out; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto err_unpin; + } + + if (omap_obj->flags & OMAP_BO_TILED_MASK) { + enum tiler_fmt fmt = gem2fmt(omap_obj->flags); + + len = omap_obj->width << (int)fmt; + count = omap_obj->height; + stride = tiler_stride(fmt, 0); + } else { + len = obj->size; + count = 1; + stride = 0; + } + + ret = sg_alloc_table(sgt, count, GFP_KERNEL); + if (ret) + goto err_free; + + for_each_sg(sgt->sgl, sg, count, i) { + sg_set_page(sg, phys_to_page(addr), len, offset_in_page(addr)); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = len; + + addr += stride; + } + + omap_obj->sgt = sgt; +out: + mutex_unlock(&omap_obj->lock); + return sgt; + +err_free: + kfree(sgt); +err_unpin: + mutex_unlock(&omap_obj->lock); + omap_gem_unpin(obj); + return ERR_PTR(ret); +} + +void omap_gem_put_sg(struct drm_gem_object *obj, struct sg_table *sgt) +{ + struct omap_gem_object *omap_obj = to_omap_bo(obj); + + if (WARN_ON(omap_obj->sgt != sgt)) + return; + + omap_gem_unpin(obj); +} + #ifdef CONFIG_DRM_FBDEV_EMULATION /* * Get kernel virtual address for CPU access.. this more or less only diff --git a/drivers/gpu/drm/omapdrm/omap_gem.h b/drivers/gpu/drm/omapdrm/omap_gem.h index eda9b4839c30..19209e319663 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.h +++ b/drivers/gpu/drm/omapdrm/omap_gem.h @@ -82,5 +82,7 @@ u32 omap_gem_flags(struct drm_gem_object *obj); int omap_gem_rotated_dma_addr(struct drm_gem_object *obj, u32 orient, int x, int y, dma_addr_t *dma_addr); int omap_gem_tiled_stride(struct drm_gem_object *obj, u32 orient); +struct sg_table *omap_gem_get_sg(struct drm_gem_object *obj); +void omap_gem_put_sg(struct drm_gem_object *obj, struct sg_table *sgt); #endif /* __OMAPDRM_GEM_H__ */ diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index 809f86cfc540..57af3d97be77 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -23,45 +23,21 @@ static struct sg_table *omap_gem_map_dma_buf( { struct drm_gem_object *obj = attachment->dmabuf->priv; struct sg_table *sg; - dma_addr_t dma_addr; - int ret; - - sg = kzalloc(sizeof(*sg), GFP_KERNEL); - if (!sg) - return ERR_PTR(-ENOMEM); - - /* camera, etc, need physically contiguous.. but we need a - * better way to know this.. - */ - ret = omap_gem_pin(obj, &dma_addr); - if (ret) - goto out; - - ret = sg_alloc_table(sg, 1, GFP_KERNEL); - if (ret) - goto out; - - sg_init_table(sg->sgl, 1); - sg_dma_len(sg->sgl) = obj->size; - sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(dma_addr)), obj->size, 0); - sg_dma_address(sg->sgl) = dma_addr; + sg = omap_gem_get_sg(obj); + if (IS_ERR(sg)) + return sg; /* this must be after omap_gem_pin() to ensure we have pages attached */ omap_gem_dma_sync_buffer(obj, dir); return sg; -out: - kfree(sg); - return ERR_PTR(ret); } static void omap_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { struct drm_gem_object *obj = attachment->dmabuf->priv; - omap_gem_unpin(obj); - sg_free_table(sg); - kfree(sg); + omap_gem_put_sg(obj, sg); } static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer, @@ -114,7 +90,7 @@ struct dma_buf *omap_gem_prime_export(struct drm_gem_object *obj, int flags) DEFINE_DMA_BUF_EXPORT_INFO(exp_info); exp_info.ops = &omap_dmabuf_ops; - exp_info.size = obj->size; + exp_info.size = omap_gem_mmap_size(obj); exp_info.flags = flags; exp_info.priv = obj; diff --git a/drivers/gpu/drm/omapdrm/omap_overlay.c b/drivers/gpu/drm/omapdrm/omap_overlay.c new file mode 100644 index 000000000000..10730c9b2752 --- /dev/null +++ b/drivers/gpu/drm/omapdrm/omap_overlay.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ + * Author: Benoit Parrot <bparrot@ti.com> + */ + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> + +#include "omap_dmm_tiler.h" +#include "omap_drv.h" + +/* + * overlay funcs + */ +static const char * const overlay_id_to_name[] = { + [OMAP_DSS_GFX] = "gfx", + [OMAP_DSS_VIDEO1] = "vid1", + [OMAP_DSS_VIDEO2] = "vid2", + [OMAP_DSS_VIDEO3] = "vid3", +}; + +/* + * Find a free overlay with the required caps and supported fourcc + */ +static struct omap_hw_overlay * +omap_plane_find_free_overlay(struct drm_device *dev, struct drm_plane *hwoverlay_to_plane[], + u32 caps, u32 fourcc) +{ + struct omap_drm_private *priv = dev->dev_private; + int i; + + DBG("caps: %x fourcc: %x", caps, fourcc); + + for (i = 0; i < priv->num_ovls; i++) { + struct omap_hw_overlay *cur = priv->overlays[i]; + + DBG("%d: id: %d cur->caps: %x", + cur->idx, cur->id, cur->caps); + + /* skip if already in-use */ + if (hwoverlay_to_plane[cur->idx]) + continue; + + /* skip if doesn't support some required caps: */ + if (caps & ~cur->caps) + continue; + + /* check supported format */ + if (!dispc_ovl_color_mode_supported(priv->dispc, + cur->id, fourcc)) + continue; + + return cur; + } + + DBG("no match"); + return NULL; +} + +/* + * Assign a new overlay to a plane with the required caps and supported fourcc + * If a plane need a new overlay, the previous one should have been released + * with omap_overlay_release() + * This should be called from the plane atomic_check() in order to prepare the + * next global overlay_map to be enabled when atomic transaction is valid. + */ +int omap_overlay_assign(struct drm_atomic_state *s, struct drm_plane *plane, + u32 caps, u32 fourcc, struct omap_hw_overlay **overlay, + struct omap_hw_overlay **r_overlay) +{ + /* Get the global state of the current atomic transaction */ + struct omap_global_state *state = omap_get_global_state(s); + struct drm_plane **overlay_map = state->hwoverlay_to_plane; + struct omap_hw_overlay *ovl, *r_ovl; + + ovl = omap_plane_find_free_overlay(s->dev, overlay_map, caps, fourcc); + if (!ovl) + return -ENOMEM; + + overlay_map[ovl->idx] = plane; + *overlay = ovl; + + if (r_overlay) { + r_ovl = omap_plane_find_free_overlay(s->dev, overlay_map, + caps, fourcc); + if (!r_ovl) { + overlay_map[r_ovl->idx] = NULL; + *overlay = NULL; + return -ENOMEM; + } + + overlay_map[r_ovl->idx] = plane; + *r_overlay = r_ovl; + } + + DBG("%s: assign to plane %s caps %x", ovl->name, plane->name, caps); + + if (r_overlay) { + DBG("%s: assign to right of plane %s caps %x", + r_ovl->name, plane->name, caps); + } + + return 0; +} + +/* + * Release an overlay from a plane if the plane gets not visible or the plane + * need a new overlay if overlay caps changes. + * This should be called from the plane atomic_check() in order to prepare the + * next global overlay_map to be enabled when atomic transaction is valid. + */ +void omap_overlay_release(struct drm_atomic_state *s, struct omap_hw_overlay *overlay) +{ + /* Get the global state of the current atomic transaction */ + struct omap_global_state *state = omap_get_global_state(s); + struct drm_plane **overlay_map = state->hwoverlay_to_plane; + + if (!overlay) + return; + + if (WARN_ON(!overlay_map[overlay->idx])) + return; + + DBG("%s: release from plane %s", overlay->name, overlay_map[overlay->idx]->name); + + overlay_map[overlay->idx] = NULL; +} + +/* + * Update an overlay state that was attached to a plane before the current atomic state. + * This should be called from the plane atomic_update() or atomic_disable(), + * where an overlay association to a plane could have changed between the old and current + * atomic state. + */ +void omap_overlay_update_state(struct omap_drm_private *priv, + struct omap_hw_overlay *overlay) +{ + struct omap_global_state *state = omap_get_existing_global_state(priv); + struct drm_plane **overlay_map = state->hwoverlay_to_plane; + + /* Check if this overlay is not used anymore, then disable it */ + if (!overlay_map[overlay->idx]) { + DBG("%s: disabled", overlay->name); + + /* disable the overlay */ + dispc_ovl_enable(priv->dispc, overlay->id, false); + } +} + +static void omap_overlay_destroy(struct omap_hw_overlay *overlay) +{ + kfree(overlay); +} + +static struct omap_hw_overlay *omap_overlay_init(enum omap_plane_id overlay_id, + enum omap_overlay_caps caps) +{ + struct omap_hw_overlay *overlay; + + overlay = kzalloc(sizeof(*overlay), GFP_KERNEL); + if (!overlay) + return ERR_PTR(-ENOMEM); + + overlay->name = overlay_id_to_name[overlay_id]; + overlay->id = overlay_id; + overlay->caps = caps; + + return overlay; +} + +int omap_hwoverlays_init(struct omap_drm_private *priv) +{ + static const enum omap_plane_id hw_plane_ids[] = { + OMAP_DSS_GFX, OMAP_DSS_VIDEO1, + OMAP_DSS_VIDEO2, OMAP_DSS_VIDEO3, + }; + u32 num_overlays = dispc_get_num_ovls(priv->dispc); + enum omap_overlay_caps caps; + int i, ret; + + for (i = 0; i < num_overlays; i++) { + struct omap_hw_overlay *overlay; + + caps = dispc_ovl_get_caps(priv->dispc, hw_plane_ids[i]); + overlay = omap_overlay_init(hw_plane_ids[i], caps); + if (IS_ERR(overlay)) { + ret = PTR_ERR(overlay); + dev_err(priv->dev, "failed to construct overlay for %s (%d)\n", + overlay_id_to_name[i], ret); + omap_hwoverlays_destroy(priv); + return ret; + } + overlay->idx = priv->num_ovls; + priv->overlays[priv->num_ovls++] = overlay; + } + + return 0; +} + +void omap_hwoverlays_destroy(struct omap_drm_private *priv) +{ + int i; + + for (i = 0; i < priv->num_ovls; i++) { + omap_overlay_destroy(priv->overlays[i]); + priv->overlays[i] = NULL; + } + + priv->num_ovls = 0; +} diff --git a/drivers/gpu/drm/omapdrm/omap_overlay.h b/drivers/gpu/drm/omapdrm/omap_overlay.h new file mode 100644 index 000000000000..e36a43f35563 --- /dev/null +++ b/drivers/gpu/drm/omapdrm/omap_overlay.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ + * Author: Benoit Parrot <bparrot@ti.com> + */ + +#ifndef __OMAPDRM_OVERLAY_H__ +#define __OMAPDRM_OVERLAY_H__ + +#include <linux/types.h> + +enum drm_plane_type; + +struct drm_device; +struct drm_mode_object; +struct drm_plane; + +/* Used to associate a HW overlay/plane to a plane */ +struct omap_hw_overlay { + unsigned int idx; + + const char *name; + enum omap_plane_id id; + + enum omap_overlay_caps caps; +}; + +int omap_hwoverlays_init(struct omap_drm_private *priv); +void omap_hwoverlays_destroy(struct omap_drm_private *priv); +int omap_overlay_assign(struct drm_atomic_state *s, struct drm_plane *plane, + u32 caps, u32 fourcc, struct omap_hw_overlay **overlay, + struct omap_hw_overlay **r_overlay); +void omap_overlay_release(struct drm_atomic_state *s, struct omap_hw_overlay *overlay); +void omap_overlay_update_state(struct omap_drm_private *priv, struct omap_hw_overlay *overlay); +#endif /* __OMAPDRM_OVERLAY_H__ */ diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 512af976b7e9..b35205c4e979 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -8,6 +8,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_fourcc.h> #include "omap_dmm_tiler.h" #include "omap_drv.h" @@ -16,14 +17,30 @@ * plane funcs */ +#define to_omap_plane_state(x) container_of(x, struct omap_plane_state, base) + +struct omap_plane_state { + /* Must be first. */ + struct drm_plane_state base; + + struct omap_hw_overlay *overlay; + struct omap_hw_overlay *r_overlay; /* right overlay */ +}; + #define to_omap_plane(x) container_of(x, struct omap_plane, base) struct omap_plane { struct drm_plane base; enum omap_plane_id id; - const char *name; }; +bool is_omap_plane_dual_overlay(struct drm_plane_state *state) +{ + struct omap_plane_state *omap_state = to_omap_plane_state(state); + + return !!omap_state->r_overlay; +} + static int omap_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { @@ -46,13 +63,35 @@ static void omap_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct omap_drm_private *priv = plane->dev->dev_private; - struct omap_plane *omap_plane = to_omap_plane(plane); struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); - struct omap_overlay_info info; + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + struct omap_plane_state *new_omap_state; + struct omap_plane_state *old_omap_state; + struct omap_overlay_info info, r_info; + enum omap_plane_id ovl_id, r_ovl_id; int ret; + bool dual_ovl; + + new_omap_state = to_omap_plane_state(new_state); + old_omap_state = to_omap_plane_state(old_state); - DBG("%s, crtc=%p fb=%p", omap_plane->name, new_state->crtc, + dual_ovl = is_omap_plane_dual_overlay(new_state); + + /* Cleanup previously held overlay if needed */ + if (old_omap_state->overlay) + omap_overlay_update_state(priv, old_omap_state->overlay); + if (old_omap_state->r_overlay) + omap_overlay_update_state(priv, old_omap_state->r_overlay); + + if (!new_omap_state->overlay) { + DBG("[PLANE:%d:%s] no overlay attached", plane->base.id, plane->name); + return; + } + + ovl_id = new_omap_state->overlay->id; + DBG("%s, crtc=%p fb=%p", plane->name, new_state->crtc, new_state->fb); memset(&info, 0, sizeof(info)); @@ -67,65 +106,155 @@ static void omap_plane_atomic_update(struct drm_plane *plane, info.color_encoding = new_state->color_encoding; info.color_range = new_state->color_range; + r_info = info; + /* update scanout: */ - omap_framebuffer_update_scanout(new_state->fb, new_state, &info); + omap_framebuffer_update_scanout(new_state->fb, new_state, &info, + dual_ovl ? &r_info : NULL); - DBG("%dx%d -> %dx%d (%d)", info.width, info.height, - info.out_width, info.out_height, - info.screen_width); + DBG("%s: %dx%d -> %dx%d (%d)", + new_omap_state->overlay->name, info.width, info.height, + info.out_width, info.out_height, info.screen_width); DBG("%d,%d %pad %pad", info.pos_x, info.pos_y, &info.paddr, &info.p_uv_addr); + if (dual_ovl) { + r_ovl_id = new_omap_state->r_overlay->id; + /* + * If the current plane uses 2 hw planes the very next + * zorder is used by the r_overlay so we just use the + * main overlay zorder + 1 + */ + r_info.zorder = info.zorder + 1; + + DBG("%s: %dx%d -> %dx%d (%d)", + new_omap_state->r_overlay->name, + r_info.width, r_info.height, + r_info.out_width, r_info.out_height, r_info.screen_width); + DBG("%d,%d %pad %pad", r_info.pos_x, r_info.pos_y, + &r_info.paddr, &r_info.p_uv_addr); + } + /* and finally, update omapdss: */ - ret = dispc_ovl_setup(priv->dispc, omap_plane->id, &info, + ret = dispc_ovl_setup(priv->dispc, ovl_id, &info, omap_crtc_timings(new_state->crtc), false, omap_crtc_channel(new_state->crtc)); if (ret) { dev_err(plane->dev->dev, "Failed to setup plane %s\n", - omap_plane->name); - dispc_ovl_enable(priv->dispc, omap_plane->id, false); + plane->name); + dispc_ovl_enable(priv->dispc, ovl_id, false); return; } - dispc_ovl_enable(priv->dispc, omap_plane->id, true); + dispc_ovl_enable(priv->dispc, ovl_id, true); + + if (dual_ovl) { + ret = dispc_ovl_setup(priv->dispc, r_ovl_id, &r_info, + omap_crtc_timings(new_state->crtc), false, + omap_crtc_channel(new_state->crtc)); + if (ret) { + dev_err(plane->dev->dev, "Failed to setup plane right-overlay %s\n", + plane->name); + dispc_ovl_enable(priv->dispc, r_ovl_id, false); + dispc_ovl_enable(priv->dispc, ovl_id, false); + return; + } + + dispc_ovl_enable(priv->dispc, r_ovl_id, true); + } } static void omap_plane_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); struct omap_drm_private *priv = plane->dev->dev_private; struct omap_plane *omap_plane = to_omap_plane(plane); + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + struct omap_plane_state *new_omap_state; + struct omap_plane_state *old_omap_state; + + new_omap_state = to_omap_plane_state(new_state); + old_omap_state = to_omap_plane_state(old_state); + + if (!old_omap_state->overlay) + return; new_state->rotation = DRM_MODE_ROTATE_0; new_state->zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; - dispc_ovl_enable(priv->dispc, omap_plane->id, false); + omap_overlay_update_state(priv, old_omap_state->overlay); + new_omap_state->overlay = NULL; + + if (is_omap_plane_dual_overlay(old_state)) { + omap_overlay_update_state(priv, old_omap_state->r_overlay); + new_omap_state->r_overlay = NULL; + } } +#define FRAC_16_16(mult, div) (((mult) << 16) / (div)) + static int omap_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, + plane); + struct omap_drm_private *priv = plane->dev->dev_private; + struct omap_plane_state *omap_state = to_omap_plane_state(new_plane_state); + struct omap_global_state *omap_overlay_global_state; struct drm_crtc_state *crtc_state; + bool new_r_hw_overlay = false; + bool new_hw_overlay = false; + u32 max_width, max_height; + struct drm_crtc *crtc; + u16 width, height; + u32 caps = 0; + u32 fourcc; + int ret; - if (!new_plane_state->fb) - return 0; + omap_overlay_global_state = omap_get_global_state(state); + if (IS_ERR(omap_overlay_global_state)) + return PTR_ERR(omap_overlay_global_state); + + dispc_ovl_get_max_size(priv->dispc, &width, &height); + max_width = width << 16; + max_height = height << 16; - /* crtc should only be NULL when disabling (i.e., !new_plane_state->fb) */ - if (WARN_ON(!new_plane_state->crtc)) + crtc = new_plane_state->crtc ? new_plane_state->crtc : plane->state->crtc; + if (!crtc) return 0; - crtc_state = drm_atomic_get_existing_crtc_state(state, - new_plane_state->crtc); + crtc_state = drm_atomic_get_existing_crtc_state(state, crtc); /* we should have a crtc state if the plane is attached to a crtc */ if (WARN_ON(!crtc_state)) return 0; - if (!crtc_state->enable) + /* + * Note: these are just sanity checks to filter out totally bad scaling + * factors. The real limits must be calculated case by case, and + * unfortunately we currently do those checks only at the commit + * phase in dispc. + */ + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + FRAC_16_16(1, 8), FRAC_16_16(8, 1), + true, true); + if (ret) + return ret; + + DBG("%s: visible %d -> %d", plane->name, + old_plane_state->visible, new_plane_state->visible); + + if (!new_plane_state->visible) { + omap_overlay_release(state, omap_state->overlay); + omap_overlay_release(state, omap_state->r_overlay); + omap_state->overlay = NULL; + omap_state->r_overlay = NULL; return 0; + } if (new_plane_state->crtc_x < 0 || new_plane_state->crtc_y < 0) return -EINVAL; @@ -136,10 +265,96 @@ static int omap_plane_atomic_check(struct drm_plane *plane, if (new_plane_state->crtc_y + new_plane_state->crtc_h > crtc_state->adjusted_mode.vdisplay) return -EINVAL; + /* Make sure dimensions are within bounds. */ + if (new_plane_state->src_h > max_height || new_plane_state->crtc_h > height) + return -EINVAL; + + + if (new_plane_state->src_w > max_width || new_plane_state->crtc_w > width) { + bool is_fourcc_yuv = new_plane_state->fb->format->is_yuv; + + if (is_fourcc_yuv && (((new_plane_state->src_w >> 16) / 2 & 1) || + new_plane_state->crtc_w / 2 & 1)) { + /* + * When calculating the split overlay width + * and it yield an odd value we will need to adjust + * the indivual width +/- 1. So make sure it fits + */ + if (new_plane_state->src_w <= ((2 * width - 1) << 16) && + new_plane_state->crtc_w <= (2 * width - 1)) + new_r_hw_overlay = true; + else + return -EINVAL; + } else { + if (new_plane_state->src_w <= (2 * max_width) && + new_plane_state->crtc_w <= (2 * width)) + new_r_hw_overlay = true; + else + return -EINVAL; + } + } + if (new_plane_state->rotation != DRM_MODE_ROTATE_0 && !omap_framebuffer_supports_rotation(new_plane_state->fb)) return -EINVAL; + if ((new_plane_state->src_w >> 16) != new_plane_state->crtc_w || + (new_plane_state->src_h >> 16) != new_plane_state->crtc_h) + caps |= OMAP_DSS_OVL_CAP_SCALE; + + fourcc = new_plane_state->fb->format->format; + + /* + * (re)allocate hw overlay if we don't have one or + * there is a caps mismatch + */ + if (!omap_state->overlay || (caps & ~omap_state->overlay->caps)) { + new_hw_overlay = true; + } else { + /* check supported format */ + if (!dispc_ovl_color_mode_supported(priv->dispc, omap_state->overlay->id, + fourcc)) + new_hw_overlay = true; + } + + /* + * check if we need two overlays and only have 1 or + * if we had 2 overlays but will only need 1 + */ + if ((new_r_hw_overlay && !omap_state->r_overlay) || + (!new_r_hw_overlay && omap_state->r_overlay)) + new_hw_overlay = true; + + if (new_hw_overlay) { + struct omap_hw_overlay *old_ovl = omap_state->overlay; + struct omap_hw_overlay *old_r_ovl = omap_state->r_overlay; + struct omap_hw_overlay *new_ovl = NULL; + struct omap_hw_overlay *new_r_ovl = NULL; + + omap_overlay_release(state, old_ovl); + omap_overlay_release(state, old_r_ovl); + + ret = omap_overlay_assign(state, plane, caps, fourcc, &new_ovl, + new_r_hw_overlay ? &new_r_ovl : NULL); + if (ret) { + DBG("%s: failed to assign hw_overlay", plane->name); + omap_state->overlay = NULL; + omap_state->r_overlay = NULL; + return ret; + } + + omap_state->overlay = new_ovl; + if (new_r_hw_overlay) + omap_state->r_overlay = new_r_ovl; + else + omap_state->r_overlay = NULL; + } + + DBG("plane: %s overlay_id: %d", plane->name, omap_state->overlay->id); + + if (omap_state->r_overlay) + DBG("plane: %s r_overlay_id: %d", plane->name, omap_state->r_overlay->id); + return 0; } @@ -155,7 +370,7 @@ static void omap_plane_destroy(struct drm_plane *plane) { struct omap_plane *omap_plane = to_omap_plane(plane); - DBG("%s", omap_plane->name); + DBG("%s", plane->name); drm_plane_cleanup(plane); @@ -189,11 +404,17 @@ void omap_plane_install_properties(struct drm_plane *plane, static void omap_plane_reset(struct drm_plane *plane) { struct omap_plane *omap_plane = to_omap_plane(plane); + struct omap_plane_state *omap_state; + + if (plane->state) + drm_atomic_helper_plane_destroy_state(plane, plane->state); - drm_atomic_helper_plane_reset(plane); - if (!plane->state) + omap_state = kzalloc(sizeof(*omap_state), GFP_KERNEL); + if (!omap_state) return; + __drm_atomic_helper_plane_reset(plane, &omap_state->base); + /* * Set the zpos default depending on whether we are a primary or overlay * plane. @@ -204,6 +425,47 @@ static void omap_plane_reset(struct drm_plane *plane) plane->state->color_range = DRM_COLOR_YCBCR_FULL_RANGE; } +static struct drm_plane_state * +omap_plane_atomic_duplicate_state(struct drm_plane *plane) +{ + struct omap_plane_state *state, *current_state; + + if (WARN_ON(!plane->state)) + return NULL; + + current_state = to_omap_plane_state(plane->state); + + state = kmalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &state->base); + + state->overlay = current_state->overlay; + state->r_overlay = current_state->r_overlay; + + return &state->base; +} + +static void omap_plane_atomic_print_state(struct drm_printer *p, + const struct drm_plane_state *state) +{ + struct omap_plane_state *omap_state = to_omap_plane_state(state); + + if (omap_state->overlay) + drm_printf(p, "\toverlay=%s (caps=0x%x)\n", + omap_state->overlay->name, + omap_state->overlay->caps); + else + drm_printf(p, "\toverlay=None\n"); + if (omap_state->r_overlay) + drm_printf(p, "\tr_overlay=%s (caps=0x%x)\n", + omap_state->r_overlay->name, + omap_state->r_overlay->caps); + else + drm_printf(p, "\tr_overlay=None\n"); +} + static int omap_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, @@ -239,10 +501,11 @@ static const struct drm_plane_funcs omap_plane_funcs = { .disable_plane = drm_atomic_helper_disable_plane, .reset = omap_plane_reset, .destroy = omap_plane_destroy, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_duplicate_state = omap_plane_atomic_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, .atomic_set_property = omap_plane_atomic_set_property, .atomic_get_property = omap_plane_atomic_get_property, + .atomic_print_state = omap_plane_atomic_print_state, }; static bool omap_plane_supports_yuv(struct drm_plane *plane) @@ -261,20 +524,6 @@ static bool omap_plane_supports_yuv(struct drm_plane *plane) return false; } -static const char *plane_id_to_name[] = { - [OMAP_DSS_GFX] = "gfx", - [OMAP_DSS_VIDEO1] = "vid1", - [OMAP_DSS_VIDEO2] = "vid2", - [OMAP_DSS_VIDEO3] = "vid3", -}; - -static const enum omap_plane_id plane_idx_to_id[] = { - OMAP_DSS_GFX, - OMAP_DSS_VIDEO1, - OMAP_DSS_VIDEO2, - OMAP_DSS_VIDEO3, -}; - /* initialize plane */ struct drm_plane *omap_plane_init(struct drm_device *dev, int idx, enum drm_plane_type type, @@ -284,27 +533,25 @@ struct drm_plane *omap_plane_init(struct drm_device *dev, unsigned int num_planes = dispc_get_num_ovls(priv->dispc); struct drm_plane *plane; struct omap_plane *omap_plane; - enum omap_plane_id id; int ret; u32 nformats; const u32 *formats; - if (WARN_ON(idx >= ARRAY_SIZE(plane_idx_to_id))) + if (WARN_ON(idx >= num_planes)) return ERR_PTR(-EINVAL); - id = plane_idx_to_id[idx]; - - DBG("%s: type=%d", plane_id_to_name[id], type); - omap_plane = kzalloc(sizeof(*omap_plane), GFP_KERNEL); if (!omap_plane) return ERR_PTR(-ENOMEM); - formats = dispc_ovl_get_color_modes(priv->dispc, id); + omap_plane->id = idx; + + DBG("%d: type=%d", omap_plane->id, type); + DBG(" crtc_mask: 0x%04x", possible_crtcs); + + formats = dispc_ovl_get_color_modes(priv->dispc, omap_plane->id); for (nformats = 0; formats[nformats]; ++nformats) ; - omap_plane->id = id; - omap_plane->name = plane_id_to_name[id]; plane = &omap_plane->base; @@ -334,8 +581,8 @@ struct drm_plane *omap_plane_init(struct drm_device *dev, return plane; error: - dev_err(dev->dev, "%s(): could not create plane: %s\n", - __func__, plane_id_to_name[id]); + dev_err(dev->dev, "%s(): could not create plane: %d\n", + __func__, omap_plane->id); kfree(omap_plane); return NULL; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.h b/drivers/gpu/drm/omapdrm/omap_plane.h index 0c28fe8ffa20..a9a33e12722a 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.h +++ b/drivers/gpu/drm/omapdrm/omap_plane.h @@ -22,5 +22,6 @@ struct drm_plane *omap_plane_init(struct drm_device *dev, u32 possible_crtcs); void omap_plane_install_properties(struct drm_plane *plane, struct drm_mode_object *obj); +bool is_omap_plane_dual_overlay(struct drm_plane_state *state); #endif /* __OMAPDRM_PLANE_H__ */ diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 7770b1802291..434c2861bb40 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -152,7 +152,7 @@ config DRM_PANEL_ILITEK_ILI9341 tristate "Ilitek ILI9341 240x320 QVGA panels" depends on OF && SPI depends on DRM_KMS_HELPER - depends on DRM_KMS_CMA_HELPER + depends on DRM_GEM_CMA_HELPER depends on BACKLIGHT_CLASS_DEVICE select DRM_MIPI_DBI help diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 529561b4fbbc..5fcbde789ddb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -84,8 +84,8 @@ static const struct panel_init_cmd boe_tv110c9m_init_cmd[] = { _INIT_DCS_CMD(0x0D, 0x63), _INIT_DCS_CMD(0x0E, 0x91), _INIT_DCS_CMD(0x0F, 0x73), - _INIT_DCS_CMD(0x95, 0xEB), - _INIT_DCS_CMD(0x96, 0xEB), + _INIT_DCS_CMD(0x95, 0xE6), + _INIT_DCS_CMD(0x96, 0xF0), _INIT_DCS_CMD(0x30, 0x11), _INIT_DCS_CMD(0x6D, 0x66), _INIT_DCS_CMD(0x75, 0xA2), @@ -111,18 +111,18 @@ static const struct panel_init_cmd boe_tv110c9m_init_cmd[] = { _INIT_DCS_CMD(0xB0, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x45, 0x00, 0x65, 0x00, 0x81, 0x00, 0x99, 0x00, 0xAE, 0x00, 0xC1), _INIT_DCS_CMD(0xB1, 0x00, 0xD2, 0x01, 0x0B, 0x01, 0x34, 0x01, 0x76, 0x01, 0xA3, 0x01, 0xEF, 0x02, 0x27, 0x02, 0x29), _INIT_DCS_CMD(0xB2, 0x02, 0x5F, 0x02, 0x9E, 0x02, 0xC9, 0x03, 0x00, 0x03, 0x26, 0x03, 0x53, 0x03, 0x63, 0x03, 0x73), - _INIT_DCS_CMD(0xB3, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xAF, 0x03, 0xDF, 0x03, 0xF5, 0x03, 0xF7), + _INIT_DCS_CMD(0xB3, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xA7, 0x03, 0xCF, 0x03, 0xDE, 0x03, 0xE0), _INIT_DCS_CMD(0xB4, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x45, 0x00, 0x65, 0x00, 0x81, 0x00, 0x99, 0x00, 0xAE, 0x00, 0xC1), _INIT_DCS_CMD(0xB5, 0x00, 0xD2, 0x01, 0x0B, 0x01, 0x34, 0x01, 0x76, 0x01, 0xA3, 0x01, 0xEF, 0x02, 0x27, 0x02, 0x29), _INIT_DCS_CMD(0xB6, 0x02, 0x5F, 0x02, 0x9E, 0x02, 0xC9, 0x03, 0x00, 0x03, 0x26, 0x03, 0x53, 0x03, 0x63, 0x03, 0x73), - _INIT_DCS_CMD(0xB7, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xAF, 0x03, 0xDF, 0x03, 0xF5, 0x03, 0xF7), + _INIT_DCS_CMD(0xB7, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xA7, 0x03, 0xCF, 0x03, 0xDE, 0x03, 0xE0), _INIT_DCS_CMD(0xB8, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x45, 0x00, 0x65, 0x00, 0x81, 0x00, 0x99, 0x00, 0xAE, 0x00, 0xC1), _INIT_DCS_CMD(0xB9, 0x00, 0xD2, 0x01, 0x0B, 0x01, 0x34, 0x01, 0x76, 0x01, 0xA3, 0x01, 0xEF, 0x02, 0x27, 0x02, 0x29), _INIT_DCS_CMD(0xBA, 0x02, 0x5F, 0x02, 0x9E, 0x02, 0xC9, 0x03, 0x00, 0x03, 0x26, 0x03, 0x53, 0x03, 0x63, 0x03, 0x73), - _INIT_DCS_CMD(0xBB, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xAF, 0x03, 0xDF, 0x03, 0xF5, 0x03, 0xF7), + _INIT_DCS_CMD(0xBB, 0x03, 0x86, 0x03, 0x9A, 0x03, 0xA7, 0x03, 0xCF, 0x03, 0xDE, 0x03, 0xE0), _INIT_DCS_CMD(0xFF, 0x24), _INIT_DCS_CMD(0xFB, 0x01), @@ -225,6 +225,7 @@ static const struct panel_init_cmd boe_tv110c9m_init_cmd[] = { _INIT_DCS_CMD(0x7F, 0x3C), _INIT_DCS_CMD(0x82, 0x04), _INIT_DCS_CMD(0x97, 0xC0), + _INIT_DCS_CMD(0xB6, 0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x05, 0x00, 0x00), _INIT_DCS_CMD(0x91, 0x44), _INIT_DCS_CMD(0x92, 0xA9), @@ -332,12 +333,39 @@ static const struct panel_init_cmd boe_tv110c9m_init_cmd[] = { _INIT_DCS_CMD(0x34, 0x78), _INIT_DCS_CMD(0x35, 0x16), _INIT_DCS_CMD(0xC8, 0x04), - _INIT_DCS_CMD(0xC9, 0x80), + _INIT_DCS_CMD(0xC9, 0x9E), _INIT_DCS_CMD(0xCA, 0x4E), _INIT_DCS_CMD(0xCB, 0x00), - _INIT_DCS_CMD(0xA9, 0x4C), - _INIT_DCS_CMD(0xAA, 0x47), + _INIT_DCS_CMD(0xA9, 0x49), + _INIT_DCS_CMD(0xAA, 0x4B), + _INIT_DCS_CMD(0xAB, 0x48), + _INIT_DCS_CMD(0xAC, 0x43), + _INIT_DCS_CMD(0xAD, 0x40), + _INIT_DCS_CMD(0xAE, 0x50), + _INIT_DCS_CMD(0xAF, 0x44), + _INIT_DCS_CMD(0xB0, 0x54), + _INIT_DCS_CMD(0xB1, 0x4E), + _INIT_DCS_CMD(0xB2, 0x4D), + _INIT_DCS_CMD(0xB3, 0x4C), + _INIT_DCS_CMD(0xB4, 0x41), + _INIT_DCS_CMD(0xB5, 0x47), + _INIT_DCS_CMD(0xB6, 0x53), + _INIT_DCS_CMD(0xB7, 0x3E), + _INIT_DCS_CMD(0xB8, 0x51), + _INIT_DCS_CMD(0xB9, 0x3C), + _INIT_DCS_CMD(0xBA, 0x3B), + _INIT_DCS_CMD(0xBB, 0x46), + _INIT_DCS_CMD(0xBC, 0x45), + _INIT_DCS_CMD(0xBD, 0x55), + _INIT_DCS_CMD(0xBE, 0x3D), + _INIT_DCS_CMD(0xBF, 0x3F), + _INIT_DCS_CMD(0xC0, 0x52), + _INIT_DCS_CMD(0xC1, 0x4A), + _INIT_DCS_CMD(0xC2, 0x39), + _INIT_DCS_CMD(0xC3, 0x4F), + _INIT_DCS_CMD(0xC4, 0x3A), + _INIT_DCS_CMD(0xC5, 0x42), _INIT_DCS_CMD(0xFF, 0x27), _INIT_DCS_CMD(0xFB, 0x01), @@ -419,7 +447,7 @@ static const struct panel_init_cmd boe_tv110c9m_init_cmd[] = { {}, }; -static const struct panel_init_cmd inx_init_cmd[] = { +static const struct panel_init_cmd inx_hj110iz_init_cmd[] = { _INIT_DCS_CMD(0xFF, 0x20), _INIT_DCS_CMD(0xFB, 0x01), _INIT_DCS_CMD(0x05, 0xD1), @@ -428,10 +456,10 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0x08, 0x4B), _INIT_DCS_CMD(0x0E, 0x91), _INIT_DCS_CMD(0x0F, 0x69), - _INIT_DCS_CMD(0x95, 0xFF), - _INIT_DCS_CMD(0x96, 0xFF), - _INIT_DCS_CMD(0x9D, 0x0A), - _INIT_DCS_CMD(0x9E, 0x0A), + _INIT_DCS_CMD(0x95, 0xF5), + _INIT_DCS_CMD(0x96, 0xF5), + _INIT_DCS_CMD(0x9D, 0x00), + _INIT_DCS_CMD(0x9E, 0x00), _INIT_DCS_CMD(0x69, 0x98), _INIT_DCS_CMD(0x75, 0xA2), _INIT_DCS_CMD(0x77, 0xB3), @@ -493,17 +521,17 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0x2A, 0x03), _INIT_DCS_CMD(0x2B, 0x03), - _INIT_DCS_CMD(0x2F, 0x06), + _INIT_DCS_CMD(0x2F, 0x05), _INIT_DCS_CMD(0x30, 0x32), _INIT_DCS_CMD(0x31, 0x43), - _INIT_DCS_CMD(0x33, 0x06), + _INIT_DCS_CMD(0x33, 0x05), _INIT_DCS_CMD(0x34, 0x32), _INIT_DCS_CMD(0x35, 0x43), _INIT_DCS_CMD(0x37, 0x44), _INIT_DCS_CMD(0x38, 0x40), _INIT_DCS_CMD(0x39, 0x00), - _INIT_DCS_CMD(0x3A, 0x01), - _INIT_DCS_CMD(0x3B, 0x48), + _INIT_DCS_CMD(0x3A, 0x18), + _INIT_DCS_CMD(0x3B, 0x00), _INIT_DCS_CMD(0x3D, 0x93), _INIT_DCS_CMD(0xAB, 0x44), _INIT_DCS_CMD(0xAC, 0x40), @@ -520,8 +548,8 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0x56, 0x08), _INIT_DCS_CMD(0x58, 0x21), _INIT_DCS_CMD(0x59, 0x40), - _INIT_DCS_CMD(0x5A, 0x09), - _INIT_DCS_CMD(0x5B, 0x48), + _INIT_DCS_CMD(0x5A, 0x00), + _INIT_DCS_CMD(0x5B, 0x2C), _INIT_DCS_CMD(0x5E, 0x00, 0x10), _INIT_DCS_CMD(0x5F, 0x00), @@ -558,33 +586,36 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0xEF, 0x01), _INIT_DCS_CMD(0xF0, 0x7A), + _INIT_DCS_CMD(0xB6, 0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x05, 0x00, 0x00), _INIT_DCS_CMD(0xFF, 0x25), _INIT_DCS_CMD(0xFB, 0x01), _INIT_DCS_CMD(0x05, 0x00), + _INIT_DCS_CMD(0x13, 0x02), + _INIT_DCS_CMD(0x14, 0xDF), _INIT_DCS_CMD(0xF1, 0x10), _INIT_DCS_CMD(0x1E, 0x00), - _INIT_DCS_CMD(0x1F, 0x09), - _INIT_DCS_CMD(0x20, 0x46), + _INIT_DCS_CMD(0x1F, 0x00), + _INIT_DCS_CMD(0x20, 0x2C), _INIT_DCS_CMD(0x25, 0x00), - _INIT_DCS_CMD(0x26, 0x09), - _INIT_DCS_CMD(0x27, 0x46), + _INIT_DCS_CMD(0x26, 0x00), + _INIT_DCS_CMD(0x27, 0x2C), _INIT_DCS_CMD(0x3F, 0x80), _INIT_DCS_CMD(0x40, 0x00), _INIT_DCS_CMD(0x43, 0x00), - _INIT_DCS_CMD(0x44, 0x09), - _INIT_DCS_CMD(0x45, 0x46), + _INIT_DCS_CMD(0x44, 0x18), + _INIT_DCS_CMD(0x45, 0x00), - _INIT_DCS_CMD(0x48, 0x09), - _INIT_DCS_CMD(0x49, 0x46), + _INIT_DCS_CMD(0x48, 0x00), + _INIT_DCS_CMD(0x49, 0x2C), _INIT_DCS_CMD(0x5B, 0x80), _INIT_DCS_CMD(0x5C, 0x00), - _INIT_DCS_CMD(0x5D, 0x01), - _INIT_DCS_CMD(0x5E, 0x46), - _INIT_DCS_CMD(0x61, 0x01), - _INIT_DCS_CMD(0x62, 0x46), + _INIT_DCS_CMD(0x5D, 0x00), + _INIT_DCS_CMD(0x5E, 0x00), + _INIT_DCS_CMD(0x61, 0x00), + _INIT_DCS_CMD(0x62, 0x2C), _INIT_DCS_CMD(0x68, 0x10), _INIT_DCS_CMD(0xFF, 0x26), _INIT_DCS_CMD(0xFB, 0x01), @@ -700,16 +731,22 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0xA3, 0x30), _INIT_DCS_CMD(0xA4, 0xC0), _INIT_DCS_CMD(0xE8, 0x00), + _INIT_DCS_CMD(0x97, 0x3C), + _INIT_DCS_CMD(0x98, 0x02), + _INIT_DCS_CMD(0x99, 0x95), + _INIT_DCS_CMD(0x9A, 0x06), + _INIT_DCS_CMD(0x9B, 0x00), + _INIT_DCS_CMD(0x9C, 0x0B), + _INIT_DCS_CMD(0x9D, 0x0A), + _INIT_DCS_CMD(0x9E, 0x90), _INIT_DCS_CMD(0xFF, 0xF0), _INIT_DCS_CMD(0xFB, 0x01), _INIT_DCS_CMD(0x3A, 0x08), _INIT_DCS_CMD(0xFF, 0xD0), _INIT_DCS_CMD(0xFB, 0x01), _INIT_DCS_CMD(0x00, 0x33), - _INIT_DCS_CMD(0x02, 0x77), _INIT_DCS_CMD(0x08, 0x01), _INIT_DCS_CMD(0x09, 0xBF), - _INIT_DCS_CMD(0x28, 0x30), _INIT_DCS_CMD(0x2F, 0x33), _INIT_DCS_CMD(0xFF, 0x23), _INIT_DCS_CMD(0xFB, 0x01), @@ -718,6 +755,9 @@ static const struct panel_init_cmd inx_init_cmd[] = { _INIT_DCS_CMD(0xFF, 0x20), _INIT_DCS_CMD(0xFB, 0x01), _INIT_DCS_CMD(0x30, 0x00), + _INIT_DCS_CMD(0xFF, 0x24), + _INIT_DCS_CMD(0x5C, 0x88), + _INIT_DCS_CMD(0x5D, 0x08), _INIT_DCS_CMD(0xFF, 0x10), _INIT_DCS_CMD(0xB9, 0x01), _INIT_DCS_CMD(0xFF, 0x20), @@ -1312,7 +1352,7 @@ static const struct panel_desc inx_hj110iz_desc = { | MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_VIDEO_BURST, - .init_cmds = inx_init_cmd, + .init_cmds = inx_hj110iz_init_cmd, }; static const struct drm_display_mode boe_tv101wum_nl6_default_mode = { diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dde033066f3d..9e46db5e359c 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3252,6 +3252,33 @@ static const struct panel_desc starry_kr070pe2t = { .connector_type = DRM_MODE_CONNECTOR_DPI, }; +static const struct display_timing tsd_tst043015cmhx_timing = { + .pixelclock = { 5000000, 9000000, 12000000 }, + .hactive = { 480, 480, 480 }, + .hfront_porch = { 4, 5, 65 }, + .hback_porch = { 36, 40, 255 }, + .hsync_len = { 1, 1, 1 }, + .vactive = { 272, 272, 272 }, + .vfront_porch = { 2, 8, 97 }, + .vback_porch = { 3, 8, 31 }, + .vsync_len = { 1, 1, 1 }, + + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, +}; + +static const struct panel_desc tsd_tst043015cmhx = { + .timings = &tsd_tst043015cmhx_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 105, + .height = 67, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, +}; + static const struct drm_display_mode tfc_s9700rtwv43tr_01b_mode = { .clock = 30000, .hdisplay = 800, @@ -3929,6 +3956,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "starry,kr070pe2t", .data = &starry_kr070pe2t, }, { + .compatible = "team-source-display,tst043015cmhx", + .data = &tsd_tst043015cmhx, + }, { .compatible = "tfc,s9700rtwv43tr-01b", .data = &tfc_s9700rtwv43tr_01b, }, { diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 6d9bdb9180cb..ead65f5fa2bc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -223,7 +223,7 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&obj->mappings.list); mutex_init(&obj->mappings.lock); diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig index 3aae387a96af..91ee05b01303 100644 --- a/drivers/gpu/drm/pl111/Kconfig +++ b/drivers/gpu/drm/pl111/Kconfig @@ -6,7 +6,6 @@ config DRM_PL111 depends on VEXPRESS_CONFIG || VEXPRESS_CONFIG=n depends on COMMON_CLK select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_BRIDGE select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 1f9a59601bb1..6a36b0fd845c 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -57,13 +57,16 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) struct qxl_bo *bo; list_for_each_entry(bo, &qdev->gem.objects, list) { - struct dma_resv_list *fobj; - int rel; - - rcu_read_lock(); - fobj = dma_resv_shared_list(bo->tbo.base.resv); - rel = fobj ? fobj->shared_count : 0; - rcu_read_unlock(); + struct dma_resv_iter cursor; + struct dma_fence *fence; + int rel = 0; + + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + rel = 0; + ++rel; + } seq_printf(m, "size %ld, pc %d, num releases %d\n", (unsigned long)bo->tbo.base.size, diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index fc47b0deb021..e4b16421500b 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -29,7 +29,6 @@ #include "qxl_drv.h" -#include <linux/console.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/vgaarb.h> @@ -295,7 +294,7 @@ static struct drm_driver qxl_driver = { static int __init qxl_init(void) { - if (vgacon_text_force() && qxl_modeset == -1) + if (drm_firmware_drivers_only() && qxl_modeset == -1) return -EINVAL; if (qxl_modeset == 0) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index b74cebca1f89..956c72b5aa33 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -31,7 +31,6 @@ #include <linux/compat.h> -#include <linux/console.h> #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> @@ -637,15 +636,11 @@ static struct pci_driver radeon_kms_pci_driver = { static int __init radeon_module_init(void) { - if (vgacon_text_force() && radeon_modeset == -1) { - DRM_INFO("VGACON disable radeon kernel modesetting.\n"); + if (drm_firmware_drivers_only() && radeon_modeset == -1) radeon_modeset = 0; - } - if (radeon_modeset == 0) { - DRM_ERROR("No UMS support in radeon module!\n"); + if (radeon_modeset == 0) return -EINVAL; - } DRM_INFO("radeon kernel modesetting enabled.\n"); radeon_register_atpx_handler(); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 482fb0ae6cb5..e2488559cc9f 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -168,7 +168,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) if (!r) { acpi_status = radeon_acpi_init(rdev); if (acpi_status) - dev_dbg(dev->dev, "Error during ACPI methods call\n"); + dev_dbg(dev->dev, "Error during ACPI methods call\n"); } if (radeon_is_px(dev)) { @@ -648,6 +648,8 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct radeon_device *rdev = dev->dev_private; + struct radeon_fpriv *fpriv; + struct radeon_vm *vm; int r; file_priv->driver_priv = NULL; @@ -660,8 +662,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { - struct radeon_fpriv *fpriv; - struct radeon_vm *vm; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); if (unlikely(!fpriv)) { @@ -672,35 +672,39 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (rdev->accel_working) { vm = &fpriv->vm; r = radeon_vm_init(rdev, vm); - if (r) { - kfree(fpriv); - goto out_suspend; - } + if (r) + goto out_fpriv; r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto out_vm_fini; /* map the ib pool buffer read only into * virtual address space */ vm->ib_bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo); + if (!vm->ib_bo_va) { + r = -ENOMEM; + goto out_vm_fini; + } + r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto out_vm_fini; } file_priv->driver_priv = fpriv; } + if (!r) + goto out_suspend; + +out_vm_fini: + radeon_vm_fini(rdev, vm); +out_fpriv: + kfree(fpriv); out_suspend: pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 511a942e851d..ca4a36464340 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -513,7 +513,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi, * @allocated: allocated a new handle? * * Validates the handle and return the found session index or -EINVAL - * we we don't have another free session index. + * we don't have another free session index. */ static int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle, bool *allocated) diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index b47e74421e34..f6e6a6d5d987 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -4,23 +4,24 @@ config DRM_RCAR_DU depends on DRM && OF depends on ARM || ARM64 depends on ARCH_RENESAS || COMPILE_TEST - imply DRM_RCAR_CMM - imply DRM_RCAR_LVDS select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VIDEOMODE_HELPERS help Choose this option if you have an R-Car chipset. If M is selected the module will be called rcar-du-drm. -config DRM_RCAR_CMM - tristate "R-Car DU Color Management Module (CMM) Support" - depends on DRM && OF +config DRM_RCAR_USE_CMM + bool "R-Car DU Color Management Module (CMM) Support" depends on DRM_RCAR_DU + default DRM_RCAR_DU help Enable support for R-Car Color Management Module (CMM). +config DRM_RCAR_CMM + def_tristate DRM_RCAR_DU + depends on DRM_RCAR_USE_CMM + config DRM_RCAR_DW_HDMI tristate "R-Car Gen3 and RZ/G2 DU HDMI Encoder Support" depends on DRM && OF @@ -28,15 +29,27 @@ config DRM_RCAR_DW_HDMI help Enable support for R-Car Gen3 or RZ/G2 internal HDMI encoder. +config DRM_RCAR_USE_LVDS + bool "R-Car DU LVDS Encoder Support" + depends on DRM_BRIDGE && OF + default DRM_RCAR_DU + help + Enable support for the R-Car Display Unit embedded LVDS encoders. + config DRM_RCAR_LVDS - tristate "R-Car DU LVDS Encoder Support" - depends on DRM && DRM_BRIDGE && OF + def_tristate DRM_RCAR_DU + depends on DRM_RCAR_USE_LVDS select DRM_KMS_HELPER select DRM_PANEL select OF_FLATTREE select OF_OVERLAY + +config DRM_RCAR_MIPI_DSI + tristate "R-Car DU MIPI DSI Encoder Support" + depends on DRM && DRM_BRIDGE && OF + select DRM_MIPI_DSI help - Enable support for the R-Car Display Unit embedded LVDS encoders. + Enable support for the R-Car Display Unit embedded MIPI DSI encoders. config DRM_RCAR_VSP bool "R-Car DU VSP Compositor Support" if ARM diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile index 4d1187ccc3e5..286bc81b3e7c 100644 --- a/drivers/gpu/drm/rcar-du/Makefile +++ b/drivers/gpu/drm/rcar-du/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_DRM_RCAR_CMM) += rcar_cmm.o obj-$(CONFIG_DRM_RCAR_DU) += rcar-du-drm.o obj-$(CONFIG_DRM_RCAR_DW_HDMI) += rcar_dw_hdmi.o obj-$(CONFIG_DRM_RCAR_LVDS) += rcar_lvds.o +obj-$(CONFIG_DRM_RCAR_MIPI_DSI) += rcar_mipi_dsi.o # 'remote-endpoint' is fixed up at run-time DTC_FLAGS_rcar_du_of_lvds_r8a7790 += -Wno-graph_endpoint diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 5672830ca184..f361a604337f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -215,6 +215,7 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) const struct drm_display_mode *mode = &rcrtc->crtc.state->adjusted_mode; struct rcar_du_device *rcdu = rcrtc->dev; unsigned long mode_clock = mode->clock * 1000; + unsigned int hdse_offset; u32 dsmr; u32 escr; @@ -261,12 +262,13 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) rcar_du_group_write(rcrtc->group, DPLLCR, dpllcr); escr = ESCR_DCLKSEL_DCLKIN | div; - } else if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index)) { + } else if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index) || + rcdu->info->dsi_clk_mask & BIT(rcrtc->index)) { /* - * Use the LVDS PLL output as the dot clock when outputting to - * the LVDS encoder on an SoC that supports this clock routing - * option. We use the clock directly in that case, without any - * additional divider. + * Use the external LVDS or DSI PLL output as the dot clock when + * outputting to the LVDS or DSI encoder on an SoC that supports + * this clock routing option. We use the clock directly in that + * case, without any additional divider. */ escr = ESCR_DCLKSEL_DCLKIN; } else { @@ -298,10 +300,15 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) | DSMR_DIPM_DISP | DSMR_CSPM; rcar_du_crtc_write(rcrtc, DSMR, dsmr); + hdse_offset = 19; + if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2)) + hdse_offset += 25; + /* Display timings */ - rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - 19); + rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - + hdse_offset); rcar_du_crtc_write(rcrtc, HDER, mode->htotal - mode->hsync_start + - mode->hdisplay - 19); + mode->hdisplay - hdse_offset); rcar_du_crtc_write(rcrtc, HSWR, mode->hsync_end - mode->hsync_start - 1); rcar_du_crtc_write(rcrtc, HCR, mode->htotal - 1); @@ -836,6 +843,7 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc, struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc); struct rcar_du_device *rcdu = rcrtc->dev; bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE; + unsigned int min_sync_porch; unsigned int vbp; if (interlaced && !rcar_du_has(rcdu, RCAR_DU_FEATURE_INTERLACED)) @@ -843,9 +851,14 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc, /* * The hardware requires a minimum combined horizontal sync and back - * porch of 20 pixels and a minimum vertical back porch of 3 lines. + * porch of 20 pixels (when CMM isn't used) or 45 pixels (when CMM is + * used), and a minimum vertical back porch of 3 lines. */ - if (mode->htotal - mode->hsync_start < 20) + min_sync_porch = 20; + if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2)) + min_sync_porch += 25; + + if (mode->htotal - mode->hsync_start < min_sync_porch) return MODE_HBLANK_NARROW; vbp = (mode->vtotal - mode->vsync_end) / (interlaced ? 2 : 1); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 5612a9e7a905..5a8131ef81d5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -544,10 +544,12 @@ const char *rcar_du_output_name(enum rcar_du_output output) static const char * const names[] = { [RCAR_DU_OUTPUT_DPAD0] = "DPAD0", [RCAR_DU_OUTPUT_DPAD1] = "DPAD1", - [RCAR_DU_OUTPUT_LVDS0] = "LVDS0", - [RCAR_DU_OUTPUT_LVDS1] = "LVDS1", + [RCAR_DU_OUTPUT_DSI0] = "DSI0", + [RCAR_DU_OUTPUT_DSI1] = "DSI1", [RCAR_DU_OUTPUT_HDMI0] = "HDMI0", [RCAR_DU_OUTPUT_HDMI1] = "HDMI1", + [RCAR_DU_OUTPUT_LVDS0] = "LVDS0", + [RCAR_DU_OUTPUT_LVDS1] = "LVDS1", [RCAR_DU_OUTPUT_TCON] = "TCON", }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index eacb1f17f747..190dbb7f15dd 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -327,11 +327,11 @@ const struct rcar_du_format_info *rcar_du_format_info(u32 fourcc) */ static const struct drm_gem_object_funcs rcar_du_gem_funcs = { - .free = drm_gem_cma_free_object, - .print_info = drm_gem_cma_print_info, - .get_sg_table = drm_gem_cma_get_sg_table, - .vmap = drm_gem_cma_vmap, - .mmap = drm_gem_cma_mmap, + .free = drm_gem_cma_object_free, + .print_info = drm_gem_cma_object_print_info, + .get_sg_table = drm_gem_cma_object_get_sg_table, + .vmap = drm_gem_cma_object_vmap, + .mmap = drm_gem_cma_object_mmap, .vm_ops = &drm_gem_cma_vm_ops, }; diff --git a/drivers/gpu/drm/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/rcar-du/rcar_mipi_dsi.c new file mode 100644 index 000000000000..891bb956fd61 --- /dev/null +++ b/drivers/gpu/drm/rcar-du/rcar_mipi_dsi.c @@ -0,0 +1,819 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rcar_mipi_dsi.c -- R-Car MIPI DSI Encoder + * + * Copyright (C) 2020 Renesas Electronics Corporation + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_graph.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/slab.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> +#include <drm/drm_probe_helper.h> + +#include "rcar_mipi_dsi_regs.h" + +struct rcar_mipi_dsi { + struct device *dev; + const struct rcar_mipi_dsi_device_info *info; + struct reset_control *rstc; + + struct mipi_dsi_host host; + struct drm_bridge bridge; + struct drm_bridge *next_bridge; + struct drm_connector connector; + + void __iomem *mmio; + struct { + struct clk *mod; + struct clk *pll; + struct clk *dsi; + } clocks; + + enum mipi_dsi_pixel_format format; + unsigned int num_data_lanes; + unsigned int lanes; +}; + +static inline struct rcar_mipi_dsi * +bridge_to_rcar_mipi_dsi(struct drm_bridge *bridge) +{ + return container_of(bridge, struct rcar_mipi_dsi, bridge); +} + +static inline struct rcar_mipi_dsi * +host_to_rcar_mipi_dsi(struct mipi_dsi_host *host) +{ + return container_of(host, struct rcar_mipi_dsi, host); +} + +static const u32 phtw[] = { + 0x01020114, 0x01600115, /* General testing */ + 0x01030116, 0x0102011d, /* General testing */ + 0x011101a4, 0x018601a4, /* 1Gbps testing */ + 0x014201a0, 0x010001a3, /* 1Gbps testing */ + 0x0101011f, /* 1Gbps testing */ +}; + +static const u32 phtw2[] = { + 0x010c0130, 0x010c0140, /* General testing */ + 0x010c0150, 0x010c0180, /* General testing */ + 0x010c0190, + 0x010a0160, 0x010a0170, + 0x01800164, 0x01800174, /* 1Gbps testing */ +}; + +static const u32 hsfreqrange_table[][2] = { + { 80000000U, 0x00 }, { 90000000U, 0x10 }, { 100000000U, 0x20 }, + { 110000000U, 0x30 }, { 120000000U, 0x01 }, { 130000000U, 0x11 }, + { 140000000U, 0x21 }, { 150000000U, 0x31 }, { 160000000U, 0x02 }, + { 170000000U, 0x12 }, { 180000000U, 0x22 }, { 190000000U, 0x32 }, + { 205000000U, 0x03 }, { 220000000U, 0x13 }, { 235000000U, 0x23 }, + { 250000000U, 0x33 }, { 275000000U, 0x04 }, { 300000000U, 0x14 }, + { 325000000U, 0x25 }, { 350000000U, 0x35 }, { 400000000U, 0x05 }, + { 450000000U, 0x16 }, { 500000000U, 0x26 }, { 550000000U, 0x37 }, + { 600000000U, 0x07 }, { 650000000U, 0x18 }, { 700000000U, 0x28 }, + { 750000000U, 0x39 }, { 800000000U, 0x09 }, { 850000000U, 0x19 }, + { 900000000U, 0x29 }, { 950000000U, 0x3a }, { 1000000000U, 0x0a }, + { 1050000000U, 0x1a }, { 1100000000U, 0x2a }, { 1150000000U, 0x3b }, + { 1200000000U, 0x0b }, { 1250000000U, 0x1b }, { 1300000000U, 0x2b }, + { 1350000000U, 0x3c }, { 1400000000U, 0x0c }, { 1450000000U, 0x1c }, + { 1500000000U, 0x2c }, { 1550000000U, 0x3d }, { 1600000000U, 0x0d }, + { 1650000000U, 0x1d }, { 1700000000U, 0x2e }, { 1750000000U, 0x3e }, + { 1800000000U, 0x0e }, { 1850000000U, 0x1e }, { 1900000000U, 0x2f }, + { 1950000000U, 0x3f }, { 2000000000U, 0x0f }, { 2050000000U, 0x40 }, + { 2100000000U, 0x41 }, { 2150000000U, 0x42 }, { 2200000000U, 0x43 }, + { 2250000000U, 0x44 }, { 2300000000U, 0x45 }, { 2350000000U, 0x46 }, + { 2400000000U, 0x47 }, { 2450000000U, 0x48 }, { 2500000000U, 0x49 }, + { /* sentinel */ }, +}; + +struct vco_cntrl_value { + u32 min_freq; + u32 max_freq; + u16 value; +}; + +static const struct vco_cntrl_value vco_cntrl_table[] = { + { .min_freq = 40000000U, .max_freq = 55000000U, .value = 0x3f }, + { .min_freq = 52500000U, .max_freq = 80000000U, .value = 0x39 }, + { .min_freq = 80000000U, .max_freq = 110000000U, .value = 0x2f }, + { .min_freq = 105000000U, .max_freq = 160000000U, .value = 0x29 }, + { .min_freq = 160000000U, .max_freq = 220000000U, .value = 0x1f }, + { .min_freq = 210000000U, .max_freq = 320000000U, .value = 0x19 }, + { .min_freq = 320000000U, .max_freq = 440000000U, .value = 0x0f }, + { .min_freq = 420000000U, .max_freq = 660000000U, .value = 0x09 }, + { .min_freq = 630000000U, .max_freq = 1149000000U, .value = 0x03 }, + { .min_freq = 1100000000U, .max_freq = 1152000000U, .value = 0x01 }, + { .min_freq = 1150000000U, .max_freq = 1250000000U, .value = 0x01 }, + { /* sentinel */ }, +}; + +static void rcar_mipi_dsi_write(struct rcar_mipi_dsi *dsi, u32 reg, u32 data) +{ + iowrite32(data, dsi->mmio + reg); +} + +static u32 rcar_mipi_dsi_read(struct rcar_mipi_dsi *dsi, u32 reg) +{ + return ioread32(dsi->mmio + reg); +} + +static void rcar_mipi_dsi_clr(struct rcar_mipi_dsi *dsi, u32 reg, u32 clr) +{ + rcar_mipi_dsi_write(dsi, reg, rcar_mipi_dsi_read(dsi, reg) & ~clr); +} + +static void rcar_mipi_dsi_set(struct rcar_mipi_dsi *dsi, u32 reg, u32 set) +{ + rcar_mipi_dsi_write(dsi, reg, rcar_mipi_dsi_read(dsi, reg) | set); +} + +static int rcar_mipi_dsi_phtw_test(struct rcar_mipi_dsi *dsi, u32 phtw) +{ + u32 status; + int ret; + + rcar_mipi_dsi_write(dsi, PHTW, phtw); + + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + !(status & (PHTW_DWEN | PHTW_CWEN)), + 2000, 10000, false, dsi, PHTW); + if (ret < 0) { + dev_err(dsi->dev, "PHY test interface write timeout (0x%08x)\n", + phtw); + return ret; + } + + return ret; +} + +/* ----------------------------------------------------------------------------- + * Hardware Setup + */ + +struct dsi_setup_info { + unsigned long fout; + u16 vco_cntrl; + u16 prop_cntrl; + u16 hsfreqrange; + u16 div; + unsigned int m; + unsigned int n; +}; + +static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi, + struct clk *clk, unsigned long target, + struct dsi_setup_info *setup_info) +{ + + const struct vco_cntrl_value *vco_cntrl; + unsigned long fout_target; + unsigned long fin, fout; + unsigned long hsfreq; + unsigned int best_err = -1; + unsigned int divider; + unsigned int n; + unsigned int i; + unsigned int err; + + /* + * Calculate Fout = dot clock * ColorDepth / (2 * Lane Count) + * The range out Fout is [40 - 1250] Mhz + */ + fout_target = target * mipi_dsi_pixel_format_to_bpp(dsi->format) + / (2 * dsi->lanes); + if (fout_target < 40000000 || fout_target > 1250000000) + return; + + /* Find vco_cntrl */ + for (vco_cntrl = vco_cntrl_table; vco_cntrl->min_freq != 0; vco_cntrl++) { + if (fout_target > vco_cntrl->min_freq && + fout_target <= vco_cntrl->max_freq) { + setup_info->vco_cntrl = vco_cntrl->value; + if (fout_target >= 1150000000) + setup_info->prop_cntrl = 0x0c; + else + setup_info->prop_cntrl = 0x0b; + break; + } + } + + /* Add divider */ + setup_info->div = (setup_info->vco_cntrl & 0x30) >> 4; + + /* Find hsfreqrange */ + hsfreq = fout_target * 2; + for (i = 0; i < ARRAY_SIZE(hsfreqrange_table); i++) { + if (hsfreqrange_table[i][0] >= hsfreq) { + setup_info->hsfreqrange = hsfreqrange_table[i][1]; + break; + } + } + + /* + * Calculate n and m for PLL clock + * Following the HW manual the ranges of n and m are + * n = [3-8] and m = [64-625] + */ + fin = clk_get_rate(clk); + divider = 1 << setup_info->div; + for (n = 3; n < 9; n++) { + unsigned long fpfd; + unsigned int m; + + fpfd = fin / n; + + for (m = 64; m < 626; m++) { + fout = fpfd * m / divider; + err = abs((long)(fout - fout_target) * 10000 / + (long)fout_target); + if (err < best_err) { + setup_info->m = m - 2; + setup_info->n = n - 1; + setup_info->fout = fout; + best_err = err; + if (err == 0) + goto done; + } + } + } + +done: + dev_dbg(dsi->dev, + "%pC %lu Hz -> Fout %lu Hz (target %lu Hz, error %d.%02u%%), PLL M/N/DIV %u/%u/%u\n", + clk, fin, setup_info->fout, fout_target, best_err / 100, + best_err % 100, setup_info->m, setup_info->n, setup_info->div); + dev_dbg(dsi->dev, + "vco_cntrl = 0x%x\tprop_cntrl = 0x%x\thsfreqrange = 0x%x\n", + setup_info->vco_cntrl, setup_info->prop_cntrl, + setup_info->hsfreqrange); +} + +static void rcar_mipi_dsi_set_display_timing(struct rcar_mipi_dsi *dsi, + const struct drm_display_mode *mode) +{ + u32 setr; + u32 vprmset0r; + u32 vprmset1r; + u32 vprmset2r; + u32 vprmset3r; + u32 vprmset4r; + + /* Configuration for Pixel Stream and Packet Header */ + if (mipi_dsi_pixel_format_to_bpp(dsi->format) == 24) + rcar_mipi_dsi_write(dsi, TXVMPSPHSETR, TXVMPSPHSETR_DT_RGB24); + else if (mipi_dsi_pixel_format_to_bpp(dsi->format) == 18) + rcar_mipi_dsi_write(dsi, TXVMPSPHSETR, TXVMPSPHSETR_DT_RGB18); + else if (mipi_dsi_pixel_format_to_bpp(dsi->format) == 16) + rcar_mipi_dsi_write(dsi, TXVMPSPHSETR, TXVMPSPHSETR_DT_RGB16); + else { + dev_warn(dsi->dev, "unsupported format"); + return; + } + + /* Configuration for Blanking sequence and Input Pixel */ + setr = TXVMSETR_HSABPEN_EN | TXVMSETR_HBPBPEN_EN + | TXVMSETR_HFPBPEN_EN | TXVMSETR_SYNSEQ_PULSES + | TXVMSETR_PIXWDTH | TXVMSETR_VSTPM; + rcar_mipi_dsi_write(dsi, TXVMSETR, setr); + + /* Configuration for Video Parameters */ + vprmset0r = (mode->flags & DRM_MODE_FLAG_PVSYNC ? + TXVMVPRMSET0R_VSPOL_HIG : TXVMVPRMSET0R_VSPOL_LOW) + | (mode->flags & DRM_MODE_FLAG_PHSYNC ? + TXVMVPRMSET0R_HSPOL_HIG : TXVMVPRMSET0R_HSPOL_LOW) + | TXVMVPRMSET0R_CSPC_RGB | TXVMVPRMSET0R_BPP_24; + + vprmset1r = TXVMVPRMSET1R_VACTIVE(mode->vdisplay) + | TXVMVPRMSET1R_VSA(mode->vsync_end - mode->vsync_start); + + vprmset2r = TXVMVPRMSET2R_VFP(mode->vsync_start - mode->vdisplay) + | TXVMVPRMSET2R_VBP(mode->vtotal - mode->vsync_end); + + vprmset3r = TXVMVPRMSET3R_HACTIVE(mode->hdisplay) + | TXVMVPRMSET3R_HSA(mode->hsync_end - mode->hsync_start); + + vprmset4r = TXVMVPRMSET4R_HFP(mode->hsync_start - mode->hdisplay) + | TXVMVPRMSET4R_HBP(mode->htotal - mode->hsync_end); + + rcar_mipi_dsi_write(dsi, TXVMVPRMSET0R, vprmset0r); + rcar_mipi_dsi_write(dsi, TXVMVPRMSET1R, vprmset1r); + rcar_mipi_dsi_write(dsi, TXVMVPRMSET2R, vprmset2r); + rcar_mipi_dsi_write(dsi, TXVMVPRMSET3R, vprmset3r); + rcar_mipi_dsi_write(dsi, TXVMVPRMSET4R, vprmset4r); +} + +static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi, + const struct drm_display_mode *mode) +{ + struct dsi_setup_info setup_info = {}; + unsigned int timeout; + int ret, i; + int dsi_format; + u32 phy_setup; + u32 clockset2, clockset3; + u32 ppisetr; + u32 vclkset; + + /* Checking valid format */ + dsi_format = mipi_dsi_pixel_format_to_bpp(dsi->format); + if (dsi_format < 0) { + dev_warn(dsi->dev, "invalid format"); + return -EINVAL; + } + + /* Parameters Calculation */ + rcar_mipi_dsi_parameters_calc(dsi, dsi->clocks.pll, + mode->clock * 1000, &setup_info); + + /* LPCLK enable */ + rcar_mipi_dsi_set(dsi, LPCLKSET, LPCLKSET_CKEN); + + /* CFGCLK enabled */ + rcar_mipi_dsi_set(dsi, CFGCLKSET, CFGCLKSET_CKEN); + + rcar_mipi_dsi_clr(dsi, PHYSETUP, PHYSETUP_RSTZ); + rcar_mipi_dsi_clr(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ); + + rcar_mipi_dsi_set(dsi, PHTC, PHTC_TESTCLR); + rcar_mipi_dsi_clr(dsi, PHTC, PHTC_TESTCLR); + + /* PHY setting */ + phy_setup = rcar_mipi_dsi_read(dsi, PHYSETUP); + phy_setup &= ~PHYSETUP_HSFREQRANGE_MASK; + phy_setup |= PHYSETUP_HSFREQRANGE(setup_info.hsfreqrange); + rcar_mipi_dsi_write(dsi, PHYSETUP, phy_setup); + + for (i = 0; i < ARRAY_SIZE(phtw); i++) { + ret = rcar_mipi_dsi_phtw_test(dsi, phtw[i]); + if (ret < 0) + return ret; + } + + /* PLL Clock Setting */ + rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_SHADOW_CLEAR); + rcar_mipi_dsi_set(dsi, CLOCKSET1, CLOCKSET1_SHADOW_CLEAR); + rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_SHADOW_CLEAR); + + clockset2 = CLOCKSET2_M(setup_info.m) | CLOCKSET2_N(setup_info.n) + | CLOCKSET2_VCO_CNTRL(setup_info.vco_cntrl); + clockset3 = CLOCKSET3_PROP_CNTRL(setup_info.prop_cntrl) + | CLOCKSET3_INT_CNTRL(0) + | CLOCKSET3_CPBIAS_CNTRL(0x10) + | CLOCKSET3_GMP_CNTRL(1); + rcar_mipi_dsi_write(dsi, CLOCKSET2, clockset2); + rcar_mipi_dsi_write(dsi, CLOCKSET3, clockset3); + + rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL); + rcar_mipi_dsi_set(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL); + udelay(10); + rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL); + + ppisetr = PPISETR_DLEN_3 | PPISETR_CLEN; + rcar_mipi_dsi_write(dsi, PPISETR, ppisetr); + + rcar_mipi_dsi_set(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ); + rcar_mipi_dsi_set(dsi, PHYSETUP, PHYSETUP_RSTZ); + usleep_range(400, 500); + + /* Checking PPI clock status register */ + for (timeout = 10; timeout > 0; --timeout) { + if ((rcar_mipi_dsi_read(dsi, PPICLSR) & PPICLSR_STPST) && + (rcar_mipi_dsi_read(dsi, PPIDLSR) & PPIDLSR_STPST) && + (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK)) + break; + + usleep_range(1000, 2000); + } + + if (!timeout) { + dev_err(dsi->dev, "failed to enable PPI clock\n"); + return -ETIMEDOUT; + } + + for (i = 0; i < ARRAY_SIZE(phtw2); i++) { + ret = rcar_mipi_dsi_phtw_test(dsi, phtw2[i]); + if (ret < 0) + return ret; + } + + /* Enable DOT clock */ + vclkset = VCLKSET_CKEN; + rcar_mipi_dsi_set(dsi, VCLKSET, vclkset); + + if (dsi_format == 24) + vclkset |= VCLKSET_BPP_24; + else if (dsi_format == 18) + vclkset |= VCLKSET_BPP_18; + else if (dsi_format == 16) + vclkset |= VCLKSET_BPP_16; + else { + dev_warn(dsi->dev, "unsupported format"); + return -EINVAL; + } + vclkset |= VCLKSET_COLOR_RGB | VCLKSET_DIV(setup_info.div) + | VCLKSET_LANE(dsi->lanes - 1); + + rcar_mipi_dsi_set(dsi, VCLKSET, vclkset); + + /* After setting VCLKSET register, enable VCLKEN */ + rcar_mipi_dsi_set(dsi, VCLKEN, VCLKEN_CKEN); + + dev_dbg(dsi->dev, "DSI device is started\n"); + + return 0; +} + +static void rcar_mipi_dsi_shutdown(struct rcar_mipi_dsi *dsi) +{ + rcar_mipi_dsi_clr(dsi, PHYSETUP, PHYSETUP_RSTZ); + rcar_mipi_dsi_clr(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ); + + dev_dbg(dsi->dev, "DSI device is shutdown\n"); +} + +static int rcar_mipi_dsi_clk_enable(struct rcar_mipi_dsi *dsi) +{ + int ret; + + reset_control_deassert(dsi->rstc); + + ret = clk_prepare_enable(dsi->clocks.mod); + if (ret < 0) + goto err_reset; + + ret = clk_prepare_enable(dsi->clocks.dsi); + if (ret < 0) + goto err_clock; + + return 0; + +err_clock: + clk_disable_unprepare(dsi->clocks.mod); +err_reset: + reset_control_assert(dsi->rstc); + return ret; +} + +static void rcar_mipi_dsi_clk_disable(struct rcar_mipi_dsi *dsi) +{ + clk_disable_unprepare(dsi->clocks.dsi); + clk_disable_unprepare(dsi->clocks.mod); + + reset_control_assert(dsi->rstc); +} + +static int rcar_mipi_dsi_start_hs_clock(struct rcar_mipi_dsi *dsi) +{ + /* + * In HW manual, we need to check TxDDRClkHS-Q Stable? but it dont + * write how to check. So we skip this check in this patch + */ + u32 status; + int ret; + + /* Start HS clock. */ + rcar_mipi_dsi_set(dsi, PPICLCR, PPICLCR_TXREQHS); + + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + status & PPICLSR_TOHS, + 2000, 10000, false, dsi, PPICLSR); + if (ret < 0) { + dev_err(dsi->dev, "failed to enable HS clock\n"); + return ret; + } + + rcar_mipi_dsi_set(dsi, PPICLSCR, PPICLSCR_TOHS); + + return 0; +} + +static int rcar_mipi_dsi_start_video(struct rcar_mipi_dsi *dsi) +{ + u32 status; + int ret; + + /* Wait for the link to be ready. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + !(status & (LINKSR_LPBUSY | LINKSR_HSBUSY)), + 2000, 10000, false, dsi, LINKSR); + if (ret < 0) { + dev_err(dsi->dev, "Link failed to become ready\n"); + return ret; + } + + /* De-assert video FIFO clear. */ + rcar_mipi_dsi_clr(dsi, TXVMCR, TXVMCR_VFCLR); + + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + status & TXVMSR_VFRDY, + 2000, 10000, false, dsi, TXVMSR); + if (ret < 0) { + dev_err(dsi->dev, "Failed to de-assert video FIFO clear\n"); + return ret; + } + + /* Enable transmission in video mode. */ + rcar_mipi_dsi_set(dsi, TXVMCR, TXVMCR_EN_VIDEO); + + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + status & TXVMSR_RDY, + 2000, 10000, false, dsi, TXVMSR); + if (ret < 0) { + dev_err(dsi->dev, "Failed to enable video transmission\n"); + return ret; + } + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Bridge + */ + +static int rcar_mipi_dsi_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + struct rcar_mipi_dsi *dsi = bridge_to_rcar_mipi_dsi(bridge); + + return drm_bridge_attach(bridge->encoder, dsi->next_bridge, bridge, + flags); +} + +static void rcar_mipi_dsi_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct drm_atomic_state *state = old_bridge_state->base.state; + struct rcar_mipi_dsi *dsi = bridge_to_rcar_mipi_dsi(bridge); + const struct drm_display_mode *mode; + struct drm_connector *connector; + struct drm_crtc *crtc; + int ret; + + connector = drm_atomic_get_new_connector_for_encoder(state, + bridge->encoder); + crtc = drm_atomic_get_new_connector_state(state, connector)->crtc; + mode = &drm_atomic_get_new_crtc_state(state, crtc)->adjusted_mode; + + ret = rcar_mipi_dsi_clk_enable(dsi); + if (ret < 0) { + dev_err(dsi->dev, "failed to enable DSI clocks\n"); + return; + } + + ret = rcar_mipi_dsi_startup(dsi, mode); + if (ret < 0) + goto err_dsi_startup; + + rcar_mipi_dsi_set_display_timing(dsi, mode); + + ret = rcar_mipi_dsi_start_hs_clock(dsi); + if (ret < 0) + goto err_dsi_start_hs; + + rcar_mipi_dsi_start_video(dsi); + + return; + +err_dsi_start_hs: + rcar_mipi_dsi_shutdown(dsi); +err_dsi_startup: + rcar_mipi_dsi_clk_disable(dsi); +} + +static void rcar_mipi_dsi_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct rcar_mipi_dsi *dsi = bridge_to_rcar_mipi_dsi(bridge); + + rcar_mipi_dsi_shutdown(dsi); + rcar_mipi_dsi_clk_disable(dsi); +} + +static enum drm_mode_status +rcar_mipi_dsi_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) +{ + if (mode->clock > 297000) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static const struct drm_bridge_funcs rcar_mipi_dsi_bridge_ops = { + .attach = rcar_mipi_dsi_attach, + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_enable = rcar_mipi_dsi_atomic_enable, + .atomic_disable = rcar_mipi_dsi_atomic_disable, + .mode_valid = rcar_mipi_dsi_bridge_mode_valid, +}; + +/* ----------------------------------------------------------------------------- + * Host setting + */ + +static int rcar_mipi_dsi_host_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + int ret; + + if (device->lanes > dsi->num_data_lanes) + return -EINVAL; + + dsi->lanes = device->lanes; + dsi->format = device->format; + + dsi->next_bridge = devm_drm_of_get_bridge(dsi->dev, dsi->dev->of_node, + 1, 0); + if (IS_ERR(dsi->next_bridge)) { + ret = PTR_ERR(dsi->next_bridge); + dev_err(dsi->dev, "failed to get next bridge: %d\n", ret); + return ret; + } + + /* Initialize the DRM bridge. */ + dsi->bridge.funcs = &rcar_mipi_dsi_bridge_ops; + dsi->bridge.of_node = dsi->dev->of_node; + drm_bridge_add(&dsi->bridge); + + return 0; +} + +static int rcar_mipi_dsi_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + + drm_bridge_remove(&dsi->bridge); + + return 0; +} + +static const struct mipi_dsi_host_ops rcar_mipi_dsi_host_ops = { + .attach = rcar_mipi_dsi_host_attach, + .detach = rcar_mipi_dsi_host_detach, +}; + +/* ----------------------------------------------------------------------------- + * Probe & Remove + */ + +static int rcar_mipi_dsi_parse_dt(struct rcar_mipi_dsi *dsi) +{ + struct device_node *ep; + u32 data_lanes[4]; + int ret; + + ep = of_graph_get_endpoint_by_regs(dsi->dev->of_node, 1, 0); + if (!ep) { + dev_dbg(dsi->dev, "unconnected port@1\n"); + return -ENODEV; + } + + ret = of_property_read_variable_u32_array(ep, "data-lanes", data_lanes, + 1, 4); + of_node_put(ep); + + if (ret < 0) { + dev_err(dsi->dev, "missing or invalid data-lanes property\n"); + return -ENODEV; + } + + dsi->num_data_lanes = ret; + return 0; +} + +static struct clk *rcar_mipi_dsi_get_clock(struct rcar_mipi_dsi *dsi, + const char *name, + bool optional) +{ + struct clk *clk; + + clk = devm_clk_get(dsi->dev, name); + if (!IS_ERR(clk)) + return clk; + + if (PTR_ERR(clk) == -ENOENT && optional) + return NULL; + + dev_err_probe(dsi->dev, PTR_ERR(clk), "failed to get %s clock\n", + name ? name : "module"); + + return clk; +} + +static int rcar_mipi_dsi_get_clocks(struct rcar_mipi_dsi *dsi) +{ + dsi->clocks.mod = rcar_mipi_dsi_get_clock(dsi, NULL, false); + if (IS_ERR(dsi->clocks.mod)) + return PTR_ERR(dsi->clocks.mod); + + dsi->clocks.pll = rcar_mipi_dsi_get_clock(dsi, "pll", true); + if (IS_ERR(dsi->clocks.pll)) + return PTR_ERR(dsi->clocks.pll); + + dsi->clocks.dsi = rcar_mipi_dsi_get_clock(dsi, "dsi", true); + if (IS_ERR(dsi->clocks.dsi)) + return PTR_ERR(dsi->clocks.dsi); + + if (!dsi->clocks.pll && !dsi->clocks.dsi) { + dev_err(dsi->dev, "no input clock (pll, dsi)\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_mipi_dsi_probe(struct platform_device *pdev) +{ + struct rcar_mipi_dsi *dsi; + struct resource *mem; + int ret; + + dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL); + if (dsi == NULL) + return -ENOMEM; + + platform_set_drvdata(pdev, dsi); + + dsi->dev = &pdev->dev; + dsi->info = of_device_get_match_data(&pdev->dev); + + ret = rcar_mipi_dsi_parse_dt(dsi); + if (ret < 0) + return ret; + + /* Acquire resources. */ + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dsi->mmio = devm_ioremap_resource(dsi->dev, mem); + if (IS_ERR(dsi->mmio)) + return PTR_ERR(dsi->mmio); + + ret = rcar_mipi_dsi_get_clocks(dsi); + if (ret < 0) + return ret; + + dsi->rstc = devm_reset_control_get(dsi->dev, NULL); + if (IS_ERR(dsi->rstc)) { + dev_err(dsi->dev, "failed to get cpg reset\n"); + return PTR_ERR(dsi->rstc); + } + + /* Initialize the DSI host. */ + dsi->host.dev = dsi->dev; + dsi->host.ops = &rcar_mipi_dsi_host_ops; + ret = mipi_dsi_host_register(&dsi->host); + if (ret < 0) + return ret; + + return 0; +} + +static int rcar_mipi_dsi_remove(struct platform_device *pdev) +{ + struct rcar_mipi_dsi *dsi = platform_get_drvdata(pdev); + + mipi_dsi_host_unregister(&dsi->host); + + return 0; +} + +static const struct of_device_id rcar_mipi_dsi_of_table[] = { + { .compatible = "renesas,r8a779a0-dsi-csi2-tx" }, + { } +}; + +MODULE_DEVICE_TABLE(of, rcar_mipi_dsi_of_table); + +static struct platform_driver rcar_mipi_dsi_platform_driver = { + .probe = rcar_mipi_dsi_probe, + .remove = rcar_mipi_dsi_remove, + .driver = { + .name = "rcar-mipi-dsi", + .of_match_table = rcar_mipi_dsi_of_table, + }, +}; + +module_platform_driver(rcar_mipi_dsi_platform_driver); + +MODULE_DESCRIPTION("Renesas R-Car MIPI DSI Encoder Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/rcar-du/rcar_mipi_dsi_regs.h new file mode 100644 index 000000000000..0e7a9274749f --- /dev/null +++ b/drivers/gpu/drm/rcar-du/rcar_mipi_dsi_regs.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * rcar_mipi_dsi_regs.h -- R-Car MIPI DSI Interface Registers Definitions + * + * Copyright (C) 2020 Renesas Electronics Corporation + */ + +#ifndef __RCAR_MIPI_DSI_REGS_H__ +#define __RCAR_MIPI_DSI_REGS_H__ + +#define LINKSR 0x010 +#define LINKSR_LPBUSY (1 << 1) +#define LINKSR_HSBUSY (1 << 0) + +/* + * Video Mode Register + */ +#define TXVMSETR 0x180 +#define TXVMSETR_SYNSEQ_PULSES (0 << 16) +#define TXVMSETR_SYNSEQ_EVENTS (1 << 16) +#define TXVMSETR_VSTPM (1 << 15) +#define TXVMSETR_PIXWDTH (1 << 8) +#define TXVMSETR_VSEN_EN (1 << 4) +#define TXVMSETR_VSEN_DIS (0 << 4) +#define TXVMSETR_HFPBPEN_EN (1 << 2) +#define TXVMSETR_HFPBPEN_DIS (0 << 2) +#define TXVMSETR_HBPBPEN_EN (1 << 1) +#define TXVMSETR_HBPBPEN_DIS (0 << 1) +#define TXVMSETR_HSABPEN_EN (1 << 0) +#define TXVMSETR_HSABPEN_DIS (0 << 0) + +#define TXVMCR 0x190 +#define TXVMCR_VFCLR (1 << 12) +#define TXVMCR_EN_VIDEO (1 << 0) + +#define TXVMSR 0x1a0 +#define TXVMSR_STR (1 << 16) +#define TXVMSR_VFRDY (1 << 12) +#define TXVMSR_ACT (1 << 8) +#define TXVMSR_RDY (1 << 0) + +#define TXVMSCR 0x1a4 +#define TXVMSCR_STR (1 << 16) + +#define TXVMPSPHSETR 0x1c0 +#define TXVMPSPHSETR_DT_RGB16 (0x0e << 16) +#define TXVMPSPHSETR_DT_RGB18 (0x1e << 16) +#define TXVMPSPHSETR_DT_RGB18_LS (0x2e << 16) +#define TXVMPSPHSETR_DT_RGB24 (0x3e << 16) +#define TXVMPSPHSETR_DT_YCBCR16 (0x2c << 16) + +#define TXVMVPRMSET0R 0x1d0 +#define TXVMVPRMSET0R_HSPOL_HIG (0 << 17) +#define TXVMVPRMSET0R_HSPOL_LOW (1 << 17) +#define TXVMVPRMSET0R_VSPOL_HIG (0 << 16) +#define TXVMVPRMSET0R_VSPOL_LOW (1 << 16) +#define TXVMVPRMSET0R_CSPC_RGB (0 << 4) +#define TXVMVPRMSET0R_CSPC_YCbCr (1 << 4) +#define TXVMVPRMSET0R_BPP_16 (0 << 0) +#define TXVMVPRMSET0R_BPP_18 (1 << 0) +#define TXVMVPRMSET0R_BPP_24 (2 << 0) + +#define TXVMVPRMSET1R 0x1d4 +#define TXVMVPRMSET1R_VACTIVE(x) (((x) & 0x7fff) << 16) +#define TXVMVPRMSET1R_VSA(x) (((x) & 0xfff) << 0) + +#define TXVMVPRMSET2R 0x1d8 +#define TXVMVPRMSET2R_VFP(x) (((x) & 0x1fff) << 16) +#define TXVMVPRMSET2R_VBP(x) (((x) & 0x1fff) << 0) + +#define TXVMVPRMSET3R 0x1dc +#define TXVMVPRMSET3R_HACTIVE(x) (((x) & 0x7fff) << 16) +#define TXVMVPRMSET3R_HSA(x) (((x) & 0xfff) << 0) + +#define TXVMVPRMSET4R 0x1e0 +#define TXVMVPRMSET4R_HFP(x) (((x) & 0x1fff) << 16) +#define TXVMVPRMSET4R_HBP(x) (((x) & 0x1fff) << 0) + +/* + * PHY-Protocol Interface (PPI) Registers + */ +#define PPISETR 0x700 +#define PPISETR_DLEN_0 (0x1 << 0) +#define PPISETR_DLEN_1 (0x3 << 0) +#define PPISETR_DLEN_2 (0x7 << 0) +#define PPISETR_DLEN_3 (0xf << 0) +#define PPISETR_CLEN (1 << 8) + +#define PPICLCR 0x710 +#define PPICLCR_TXREQHS (1 << 8) +#define PPICLCR_TXULPSEXT (1 << 1) +#define PPICLCR_TXULPSCLK (1 << 0) + +#define PPICLSR 0x720 +#define PPICLSR_HSTOLP (1 << 27) +#define PPICLSR_TOHS (1 << 26) +#define PPICLSR_STPST (1 << 0) + +#define PPICLSCR 0x724 +#define PPICLSCR_HSTOLP (1 << 27) +#define PPICLSCR_TOHS (1 << 26) + +#define PPIDLSR 0x760 +#define PPIDLSR_STPST (0xf << 0) + +/* + * Clocks registers + */ +#define LPCLKSET 0x1000 +#define LPCLKSET_CKEN (1 << 8) +#define LPCLKSET_LPCLKDIV(x) (((x) & 0x3f) << 0) + +#define CFGCLKSET 0x1004 +#define CFGCLKSET_CKEN (1 << 8) +#define CFGCLKSET_CFGCLKDIV(x) (((x) & 0x3f) << 0) + +#define DOTCLKDIV 0x1008 +#define DOTCLKDIV_CKEN (1 << 8) +#define DOTCLKDIV_DOTCLKDIV(x) (((x) & 0x3f) << 0) + +#define VCLKSET 0x100c +#define VCLKSET_CKEN (1 << 16) +#define VCLKSET_COLOR_RGB (0 << 8) +#define VCLKSET_COLOR_YCC (1 << 8) +#define VCLKSET_DIV(x) (((x) & 0x3) << 4) +#define VCLKSET_BPP_16 (0 << 2) +#define VCLKSET_BPP_18 (1 << 2) +#define VCLKSET_BPP_18L (2 << 2) +#define VCLKSET_BPP_24 (3 << 2) +#define VCLKSET_LANE(x) (((x) & 0x3) << 0) + +#define VCLKEN 0x1010 +#define VCLKEN_CKEN (1 << 0) + +#define PHYSETUP 0x1014 +#define PHYSETUP_HSFREQRANGE(x) (((x) & 0x7f) << 16) +#define PHYSETUP_HSFREQRANGE_MASK (0x7f << 16) +#define PHYSETUP_CFGCLKFREQRANGE(x) (((x) & 0x3f) << 8) +#define PHYSETUP_SHUTDOWNZ (1 << 1) +#define PHYSETUP_RSTZ (1 << 0) + +#define CLOCKSET1 0x101c +#define CLOCKSET1_LOCK_PHY (1 << 17) +#define CLOCKSET1_LOCK (1 << 16) +#define CLOCKSET1_CLKSEL (1 << 8) +#define CLOCKSET1_CLKINSEL_EXTAL (0 << 2) +#define CLOCKSET1_CLKINSEL_DIG (1 << 2) +#define CLOCKSET1_CLKINSEL_DU (1 << 3) +#define CLOCKSET1_SHADOW_CLEAR (1 << 1) +#define CLOCKSET1_UPDATEPLL (1 << 0) + +#define CLOCKSET2 0x1020 +#define CLOCKSET2_M(x) (((x) & 0xfff) << 16) +#define CLOCKSET2_VCO_CNTRL(x) (((x) & 0x3f) << 8) +#define CLOCKSET2_N(x) (((x) & 0xf) << 0) + +#define CLOCKSET3 0x1024 +#define CLOCKSET3_PROP_CNTRL(x) (((x) & 0x3f) << 24) +#define CLOCKSET3_INT_CNTRL(x) (((x) & 0x3f) << 16) +#define CLOCKSET3_CPBIAS_CNTRL(x) (((x) & 0x7f) << 8) +#define CLOCKSET3_GMP_CNTRL(x) (((x) & 0x3) << 0) + +#define PHTW 0x1034 +#define PHTW_DWEN (1 << 24) +#define PHTW_TESTDIN_DATA(x) (((x) & 0xff) << 16) +#define PHTW_CWEN (1 << 8) +#define PHTW_TESTDIN_CODE(x) (((x) & 0xff) << 0) + +#define PHTC 0x103c +#define PHTC_TESTCLR (1 << 0) + +#endif /* __RCAR_MIPI_DSI_REGS_H__ */ diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index 17a9e7eb2130..1a56f696558c 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -5,7 +5,6 @@ rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \ rockchip_drm_gem.o rockchip_drm_vop.o rockchip_vop_reg.o -rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o rockchipdrm-$(CONFIG_ROCKCHIP_ANALOGIX_DP) += analogix_dp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_CDN_DP) += cdn-dp-core.o cdn-dp-reg.o diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 69c699459dce..bec207de4544 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -26,7 +26,6 @@ #include "rockchip_drm_drv.h" #include "rockchip_drm_fb.h" -#include "rockchip_drm_fbdev.h" #include "rockchip_drm_gem.h" #define DRIVER_NAME "rockchip" @@ -159,10 +158,6 @@ static int rockchip_drm_bind(struct device *dev) drm_mode_config_reset(drm_dev); - ret = rockchip_drm_fbdev_init(drm_dev); - if (ret) - goto err_unbind_all; - /* init kms poll for handling hpd */ drm_kms_helper_poll_init(drm_dev); @@ -170,10 +165,11 @@ static int rockchip_drm_bind(struct device *dev) if (ret) goto err_kms_helper_poll_fini; + drm_fbdev_generic_setup(drm_dev, 0); + return 0; err_kms_helper_poll_fini: drm_kms_helper_poll_fini(drm_dev); - rockchip_drm_fbdev_fini(drm_dev); err_unbind_all: component_unbind_all(dev, drm_dev); err_iommu_cleanup: @@ -189,7 +185,6 @@ static void rockchip_drm_unbind(struct device *dev) drm_dev_unregister(drm_dev); - rockchip_drm_fbdev_fini(drm_dev); drm_kms_helper_poll_fini(drm_dev); drm_atomic_helper_shutdown(drm_dev); @@ -203,7 +198,6 @@ DEFINE_DRM_GEM_FOPS(rockchip_drm_driver_fops); static const struct drm_driver rockchip_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, - .lastclose = drm_fb_helper_lastclose, .dumb_create = rockchip_gem_dumb_create, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index aa0909e8edf9..143a48330f84 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -43,8 +43,6 @@ struct rockchip_crtc_state { * @mm_lock: protect drm_mm on multi-threads. */ struct rockchip_drm_private { - struct drm_fb_helper fbdev_helper; - struct drm_gem_object *fbdev_bo; struct iommu_domain *domain; struct mutex mm_lock; struct drm_mm mm; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c deleted file mode 100644 index d8418dd39d0e..000000000000 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c +++ /dev/null @@ -1,164 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd - * Author:Mark Yao <mark.yao@rock-chips.com> - */ - -#include <drm/drm.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_fourcc.h> -#include <drm/drm_prime.h> -#include <drm/drm_probe_helper.h> - -#include "rockchip_drm_drv.h" -#include "rockchip_drm_gem.h" -#include "rockchip_drm_fb.h" -#include "rockchip_drm_fbdev.h" - -#define PREFERRED_BPP 32 -#define to_drm_private(x) \ - container_of(x, struct rockchip_drm_private, fbdev_helper) - -static int rockchip_fbdev_mmap(struct fb_info *info, - struct vm_area_struct *vma) -{ - struct drm_fb_helper *helper = info->par; - struct rockchip_drm_private *private = to_drm_private(helper); - - return drm_gem_prime_mmap(private->fbdev_bo, vma); -} - -static const struct fb_ops rockchip_drm_fbdev_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_mmap = rockchip_fbdev_mmap, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - -static int rockchip_drm_fbdev_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct rockchip_drm_private *private = to_drm_private(helper); - struct drm_mode_fb_cmd2 mode_cmd = { 0 }; - struct drm_device *dev = helper->dev; - struct rockchip_gem_object *rk_obj; - struct drm_framebuffer *fb; - unsigned int bytes_per_pixel; - unsigned long offset; - struct fb_info *fbi; - size_t size; - int ret; - - bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8); - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - mode_cmd.pitches[0] = sizes->surface_width * bytes_per_pixel; - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - size = mode_cmd.pitches[0] * mode_cmd.height; - - rk_obj = rockchip_gem_create_object(dev, size, true); - if (IS_ERR(rk_obj)) - return -ENOMEM; - - private->fbdev_bo = &rk_obj->base; - - fbi = drm_fb_helper_alloc_fbi(helper); - if (IS_ERR(fbi)) { - DRM_DEV_ERROR(dev->dev, "Failed to create framebuffer info.\n"); - ret = PTR_ERR(fbi); - goto out; - } - - helper->fb = rockchip_drm_framebuffer_init(dev, &mode_cmd, - private->fbdev_bo); - if (IS_ERR(helper->fb)) { - DRM_DEV_ERROR(dev->dev, - "Failed to allocate DRM framebuffer.\n"); - ret = PTR_ERR(helper->fb); - goto out; - } - - fbi->fbops = &rockchip_drm_fbdev_ops; - - fb = helper->fb; - drm_fb_helper_fill_info(fbi, helper, sizes); - - offset = fbi->var.xoffset * bytes_per_pixel; - offset += fbi->var.yoffset * fb->pitches[0]; - - dev->mode_config.fb_base = 0; - fbi->screen_base = rk_obj->kvaddr + offset; - fbi->screen_size = rk_obj->base.size; - fbi->fix.smem_len = rk_obj->base.size; - - DRM_DEBUG_KMS("FB [%dx%d]-%d kvaddr=%p offset=%ld size=%zu\n", - fb->width, fb->height, fb->format->depth, - rk_obj->kvaddr, - offset, size); - - return 0; - -out: - rockchip_gem_free_object(&rk_obj->base); - return ret; -} - -static const struct drm_fb_helper_funcs rockchip_drm_fb_helper_funcs = { - .fb_probe = rockchip_drm_fbdev_create, -}; - -int rockchip_drm_fbdev_init(struct drm_device *dev) -{ - struct rockchip_drm_private *private = dev->dev_private; - struct drm_fb_helper *helper; - int ret; - - if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector) - return -EINVAL; - - helper = &private->fbdev_helper; - - drm_fb_helper_prepare(dev, helper, &rockchip_drm_fb_helper_funcs); - - ret = drm_fb_helper_init(dev, helper); - if (ret < 0) { - DRM_DEV_ERROR(dev->dev, - "Failed to initialize drm fb helper - %d.\n", - ret); - return ret; - } - - ret = drm_fb_helper_initial_config(helper, PREFERRED_BPP); - if (ret < 0) { - DRM_DEV_ERROR(dev->dev, - "Failed to set initial hw config - %d.\n", - ret); - goto err_drm_fb_helper_fini; - } - - return 0; - -err_drm_fb_helper_fini: - drm_fb_helper_fini(helper); - return ret; -} - -void rockchip_drm_fbdev_fini(struct drm_device *dev) -{ - struct rockchip_drm_private *private = dev->dev_private; - struct drm_fb_helper *helper; - - helper = &private->fbdev_helper; - - drm_fb_helper_unregister_fbi(helper); - - if (helper->fb) - drm_framebuffer_put(helper->fb); - - drm_fb_helper_fini(helper); -} diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h deleted file mode 100644 index 5fb7ac2371a8..000000000000 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd - * Author:Mark Yao <mark.yao@rock-chips.com> - */ - -#ifndef _ROCKCHIP_DRM_FBDEV_H -#define _ROCKCHIP_DRM_FBDEV_H - -#ifdef CONFIG_DRM_FBDEV_EMULATION -int rockchip_drm_fbdev_init(struct drm_device *dev); -void rockchip_drm_fbdev_fini(struct drm_device *dev); -#else -static inline int rockchip_drm_fbdev_init(struct drm_device *dev) -{ - return 0; -} - -static inline void rockchip_drm_fbdev_fini(struct drm_device *dev) -{ -} -#endif - -#endif /* _ROCKCHIP_DRM_FBDEV_H */ diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig index e2a6c82c8252..288b838a904a 100644 --- a/drivers/gpu/drm/shmobile/Kconfig +++ b/drivers/gpu/drm/shmobile/Kconfig @@ -5,7 +5,6 @@ config DRM_SHMOBILE depends on ARCH_SHMOBILE || COMPILE_TEST select BACKLIGHT_CLASS_DEVICE select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER help Choose this option if you have an SH Mobile chipset. diff --git a/drivers/gpu/drm/sprd/Kconfig b/drivers/gpu/drm/sprd/Kconfig new file mode 100644 index 000000000000..3edeaeca0e65 --- /dev/null +++ b/drivers/gpu/drm/sprd/Kconfig @@ -0,0 +1,13 @@ +config DRM_SPRD + tristate "DRM Support for Unisoc SoCs Platform" + depends on ARCH_SPRD || COMPILE_TEST + depends on DRM && OF + select DRM_GEM_CMA_HELPER + select DRM_KMS_CMA_HELPER + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select VIDEOMODE_HELPERS + help + Choose this option if you have a Unisoc chipset. + If M is selected the module will be called sprd_drm. + diff --git a/drivers/gpu/drm/sprd/Makefile b/drivers/gpu/drm/sprd/Makefile new file mode 100644 index 000000000000..e82e6a6f89de --- /dev/null +++ b/drivers/gpu/drm/sprd/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +sprd-drm-y := sprd_drm.o \ + sprd_dpu.o \ + sprd_dsi.o \ + megacores_pll.o + +obj-$(CONFIG_DRM_SPRD) += sprd-drm.o
\ No newline at end of file diff --git a/drivers/gpu/drm/sprd/megacores_pll.c b/drivers/gpu/drm/sprd/megacores_pll.c new file mode 100644 index 000000000000..3091dfdc11e3 --- /dev/null +++ b/drivers/gpu/drm/sprd/megacores_pll.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#include <asm/div64.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/regmap.h> +#include <linux/string.h> + +#include "sprd_dsi.h" + +#define L 0 +#define H 1 +#define CLK 0 +#define DATA 1 +#define INFINITY 0xffffffff +#define MIN_OUTPUT_FREQ (100) + +#define AVERAGE(a, b) (min(a, b) + abs((b) - (a)) / 2) + +/* sharkle */ +#define VCO_BAND_LOW 750 +#define VCO_BAND_MID 1100 +#define VCO_BAND_HIGH 1500 +#define PHY_REF_CLK 26000 + +static int dphy_calc_pll_param(struct dphy_pll *pll) +{ + const u32 khz = 1000; + const u32 mhz = 1000000; + const unsigned long long factor = 100; + unsigned long long tmp; + int i; + + pll->potential_fvco = pll->freq / khz; + pll->ref_clk = PHY_REF_CLK / khz; + + for (i = 0; i < 4; ++i) { + if (pll->potential_fvco >= VCO_BAND_LOW && + pll->potential_fvco <= VCO_BAND_HIGH) { + pll->fvco = pll->potential_fvco; + pll->out_sel = BIT(i); + break; + } + pll->potential_fvco <<= 1; + } + if (pll->fvco == 0) + return -EINVAL; + + if (pll->fvco >= VCO_BAND_LOW && pll->fvco <= VCO_BAND_MID) { + /* vco band control */ + pll->vco_band = 0x0; + /* low pass filter control */ + pll->lpf_sel = 1; + } else if (pll->fvco > VCO_BAND_MID && pll->fvco <= VCO_BAND_HIGH) { + pll->vco_band = 0x1; + pll->lpf_sel = 0; + } else { + return -EINVAL; + } + + pll->nint = pll->fvco / pll->ref_clk; + tmp = pll->fvco * factor * mhz; + do_div(tmp, pll->ref_clk); + tmp = tmp - pll->nint * factor * mhz; + tmp *= BIT(20); + do_div(tmp, 100000000); + pll->kint = (u32)tmp; + pll->refin = 3; /* pre-divider bypass */ + pll->sdm_en = true; /* use fraction N PLL */ + pll->fdk_s = 0x1; /* fraction */ + pll->cp_s = 0x0; + pll->det_delay = 0x1; + + return 0; +} + +static void dphy_set_pll_reg(struct dphy_pll *pll, struct regmap *regmap) +{ + u8 reg_val[9] = {0}; + int i; + + u8 reg_addr[] = { + 0x03, 0x04, 0x06, 0x08, 0x09, + 0x0a, 0x0b, 0x0e, 0x0f + }; + + reg_val[0] = 1 | (1 << 1) | (pll->lpf_sel << 2); + reg_val[1] = pll->div | (1 << 3) | (pll->cp_s << 5) | (pll->fdk_s << 7); + reg_val[2] = pll->nint; + reg_val[3] = pll->vco_band | (pll->sdm_en << 1) | (pll->refin << 2); + reg_val[4] = pll->kint >> 12; + reg_val[5] = pll->kint >> 4; + reg_val[6] = pll->out_sel | ((pll->kint << 4) & 0xf); + reg_val[7] = 1 << 4; + reg_val[8] = pll->det_delay; + + for (i = 0; i < sizeof(reg_addr); ++i) { + regmap_write(regmap, reg_addr[i], reg_val[i]); + DRM_DEBUG("%02x: %02x\n", reg_addr[i], reg_val[i]); + } +} + +int dphy_pll_config(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + struct regmap *regmap = ctx->regmap; + struct dphy_pll *pll = &ctx->pll; + int ret; + + pll->freq = dsi->slave->hs_rate; + + /* FREQ = 26M * (NINT + KINT / 2^20) / out_sel */ + ret = dphy_calc_pll_param(pll); + if (ret) { + drm_err(dsi->drm, "failed to calculate dphy pll parameters\n"); + return ret; + } + dphy_set_pll_reg(pll, regmap); + + return 0; +} + +static void dphy_set_timing_reg(struct regmap *regmap, int type, u8 val[]) +{ + switch (type) { + case REQUEST_TIME: + regmap_write(regmap, 0x31, val[CLK]); + regmap_write(regmap, 0x41, val[DATA]); + regmap_write(regmap, 0x51, val[DATA]); + regmap_write(regmap, 0x61, val[DATA]); + regmap_write(regmap, 0x71, val[DATA]); + + regmap_write(regmap, 0x90, val[CLK]); + regmap_write(regmap, 0xa0, val[DATA]); + regmap_write(regmap, 0xb0, val[DATA]); + regmap_write(regmap, 0xc0, val[DATA]); + regmap_write(regmap, 0xd0, val[DATA]); + break; + case PREPARE_TIME: + regmap_write(regmap, 0x32, val[CLK]); + regmap_write(regmap, 0x42, val[DATA]); + regmap_write(regmap, 0x52, val[DATA]); + regmap_write(regmap, 0x62, val[DATA]); + regmap_write(regmap, 0x72, val[DATA]); + + regmap_write(regmap, 0x91, val[CLK]); + regmap_write(regmap, 0xa1, val[DATA]); + regmap_write(regmap, 0xb1, val[DATA]); + regmap_write(regmap, 0xc1, val[DATA]); + regmap_write(regmap, 0xd1, val[DATA]); + break; + case ZERO_TIME: + regmap_write(regmap, 0x33, val[CLK]); + regmap_write(regmap, 0x43, val[DATA]); + regmap_write(regmap, 0x53, val[DATA]); + regmap_write(regmap, 0x63, val[DATA]); + regmap_write(regmap, 0x73, val[DATA]); + + regmap_write(regmap, 0x92, val[CLK]); + regmap_write(regmap, 0xa2, val[DATA]); + regmap_write(regmap, 0xb2, val[DATA]); + regmap_write(regmap, 0xc2, val[DATA]); + regmap_write(regmap, 0xd2, val[DATA]); + break; + case TRAIL_TIME: + regmap_write(regmap, 0x34, val[CLK]); + regmap_write(regmap, 0x44, val[DATA]); + regmap_write(regmap, 0x54, val[DATA]); + regmap_write(regmap, 0x64, val[DATA]); + regmap_write(regmap, 0x74, val[DATA]); + + regmap_write(regmap, 0x93, val[CLK]); + regmap_write(regmap, 0xa3, val[DATA]); + regmap_write(regmap, 0xb3, val[DATA]); + regmap_write(regmap, 0xc3, val[DATA]); + regmap_write(regmap, 0xd3, val[DATA]); + break; + case EXIT_TIME: + regmap_write(regmap, 0x36, val[CLK]); + regmap_write(regmap, 0x46, val[DATA]); + regmap_write(regmap, 0x56, val[DATA]); + regmap_write(regmap, 0x66, val[DATA]); + regmap_write(regmap, 0x76, val[DATA]); + + regmap_write(regmap, 0x95, val[CLK]); + regmap_write(regmap, 0xA5, val[DATA]); + regmap_write(regmap, 0xB5, val[DATA]); + regmap_write(regmap, 0xc5, val[DATA]); + regmap_write(regmap, 0xd5, val[DATA]); + break; + case CLKPOST_TIME: + regmap_write(regmap, 0x35, val[CLK]); + regmap_write(regmap, 0x94, val[CLK]); + break; + + /* the following just use default value */ + case SETTLE_TIME: + fallthrough; + case TA_GET: + fallthrough; + case TA_GO: + fallthrough; + case TA_SURE: + fallthrough; + default: + break; + } +} + +void dphy_timing_config(struct dsi_context *ctx) +{ + struct regmap *regmap = ctx->regmap; + struct dphy_pll *pll = &ctx->pll; + const u32 factor = 2; + const u32 scale = 100; + u32 t_ui, t_byteck, t_half_byteck; + u32 range[2], constant; + u8 val[2]; + u32 tmp = 0; + + /* t_ui: 1 ui, byteck: 8 ui, half byteck: 4 ui */ + t_ui = 1000 * scale / (pll->freq / 1000); + t_byteck = t_ui << 3; + t_half_byteck = t_ui << 2; + constant = t_ui << 1; + + /* REQUEST_TIME: HS T-LPX: LP-01 + * For T-LPX, mipi spec defined min value is 50ns, + * but maybe it shouldn't be too small, because BTA, + * LP-10, LP-00, LP-01, all of this is related to T-LPX. + */ + range[L] = 50 * scale; + range[H] = INFINITY; + val[CLK] = DIV_ROUND_UP(range[L] * (factor << 1), t_byteck) - 2; + val[DATA] = val[CLK]; + dphy_set_timing_reg(regmap, REQUEST_TIME, val); + + /* PREPARE_TIME: HS sequence: LP-00 */ + range[L] = 38 * scale; + range[H] = 95 * scale; + tmp = AVERAGE(range[L], range[H]); + val[CLK] = DIV_ROUND_UP(AVERAGE(range[L], range[H]), t_half_byteck) - 1; + range[L] = 40 * scale + 4 * t_ui; + range[H] = 85 * scale + 6 * t_ui; + tmp |= AVERAGE(range[L], range[H]) << 16; + val[DATA] = DIV_ROUND_UP(AVERAGE(range[L], range[H]), t_half_byteck) - 1; + dphy_set_timing_reg(regmap, PREPARE_TIME, val); + + /* ZERO_TIME: HS-ZERO */ + range[L] = 300 * scale; + range[H] = INFINITY; + val[CLK] = DIV_ROUND_UP(range[L] * factor + (tmp & 0xffff) + - 525 * t_byteck / 100, t_byteck) - 2; + range[L] = 145 * scale + 10 * t_ui; + val[DATA] = DIV_ROUND_UP(range[L] * factor + + ((tmp >> 16) & 0xffff) - 525 * t_byteck / 100, + t_byteck) - 2; + dphy_set_timing_reg(regmap, ZERO_TIME, val); + + /* TRAIL_TIME: HS-TRAIL */ + range[L] = 60 * scale; + range[H] = INFINITY; + val[CLK] = DIV_ROUND_UP(range[L] * factor - constant, t_half_byteck); + range[L] = max(8 * t_ui, 60 * scale + 4 * t_ui); + val[DATA] = DIV_ROUND_UP(range[L] * 3 / 2 - constant, t_half_byteck) - 2; + dphy_set_timing_reg(regmap, TRAIL_TIME, val); + + /* EXIT_TIME: */ + range[L] = 100 * scale; + range[H] = INFINITY; + val[CLK] = DIV_ROUND_UP(range[L] * factor, t_byteck) - 2; + val[DATA] = val[CLK]; + dphy_set_timing_reg(regmap, EXIT_TIME, val); + + /* CLKPOST_TIME: */ + range[L] = 60 * scale + 52 * t_ui; + range[H] = INFINITY; + val[CLK] = DIV_ROUND_UP(range[L] * factor, t_byteck) - 2; + val[DATA] = val[CLK]; + dphy_set_timing_reg(regmap, CLKPOST_TIME, val); + + /* SETTLE_TIME: + * This time is used for receiver. So for transmitter, + * it can be ignored. + */ + + /* TA_GO: + * transmitter drives bridge state(LP-00) before releasing control, + * reg 0x1f default value: 0x04, which is good. + */ + + /* TA_SURE: + * After LP-10 state and before bridge state(LP-00), + * reg 0x20 default value: 0x01, which is good. + */ + + /* TA_GET: + * receiver drives Bridge state(LP-00) before releasing control + * reg 0x21 default value: 0x03, which is good. + */ +} diff --git a/drivers/gpu/drm/sprd/sprd_dpu.c b/drivers/gpu/drm/sprd/sprd_dpu.c new file mode 100644 index 000000000000..06a3414ee43a --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_dpu.c @@ -0,0 +1,880 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#include <linux/component.h> +#include <linux/delay.h> +#include <linux/dma-buf.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_graph.h> +#include <linux/of_irq.h> +#include <linux/wait.h> +#include <linux/workqueue.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_plane_helper.h> + +#include "sprd_drm.h" +#include "sprd_dpu.h" +#include "sprd_dsi.h" + +/* Global control registers */ +#define REG_DPU_CTRL 0x04 +#define REG_DPU_CFG0 0x08 +#define REG_PANEL_SIZE 0x20 +#define REG_BLEND_SIZE 0x24 +#define REG_BG_COLOR 0x2C + +/* Layer0 control registers */ +#define REG_LAY_BASE_ADDR0 0x30 +#define REG_LAY_BASE_ADDR1 0x34 +#define REG_LAY_BASE_ADDR2 0x38 +#define REG_LAY_CTRL 0x40 +#define REG_LAY_SIZE 0x44 +#define REG_LAY_PITCH 0x48 +#define REG_LAY_POS 0x4C +#define REG_LAY_ALPHA 0x50 +#define REG_LAY_CROP_START 0x5C + +/* Interrupt control registers */ +#define REG_DPU_INT_EN 0x1E0 +#define REG_DPU_INT_CLR 0x1E4 +#define REG_DPU_INT_STS 0x1E8 + +/* DPI control registers */ +#define REG_DPI_CTRL 0x1F0 +#define REG_DPI_H_TIMING 0x1F4 +#define REG_DPI_V_TIMING 0x1F8 + +/* MMU control registers */ +#define REG_MMU_EN 0x800 +#define REG_MMU_VPN_RANGE 0x80C +#define REG_MMU_PPN1 0x83C +#define REG_MMU_RANGE1 0x840 +#define REG_MMU_PPN2 0x844 +#define REG_MMU_RANGE2 0x848 + +/* Global control bits */ +#define BIT_DPU_RUN BIT(0) +#define BIT_DPU_STOP BIT(1) +#define BIT_DPU_REG_UPDATE BIT(2) +#define BIT_DPU_IF_EDPI BIT(0) + +/* Layer control bits */ +#define BIT_DPU_LAY_EN BIT(0) +#define BIT_DPU_LAY_LAYER_ALPHA (0x01 << 2) +#define BIT_DPU_LAY_COMBO_ALPHA (0x02 << 2) +#define BIT_DPU_LAY_FORMAT_YUV422_2PLANE (0x00 << 4) +#define BIT_DPU_LAY_FORMAT_YUV420_2PLANE (0x01 << 4) +#define BIT_DPU_LAY_FORMAT_YUV420_3PLANE (0x02 << 4) +#define BIT_DPU_LAY_FORMAT_ARGB8888 (0x03 << 4) +#define BIT_DPU_LAY_FORMAT_RGB565 (0x04 << 4) +#define BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3 (0x00 << 8) +#define BIT_DPU_LAY_DATA_ENDIAN_B3B2B1B0 (0x01 << 8) +#define BIT_DPU_LAY_NO_SWITCH (0x00 << 10) +#define BIT_DPU_LAY_RB_OR_UV_SWITCH (0x01 << 10) +#define BIT_DPU_LAY_MODE_BLEND_NORMAL (0x00 << 16) +#define BIT_DPU_LAY_MODE_BLEND_PREMULT (0x01 << 16) +#define BIT_DPU_LAY_ROTATION_0 (0x00 << 20) +#define BIT_DPU_LAY_ROTATION_90 (0x01 << 20) +#define BIT_DPU_LAY_ROTATION_180 (0x02 << 20) +#define BIT_DPU_LAY_ROTATION_270 (0x03 << 20) +#define BIT_DPU_LAY_ROTATION_0_M (0x04 << 20) +#define BIT_DPU_LAY_ROTATION_90_M (0x05 << 20) +#define BIT_DPU_LAY_ROTATION_180_M (0x06 << 20) +#define BIT_DPU_LAY_ROTATION_270_M (0x07 << 20) + +/* Interrupt control & status bits */ +#define BIT_DPU_INT_DONE BIT(0) +#define BIT_DPU_INT_TE BIT(1) +#define BIT_DPU_INT_ERR BIT(2) +#define BIT_DPU_INT_UPDATE_DONE BIT(4) +#define BIT_DPU_INT_VSYNC BIT(5) + +/* DPI control bits */ +#define BIT_DPU_EDPI_TE_EN BIT(8) +#define BIT_DPU_EDPI_FROM_EXTERNAL_PAD BIT(10) +#define BIT_DPU_DPI_HALT_EN BIT(16) + +static const u32 layer_fmts[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_NV12, + DRM_FORMAT_NV21, + DRM_FORMAT_NV16, + DRM_FORMAT_NV61, + DRM_FORMAT_YUV420, + DRM_FORMAT_YVU420, +}; + +struct sprd_plane { + struct drm_plane base; +}; + +static int dpu_wait_stop_done(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + int rc; + + if (ctx->stopped) + return 0; + + rc = wait_event_interruptible_timeout(ctx->wait_queue, ctx->evt_stop, + msecs_to_jiffies(500)); + ctx->evt_stop = false; + + ctx->stopped = true; + + if (!rc) { + drm_err(dpu->drm, "dpu wait for stop done time out!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int dpu_wait_update_done(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + int rc; + + ctx->evt_update = false; + + rc = wait_event_interruptible_timeout(ctx->wait_queue, ctx->evt_update, + msecs_to_jiffies(500)); + + if (!rc) { + drm_err(dpu->drm, "dpu wait for reg update done time out!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static u32 drm_format_to_dpu(struct drm_framebuffer *fb) +{ + u32 format = 0; + + switch (fb->format->format) { + case DRM_FORMAT_BGRA8888: + /* BGRA8888 -> ARGB8888 */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B3B2B1B0; + format |= BIT_DPU_LAY_FORMAT_ARGB8888; + break; + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + /* RGBA8888 -> ABGR8888 */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B3B2B1B0; + fallthrough; + case DRM_FORMAT_ABGR8888: + /* RB switch */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + fallthrough; + case DRM_FORMAT_ARGB8888: + format |= BIT_DPU_LAY_FORMAT_ARGB8888; + break; + case DRM_FORMAT_XBGR8888: + /* RB switch */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + fallthrough; + case DRM_FORMAT_XRGB8888: + format |= BIT_DPU_LAY_FORMAT_ARGB8888; + break; + case DRM_FORMAT_BGR565: + /* RB switch */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + fallthrough; + case DRM_FORMAT_RGB565: + format |= BIT_DPU_LAY_FORMAT_RGB565; + break; + case DRM_FORMAT_NV12: + /* 2-Lane: Yuv420 */ + format |= BIT_DPU_LAY_FORMAT_YUV420_2PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3; + /* UV endian */ + format |= BIT_DPU_LAY_NO_SWITCH; + break; + case DRM_FORMAT_NV21: + /* 2-Lane: Yuv420 */ + format |= BIT_DPU_LAY_FORMAT_YUV420_2PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3; + /* UV endian */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + break; + case DRM_FORMAT_NV16: + /* 2-Lane: Yuv422 */ + format |= BIT_DPU_LAY_FORMAT_YUV422_2PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B3B2B1B0; + /* UV endian */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + break; + case DRM_FORMAT_NV61: + /* 2-Lane: Yuv422 */ + format |= BIT_DPU_LAY_FORMAT_YUV422_2PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3; + /* UV endian */ + format |= BIT_DPU_LAY_NO_SWITCH; + break; + case DRM_FORMAT_YUV420: + format |= BIT_DPU_LAY_FORMAT_YUV420_3PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3; + /* UV endian */ + format |= BIT_DPU_LAY_NO_SWITCH; + break; + case DRM_FORMAT_YVU420: + format |= BIT_DPU_LAY_FORMAT_YUV420_3PLANE; + /* Y endian */ + format |= BIT_DPU_LAY_DATA_ENDIAN_B0B1B2B3; + /* UV endian */ + format |= BIT_DPU_LAY_RB_OR_UV_SWITCH; + break; + default: + break; + } + + return format; +} + +static u32 drm_rotation_to_dpu(struct drm_plane_state *state) +{ + u32 rotation = 0; + + switch (state->rotation) { + default: + case DRM_MODE_ROTATE_0: + rotation = BIT_DPU_LAY_ROTATION_0; + break; + case DRM_MODE_ROTATE_90: + rotation = BIT_DPU_LAY_ROTATION_90; + break; + case DRM_MODE_ROTATE_180: + rotation = BIT_DPU_LAY_ROTATION_180; + break; + case DRM_MODE_ROTATE_270: + rotation = BIT_DPU_LAY_ROTATION_270; + break; + case DRM_MODE_REFLECT_Y: + rotation = BIT_DPU_LAY_ROTATION_180_M; + break; + case (DRM_MODE_REFLECT_Y | DRM_MODE_ROTATE_90): + rotation = BIT_DPU_LAY_ROTATION_90_M; + break; + case DRM_MODE_REFLECT_X: + rotation = BIT_DPU_LAY_ROTATION_0_M; + break; + case (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90): + rotation = BIT_DPU_LAY_ROTATION_270_M; + break; + } + + return rotation; +} + +static u32 drm_blend_to_dpu(struct drm_plane_state *state) +{ + u32 blend = 0; + + switch (state->pixel_blend_mode) { + case DRM_MODE_BLEND_COVERAGE: + /* alpha mode select - combo alpha */ + blend |= BIT_DPU_LAY_COMBO_ALPHA; + /* Normal mode */ + blend |= BIT_DPU_LAY_MODE_BLEND_NORMAL; + break; + case DRM_MODE_BLEND_PREMULTI: + /* alpha mode select - combo alpha */ + blend |= BIT_DPU_LAY_COMBO_ALPHA; + /* Pre-mult mode */ + blend |= BIT_DPU_LAY_MODE_BLEND_PREMULT; + break; + case DRM_MODE_BLEND_PIXEL_NONE: + default: + /* don't do blending, maybe RGBX */ + /* alpha mode select - layer alpha */ + blend |= BIT_DPU_LAY_LAYER_ALPHA; + break; + } + + return blend; +} + +static void sprd_dpu_layer(struct sprd_dpu *dpu, struct drm_plane_state *state) +{ + struct dpu_context *ctx = &dpu->ctx; + struct drm_gem_cma_object *cma_obj; + struct drm_framebuffer *fb = state->fb; + u32 addr, size, offset, pitch, blend, format, rotation; + u32 src_x = state->src_x >> 16; + u32 src_y = state->src_y >> 16; + u32 src_w = state->src_w >> 16; + u32 src_h = state->src_h >> 16; + u32 dst_x = state->crtc_x; + u32 dst_y = state->crtc_y; + u32 alpha = state->alpha; + u32 index = state->zpos; + int i; + + offset = (dst_x & 0xffff) | (dst_y << 16); + size = (src_w & 0xffff) | (src_h << 16); + + for (i = 0; i < fb->format->num_planes; i++) { + cma_obj = drm_fb_cma_get_gem_obj(fb, i); + addr = cma_obj->paddr + fb->offsets[i]; + + if (i == 0) + layer_reg_wr(ctx, REG_LAY_BASE_ADDR0, addr, index); + else if (i == 1) + layer_reg_wr(ctx, REG_LAY_BASE_ADDR1, addr, index); + else + layer_reg_wr(ctx, REG_LAY_BASE_ADDR2, addr, index); + } + + if (fb->format->num_planes == 3) { + /* UV pitch is 1/2 of Y pitch */ + pitch = (fb->pitches[0] / fb->format->cpp[0]) | + (fb->pitches[0] / fb->format->cpp[0] << 15); + } else { + pitch = fb->pitches[0] / fb->format->cpp[0]; + } + + layer_reg_wr(ctx, REG_LAY_POS, offset, index); + layer_reg_wr(ctx, REG_LAY_SIZE, size, index); + layer_reg_wr(ctx, REG_LAY_CROP_START, + src_y << 16 | src_x, index); + layer_reg_wr(ctx, REG_LAY_ALPHA, alpha, index); + layer_reg_wr(ctx, REG_LAY_PITCH, pitch, index); + + format = drm_format_to_dpu(fb); + blend = drm_blend_to_dpu(state); + rotation = drm_rotation_to_dpu(state); + + layer_reg_wr(ctx, REG_LAY_CTRL, BIT_DPU_LAY_EN | + format | + blend | + rotation, + index); +} + +static void sprd_dpu_flip(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + + /* + * Make sure the dpu is in stop status. DPU has no shadow + * registers in EDPI mode. So the config registers can only be + * updated in the rising edge of DPU_RUN bit. + */ + if (ctx->if_type == SPRD_DPU_IF_EDPI) + dpu_wait_stop_done(dpu); + + /* update trigger and wait */ + if (ctx->if_type == SPRD_DPU_IF_DPI) { + if (!ctx->stopped) { + dpu_reg_set(ctx, REG_DPU_CTRL, BIT_DPU_REG_UPDATE); + dpu_wait_update_done(dpu); + } + + dpu_reg_set(ctx, REG_DPU_INT_EN, BIT_DPU_INT_ERR); + } else if (ctx->if_type == SPRD_DPU_IF_EDPI) { + dpu_reg_set(ctx, REG_DPU_CTRL, BIT_DPU_RUN); + + ctx->stopped = false; + } +} + +static void sprd_dpu_init(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + u32 int_mask = 0; + + writel(0x00, ctx->base + REG_BG_COLOR); + writel(0x00, ctx->base + REG_MMU_EN); + writel(0x00, ctx->base + REG_MMU_PPN1); + writel(0xffff, ctx->base + REG_MMU_RANGE1); + writel(0x00, ctx->base + REG_MMU_PPN2); + writel(0xffff, ctx->base + REG_MMU_RANGE2); + writel(0x1ffff, ctx->base + REG_MMU_VPN_RANGE); + + if (ctx->if_type == SPRD_DPU_IF_DPI) { + /* use dpi as interface */ + dpu_reg_clr(ctx, REG_DPU_CFG0, BIT_DPU_IF_EDPI); + /* disable Halt function for SPRD DSI */ + dpu_reg_clr(ctx, REG_DPI_CTRL, BIT_DPU_DPI_HALT_EN); + /* select te from external pad */ + dpu_reg_set(ctx, REG_DPI_CTRL, BIT_DPU_EDPI_FROM_EXTERNAL_PAD); + + /* enable dpu update done INT */ + int_mask |= BIT_DPU_INT_UPDATE_DONE; + /* enable dpu done INT */ + int_mask |= BIT_DPU_INT_DONE; + /* enable dpu dpi vsync */ + int_mask |= BIT_DPU_INT_VSYNC; + /* enable dpu TE INT */ + int_mask |= BIT_DPU_INT_TE; + /* enable underflow err INT */ + int_mask |= BIT_DPU_INT_ERR; + } else if (ctx->if_type == SPRD_DPU_IF_EDPI) { + /* use edpi as interface */ + dpu_reg_set(ctx, REG_DPU_CFG0, BIT_DPU_IF_EDPI); + /* use external te */ + dpu_reg_set(ctx, REG_DPI_CTRL, BIT_DPU_EDPI_FROM_EXTERNAL_PAD); + /* enable te */ + dpu_reg_set(ctx, REG_DPI_CTRL, BIT_DPU_EDPI_TE_EN); + + /* enable stop done INT */ + int_mask |= BIT_DPU_INT_DONE; + /* enable TE INT */ + int_mask |= BIT_DPU_INT_TE; + } + + writel(int_mask, ctx->base + REG_DPU_INT_EN); +} + +static void sprd_dpu_fini(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + + writel(0x00, ctx->base + REG_DPU_INT_EN); + writel(0xff, ctx->base + REG_DPU_INT_CLR); +} + +static void sprd_dpi_init(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + u32 reg_val; + u32 size; + + size = (ctx->vm.vactive << 16) | ctx->vm.hactive; + writel(size, ctx->base + REG_PANEL_SIZE); + writel(size, ctx->base + REG_BLEND_SIZE); + + if (ctx->if_type == SPRD_DPU_IF_DPI) { + /* set dpi timing */ + reg_val = ctx->vm.hsync_len << 0 | + ctx->vm.hback_porch << 8 | + ctx->vm.hfront_porch << 20; + writel(reg_val, ctx->base + REG_DPI_H_TIMING); + + reg_val = ctx->vm.vsync_len << 0 | + ctx->vm.vback_porch << 8 | + ctx->vm.vfront_porch << 20; + writel(reg_val, ctx->base + REG_DPI_V_TIMING); + } +} + +void sprd_dpu_run(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + + dpu_reg_set(ctx, REG_DPU_CTRL, BIT_DPU_RUN); + + ctx->stopped = false; +} + +void sprd_dpu_stop(struct sprd_dpu *dpu) +{ + struct dpu_context *ctx = &dpu->ctx; + + if (ctx->if_type == SPRD_DPU_IF_DPI) + dpu_reg_set(ctx, REG_DPU_CTRL, BIT_DPU_STOP); + + dpu_wait_stop_done(dpu); +} + +static int sprd_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_crtc_state *crtc_state; + u32 fmt; + + if (!plane_state->fb || !plane_state->crtc) + return 0; + + fmt = drm_format_to_dpu(plane_state->fb); + if (!fmt) + return -EINVAL; + + crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + return drm_atomic_helper_check_plane_state(plane_state, crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); +} + +static void sprd_plane_atomic_update(struct drm_plane *drm_plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + drm_plane); + struct sprd_dpu *dpu = to_sprd_crtc(new_state->crtc); + + /* start configure dpu layers */ + sprd_dpu_layer(dpu, new_state); +} + +static void sprd_plane_atomic_disable(struct drm_plane *drm_plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + drm_plane); + struct sprd_dpu *dpu = to_sprd_crtc(old_state->crtc); + + layer_reg_wr(&dpu->ctx, REG_LAY_CTRL, 0x00, old_state->zpos); +} + +static void sprd_plane_create_properties(struct sprd_plane *plane, int index) +{ + unsigned int supported_modes = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); + + /* create rotation property */ + drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_MASK | + DRM_MODE_REFLECT_MASK); + + /* create alpha property */ + drm_plane_create_alpha_property(&plane->base); + + /* create blend mode property */ + drm_plane_create_blend_mode_property(&plane->base, supported_modes); + + /* create zpos property */ + drm_plane_create_zpos_immutable_property(&plane->base, index); +} + +static const struct drm_plane_helper_funcs sprd_plane_helper_funcs = { + .atomic_check = sprd_plane_atomic_check, + .atomic_update = sprd_plane_atomic_update, + .atomic_disable = sprd_plane_atomic_disable, +}; + +static const struct drm_plane_funcs sprd_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static struct sprd_plane *sprd_planes_init(struct drm_device *drm) +{ + struct sprd_plane *plane, *primary; + enum drm_plane_type plane_type; + int i; + + for (i = 0; i < 6; i++) { + plane_type = (i == 0) ? DRM_PLANE_TYPE_PRIMARY : + DRM_PLANE_TYPE_OVERLAY; + + plane = drmm_universal_plane_alloc(drm, struct sprd_plane, base, + 1, &sprd_plane_funcs, + layer_fmts, ARRAY_SIZE(layer_fmts), + NULL, plane_type, NULL); + if (IS_ERR(plane)) { + drm_err(drm, "failed to init drm plane: %d\n", i); + return plane; + } + + drm_plane_helper_add(&plane->base, &sprd_plane_helper_funcs); + + sprd_plane_create_properties(plane, i); + + if (i == 0) + primary = plane; + } + + return primary; +} + +static void sprd_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct drm_encoder *encoder; + struct sprd_dsi *dsi; + + drm_display_mode_to_videomode(mode, &dpu->ctx.vm); + + drm_for_each_encoder_mask(encoder, crtc->dev, + crtc->state->encoder_mask) { + dsi = encoder_to_dsi(encoder); + + if (dsi->slave->mode_flags & MIPI_DSI_MODE_VIDEO) + dpu->ctx.if_type = SPRD_DPU_IF_DPI; + else + dpu->ctx.if_type = SPRD_DPU_IF_EDPI; + } + + sprd_dpi_init(dpu); +} + +static void sprd_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + + sprd_dpu_init(dpu); + + drm_crtc_vblank_on(&dpu->base); +} + +static void sprd_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + struct drm_device *drm = dpu->base.dev; + + drm_crtc_vblank_off(&dpu->base); + + sprd_dpu_fini(dpu); + + spin_lock_irq(&drm->event_lock); + if (crtc->state->event) { + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + } + spin_unlock_irq(&drm->event_lock); +} + +static void sprd_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) + +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + struct drm_device *drm = dpu->base.dev; + + sprd_dpu_flip(dpu); + + spin_lock_irq(&drm->event_lock); + if (crtc->state->event) { + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + } + spin_unlock_irq(&drm->event_lock); +} + +static int sprd_crtc_enable_vblank(struct drm_crtc *crtc) +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + + dpu_reg_set(&dpu->ctx, REG_DPU_INT_EN, BIT_DPU_INT_VSYNC); + + return 0; +} + +static void sprd_crtc_disable_vblank(struct drm_crtc *crtc) +{ + struct sprd_dpu *dpu = to_sprd_crtc(crtc); + + dpu_reg_clr(&dpu->ctx, REG_DPU_INT_EN, BIT_DPU_INT_VSYNC); +} + +static const struct drm_crtc_helper_funcs sprd_crtc_helper_funcs = { + .mode_set_nofb = sprd_crtc_mode_set_nofb, + .atomic_flush = sprd_crtc_atomic_flush, + .atomic_enable = sprd_crtc_atomic_enable, + .atomic_disable = sprd_crtc_atomic_disable, +}; + +static const struct drm_crtc_funcs sprd_crtc_funcs = { + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = sprd_crtc_enable_vblank, + .disable_vblank = sprd_crtc_disable_vblank, +}; + +static struct sprd_dpu *sprd_crtc_init(struct drm_device *drm, + struct drm_plane *primary, struct device *dev) +{ + struct device_node *port; + struct sprd_dpu *dpu; + + dpu = drmm_crtc_alloc_with_planes(drm, struct sprd_dpu, base, + primary, NULL, + &sprd_crtc_funcs, NULL); + if (IS_ERR(dpu)) { + drm_err(drm, "failed to init crtc\n"); + return dpu; + } + drm_crtc_helper_add(&dpu->base, &sprd_crtc_helper_funcs); + + /* + * set crtc port so that drm_of_find_possible_crtcs call works + */ + port = of_graph_get_port_by_id(dev->of_node, 0); + if (!port) { + drm_err(drm, "failed to found crtc output port for %s\n", + dev->of_node->full_name); + return ERR_PTR(-EINVAL); + } + dpu->base.port = port; + of_node_put(port); + + return dpu; +} + +static irqreturn_t sprd_dpu_isr(int irq, void *data) +{ + struct sprd_dpu *dpu = data; + struct dpu_context *ctx = &dpu->ctx; + u32 reg_val, int_mask = 0; + + reg_val = readl(ctx->base + REG_DPU_INT_STS); + + /* disable err interrupt */ + if (reg_val & BIT_DPU_INT_ERR) { + int_mask |= BIT_DPU_INT_ERR; + drm_warn(dpu->drm, "Warning: dpu underflow!\n"); + } + + /* dpu update done isr */ + if (reg_val & BIT_DPU_INT_UPDATE_DONE) { + ctx->evt_update = true; + wake_up_interruptible_all(&ctx->wait_queue); + } + + /* dpu stop done isr */ + if (reg_val & BIT_DPU_INT_DONE) { + ctx->evt_stop = true; + wake_up_interruptible_all(&ctx->wait_queue); + } + + if (reg_val & BIT_DPU_INT_VSYNC) + drm_crtc_handle_vblank(&dpu->base); + + writel(reg_val, ctx->base + REG_DPU_INT_CLR); + dpu_reg_clr(ctx, REG_DPU_INT_EN, int_mask); + + return IRQ_HANDLED; +} + +static int sprd_dpu_context_init(struct sprd_dpu *dpu, + struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dpu_context *ctx = &dpu->ctx; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctx->base = devm_ioremap(dev, res->start, resource_size(res)); + if (!ctx->base) { + dev_err(dev, "failed to map dpu registers\n"); + return -EFAULT; + } + + ctx->irq = platform_get_irq(pdev, 0); + if (ctx->irq < 0) { + dev_err(dev, "failed to get dpu irq\n"); + return ctx->irq; + } + + /* disable and clear interrupts before register dpu IRQ. */ + writel(0x00, ctx->base + REG_DPU_INT_EN); + writel(0xff, ctx->base + REG_DPU_INT_CLR); + + ret = devm_request_irq(dev, ctx->irq, sprd_dpu_isr, + IRQF_TRIGGER_NONE, "DPU", dpu); + if (ret) { + dev_err(dev, "failed to register dpu irq handler\n"); + return ret; + } + + init_waitqueue_head(&ctx->wait_queue); + + return 0; +} + +static int sprd_dpu_bind(struct device *dev, struct device *master, void *data) +{ + struct drm_device *drm = data; + struct sprd_dpu *dpu; + struct sprd_plane *plane; + int ret; + + plane = sprd_planes_init(drm); + if (IS_ERR(plane)) + return PTR_ERR(plane); + + dpu = sprd_crtc_init(drm, &plane->base, dev); + if (IS_ERR(dpu)) + return PTR_ERR(dpu); + + dpu->drm = drm; + dev_set_drvdata(dev, dpu); + + ret = sprd_dpu_context_init(dpu, dev); + if (ret) + return ret; + + return 0; +} + +static const struct component_ops dpu_component_ops = { + .bind = sprd_dpu_bind, +}; + +static const struct of_device_id dpu_match_table[] = { + { .compatible = "sprd,sharkl3-dpu" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, dpu_match_table); + +static int sprd_dpu_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &dpu_component_ops); +} + +static int sprd_dpu_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &dpu_component_ops); + + return 0; +} + +struct platform_driver sprd_dpu_driver = { + .probe = sprd_dpu_probe, + .remove = sprd_dpu_remove, + .driver = { + .name = "sprd-dpu-drv", + .of_match_table = dpu_match_table, + }, +}; + +MODULE_AUTHOR("Leon He <leon.he@unisoc.com>"); +MODULE_AUTHOR("Kevin Tang <kevin.tang@unisoc.com>"); +MODULE_DESCRIPTION("Unisoc Display Controller Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/sprd/sprd_dpu.h b/drivers/gpu/drm/sprd/sprd_dpu.h new file mode 100644 index 000000000000..157a78f24dc1 --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_dpu.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#ifndef __SPRD_DPU_H__ +#define __SPRD_DPU_H__ + +#include <linux/bug.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/string.h> +#include <video/videomode.h> + +#include <drm/drm_crtc.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_print.h> +#include <drm/drm_vblank.h> +#include <uapi/drm/drm_mode.h> + +/* DPU Layer registers offset */ +#define DPU_LAY_REG_OFFSET 0x30 + +enum { + SPRD_DPU_IF_DPI, + SPRD_DPU_IF_EDPI, + SPRD_DPU_IF_LIMIT +}; + +/** + * Sprd DPU context structure + * + * @base: DPU controller base address + * @irq: IRQ number to install the handler for + * @if_type: The type of DPI interface, default is DPI mode. + * @vm: videomode structure to use for DPU and DPI initialization + * @stopped: indicates whether DPU are stopped + * @wait_queue: wait queue, used to wait for DPU shadow register update done and + * DPU stop register done interrupt signal. + * @evt_update: wait queue condition for DPU shadow register + * @evt_stop: wait queue condition for DPU stop register + */ +struct dpu_context { + void __iomem *base; + int irq; + u8 if_type; + struct videomode vm; + bool stopped; + wait_queue_head_t wait_queue; + bool evt_update; + bool evt_stop; +}; + +/** + * Sprd DPU device structure + * + * @crtc: crtc object + * @drm: A point to drm device + * @ctx: DPU's implementation specific context object + */ +struct sprd_dpu { + struct drm_crtc base; + struct drm_device *drm; + struct dpu_context ctx; +}; + +static inline struct sprd_dpu *to_sprd_crtc(struct drm_crtc *crtc) +{ + return container_of(crtc, struct sprd_dpu, base); +} + +static inline void +dpu_reg_set(struct dpu_context *ctx, u32 offset, u32 set_bits) +{ + u32 bits = readl_relaxed(ctx->base + offset); + + writel(bits | set_bits, ctx->base + offset); +} + +static inline void +dpu_reg_clr(struct dpu_context *ctx, u32 offset, u32 clr_bits) +{ + u32 bits = readl_relaxed(ctx->base + offset); + + writel(bits & ~clr_bits, ctx->base + offset); +} + +static inline u32 +layer_reg_rd(struct dpu_context *ctx, u32 offset, int index) +{ + u32 layer_offset = offset + index * DPU_LAY_REG_OFFSET; + + return readl(ctx->base + layer_offset); +} + +static inline void +layer_reg_wr(struct dpu_context *ctx, u32 offset, u32 cfg_bits, int index) +{ + u32 layer_offset = offset + index * DPU_LAY_REG_OFFSET; + + writel(cfg_bits, ctx->base + layer_offset); +} + +void sprd_dpu_run(struct sprd_dpu *dpu); +void sprd_dpu_stop(struct sprd_dpu *dpu); + +#endif diff --git a/drivers/gpu/drm/sprd/sprd_drm.c b/drivers/gpu/drm/sprd/sprd_drm.c new file mode 100644 index 000000000000..a077e2d4d721 --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_drm.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#include <linux/component.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of_graph.h> +#include <linux/of_platform.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_of.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_vblank.h> + +#include "sprd_drm.h" + +#define DRIVER_NAME "sprd" +#define DRIVER_DESC "Spreadtrum SoCs' DRM Driver" +#define DRIVER_DATE "20200201" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 + +static const struct drm_mode_config_helper_funcs sprd_drm_mode_config_helper = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; + +static const struct drm_mode_config_funcs sprd_drm_mode_config_funcs = { + .fb_create = drm_gem_fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void sprd_drm_mode_config_init(struct drm_device *drm) +{ + drm->mode_config.min_width = 0; + drm->mode_config.min_height = 0; + drm->mode_config.max_width = 8192; + drm->mode_config.max_height = 8192; + drm->mode_config.allow_fb_modifiers = true; + + drm->mode_config.funcs = &sprd_drm_mode_config_funcs; + drm->mode_config.helper_private = &sprd_drm_mode_config_helper; +} + +DEFINE_DRM_GEM_CMA_FOPS(sprd_drm_fops); + +static struct drm_driver sprd_drm_drv = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .fops = &sprd_drm_fops, + + /* GEM Operations */ + DRM_GEM_CMA_DRIVER_OPS, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, +}; + +static int sprd_drm_bind(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm; + struct sprd_drm *sprd; + int ret; + + sprd = devm_drm_dev_alloc(dev, &sprd_drm_drv, struct sprd_drm, drm); + if (IS_ERR(sprd)) + return PTR_ERR(sprd); + + drm = &sprd->drm; + platform_set_drvdata(pdev, drm); + + ret = drmm_mode_config_init(drm); + if (ret) + return ret; + + sprd_drm_mode_config_init(drm); + + /* bind and init sub drivers */ + ret = component_bind_all(drm->dev, drm); + if (ret) { + drm_err(drm, "failed to bind all component.\n"); + return ret; + } + + /* vblank init */ + ret = drm_vblank_init(drm, drm->mode_config.num_crtc); + if (ret) { + drm_err(drm, "failed to initialize vblank.\n"); + goto err_unbind_all; + } + + /* reset all the states of crtc/plane/encoder/connector */ + drm_mode_config_reset(drm); + + /* init kms poll for handling hpd */ + drm_kms_helper_poll_init(drm); + + ret = drm_dev_register(drm, 0); + if (ret < 0) + goto err_kms_helper_poll_fini; + + return 0; + +err_kms_helper_poll_fini: + drm_kms_helper_poll_fini(drm); +err_unbind_all: + component_unbind_all(drm->dev, drm); + return ret; +} + +static void sprd_drm_unbind(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + + drm_dev_unregister(drm); + + drm_kms_helper_poll_fini(drm); + + component_unbind_all(drm->dev, drm); +} + +static const struct component_master_ops drm_component_ops = { + .bind = sprd_drm_bind, + .unbind = sprd_drm_unbind, +}; + +static int compare_of(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static int sprd_drm_probe(struct platform_device *pdev) +{ + return drm_of_component_probe(&pdev->dev, compare_of, &drm_component_ops); +} + +static int sprd_drm_remove(struct platform_device *pdev) +{ + component_master_del(&pdev->dev, &drm_component_ops); + return 0; +} + +static void sprd_drm_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (!drm) { + drm_warn(drm, "drm device is not available, no shutdown\n"); + return; + } + + drm_atomic_helper_shutdown(drm); +} + +static const struct of_device_id drm_match_table[] = { + { .compatible = "sprd,display-subsystem", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, drm_match_table); + +static struct platform_driver sprd_drm_driver = { + .probe = sprd_drm_probe, + .remove = sprd_drm_remove, + .shutdown = sprd_drm_shutdown, + .driver = { + .name = "sprd-drm-drv", + .of_match_table = drm_match_table, + }, +}; + +static struct platform_driver *sprd_drm_drivers[] = { + &sprd_drm_driver, + &sprd_dpu_driver, + &sprd_dsi_driver, +}; + +static int __init sprd_drm_init(void) +{ + return platform_register_drivers(sprd_drm_drivers, + ARRAY_SIZE(sprd_drm_drivers)); +} + +static void __exit sprd_drm_exit(void) +{ + platform_unregister_drivers(sprd_drm_drivers, + ARRAY_SIZE(sprd_drm_drivers)); +} + +module_init(sprd_drm_init); +module_exit(sprd_drm_exit); + +MODULE_AUTHOR("Leon He <leon.he@unisoc.com>"); +MODULE_AUTHOR("Kevin Tang <kevin.tang@unisoc.com>"); +MODULE_DESCRIPTION("Unisoc DRM KMS Master Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/sprd/sprd_drm.h b/drivers/gpu/drm/sprd/sprd_drm.h new file mode 100644 index 000000000000..95d1b972f333 --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_drm.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#ifndef _SPRD_DRM_H_ +#define _SPRD_DRM_H_ + +#include <drm/drm_atomic.h> +#include <drm/drm_print.h> + +struct sprd_drm { + struct drm_device drm; +}; + +extern struct platform_driver sprd_dpu_driver; +extern struct platform_driver sprd_dsi_driver; + +#endif /* _SPRD_DRM_H_ */ diff --git a/drivers/gpu/drm/sprd/sprd_dsi.c b/drivers/gpu/drm/sprd/sprd_dsi.c new file mode 100644 index 000000000000..911b3cddc264 --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_dsi.c @@ -0,0 +1,1073 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#include <linux/component.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/of_graph.h> +#include <video/mipi_display.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_of.h> +#include <drm/drm_probe_helper.h> + +#include "sprd_drm.h" +#include "sprd_dpu.h" +#include "sprd_dsi.h" + +#define SOFT_RESET 0x04 +#define MASK_PROTOCOL_INT 0x0C +#define MASK_INTERNAL_INT 0x14 +#define DSI_MODE_CFG 0x18 + +#define VIRTUAL_CHANNEL_ID 0x1C +#define GEN_RX_VCID GENMASK(1, 0) +#define VIDEO_PKT_VCID GENMASK(3, 2) + +#define DPI_VIDEO_FORMAT 0x20 +#define DPI_VIDEO_MODE_FORMAT GENMASK(5, 0) +#define LOOSELY18_EN BIT(6) + +#define VIDEO_PKT_CONFIG 0x24 +#define VIDEO_PKT_SIZE GENMASK(15, 0) +#define VIDEO_LINE_CHUNK_NUM GENMASK(31, 16) + +#define VIDEO_LINE_HBLK_TIME 0x28 +#define VIDEO_LINE_HBP_TIME GENMASK(15, 0) +#define VIDEO_LINE_HSA_TIME GENMASK(31, 16) + +#define VIDEO_LINE_TIME 0x2C + +#define VIDEO_VBLK_LINES 0x30 +#define VFP_LINES GENMASK(9, 0) +#define VBP_LINES GENMASK(19, 10) +#define VSA_LINES GENMASK(29, 20) + +#define VIDEO_VACTIVE_LINES 0x34 + +#define VID_MODE_CFG 0x38 +#define VID_MODE_TYPE GENMASK(1, 0) +#define LP_VSA_EN BIT(8) +#define LP_VBP_EN BIT(9) +#define LP_VFP_EN BIT(10) +#define LP_VACT_EN BIT(11) +#define LP_HBP_EN BIT(12) +#define LP_HFP_EN BIT(13) +#define FRAME_BTA_ACK_EN BIT(14) + +#define TIMEOUT_CNT_CLK_CONFIG 0x40 +#define HTX_TO_CONFIG 0x44 +#define LRX_H_TO_CONFIG 0x48 + +#define TX_ESC_CLK_CONFIG 0x5C + +#define CMD_MODE_CFG 0x68 +#define TEAR_FX_EN BIT(0) + +#define GEN_HDR 0x6C +#define GEN_DT GENMASK(5, 0) +#define GEN_VC GENMASK(7, 6) + +#define GEN_PLD_DATA 0x70 + +#define PHY_CLK_LANE_LP_CTRL 0x74 +#define PHY_CLKLANE_TX_REQ_HS BIT(0) +#define AUTO_CLKLANE_CTRL_EN BIT(1) + +#define PHY_INTERFACE_CTRL 0x78 +#define RF_PHY_SHUTDOWN BIT(0) +#define RF_PHY_RESET_N BIT(1) +#define RF_PHY_CLK_EN BIT(2) + +#define CMD_MODE_STATUS 0x98 +#define GEN_CMD_RDATA_FIFO_EMPTY BIT(1) +#define GEN_CMD_WDATA_FIFO_EMPTY BIT(3) +#define GEN_CMD_CMD_FIFO_EMPTY BIT(5) +#define GEN_CMD_RDCMD_DONE BIT(7) + +#define PHY_STATUS 0x9C +#define PHY_LOCK BIT(1) + +#define PHY_MIN_STOP_TIME 0xA0 +#define PHY_LANE_NUM_CONFIG 0xA4 + +#define PHY_CLKLANE_TIME_CONFIG 0xA8 +#define PHY_CLKLANE_LP_TO_HS_TIME GENMASK(15, 0) +#define PHY_CLKLANE_HS_TO_LP_TIME GENMASK(31, 16) + +#define PHY_DATALANE_TIME_CONFIG 0xAC +#define PHY_DATALANE_LP_TO_HS_TIME GENMASK(15, 0) +#define PHY_DATALANE_HS_TO_LP_TIME GENMASK(31, 16) + +#define MAX_READ_TIME 0xB0 + +#define RX_PKT_CHECK_CONFIG 0xB4 +#define RX_PKT_ECC_EN BIT(0) +#define RX_PKT_CRC_EN BIT(1) + +#define TA_EN 0xB8 + +#define EOTP_EN 0xBC +#define TX_EOTP_EN BIT(0) +#define RX_EOTP_EN BIT(1) + +#define VIDEO_NULLPKT_SIZE 0xC0 +#define DCS_WM_PKT_SIZE 0xC4 + +#define VIDEO_SIG_DELAY_CONFIG 0xD0 +#define VIDEO_SIG_DELAY GENMASK(23, 0) + +#define PHY_TST_CTRL0 0xF0 +#define PHY_TESTCLR BIT(0) +#define PHY_TESTCLK BIT(1) + +#define PHY_TST_CTRL1 0xF4 +#define PHY_TESTDIN GENMASK(7, 0) +#define PHY_TESTDOUT GENMASK(15, 8) +#define PHY_TESTEN BIT(16) + +#define host_to_dsi(host) \ + container_of(host, struct sprd_dsi, host) + +static inline u32 +dsi_reg_rd(struct dsi_context *ctx, u32 offset, u32 mask, + u32 shift) +{ + return (readl(ctx->base + offset) & mask) >> shift; +} + +static inline void +dsi_reg_wr(struct dsi_context *ctx, u32 offset, u32 mask, + u32 shift, u32 val) +{ + u32 ret; + + ret = readl(ctx->base + offset); + ret &= ~mask; + ret |= (val << shift) & mask; + writel(ret, ctx->base + offset); +} + +static inline void +dsi_reg_up(struct dsi_context *ctx, u32 offset, u32 mask, + u32 val) +{ + u32 ret = readl(ctx->base + offset); + + writel((ret & ~mask) | (val & mask), ctx->base + offset); +} + +static int regmap_tst_io_write(void *context, u32 reg, u32 val) +{ + struct sprd_dsi *dsi = context; + struct dsi_context *ctx = &dsi->ctx; + + if (val > 0xff || reg > 0xff) + return -EINVAL; + + drm_dbg(dsi->drm, "reg = 0x%02x, val = 0x%02x\n", reg, val); + + dsi_reg_up(ctx, PHY_TST_CTRL1, PHY_TESTEN, PHY_TESTEN); + dsi_reg_wr(ctx, PHY_TST_CTRL1, PHY_TESTDIN, 0, reg); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, PHY_TESTCLK); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, 0); + dsi_reg_up(ctx, PHY_TST_CTRL1, PHY_TESTEN, 0); + dsi_reg_wr(ctx, PHY_TST_CTRL1, PHY_TESTDIN, 0, val); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, PHY_TESTCLK); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, 0); + + return 0; +} + +static int regmap_tst_io_read(void *context, u32 reg, u32 *val) +{ + struct sprd_dsi *dsi = context; + struct dsi_context *ctx = &dsi->ctx; + int ret; + + if (reg > 0xff) + return -EINVAL; + + dsi_reg_up(ctx, PHY_TST_CTRL1, PHY_TESTEN, PHY_TESTEN); + dsi_reg_wr(ctx, PHY_TST_CTRL1, PHY_TESTDIN, 0, reg); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, PHY_TESTCLK); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLK, 0); + dsi_reg_up(ctx, PHY_TST_CTRL1, PHY_TESTEN, 0); + + udelay(1); + + ret = dsi_reg_rd(ctx, PHY_TST_CTRL1, PHY_TESTDOUT, 8); + if (ret < 0) + return ret; + + *val = ret; + + drm_dbg(dsi->drm, "reg = 0x%02x, val = 0x%02x\n", reg, *val); + return 0; +} + +static struct regmap_bus regmap_tst_io = { + .reg_write = regmap_tst_io_write, + .reg_read = regmap_tst_io_read, +}; + +static const struct regmap_config byte_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int dphy_wait_pll_locked(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + int i; + + for (i = 0; i < 50000; i++) { + if (dsi_reg_rd(ctx, PHY_STATUS, PHY_LOCK, 1)) + return 0; + udelay(3); + } + + drm_err(dsi->drm, "dphy pll can not be locked\n"); + return -ETIMEDOUT; +} + +static int dsi_wait_tx_payload_fifo_empty(struct dsi_context *ctx) +{ + int i; + + for (i = 0; i < 5000; i++) { + if (dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_WDATA_FIFO_EMPTY, 3)) + return 0; + udelay(1); + } + + return -ETIMEDOUT; +} + +static int dsi_wait_tx_cmd_fifo_empty(struct dsi_context *ctx) +{ + int i; + + for (i = 0; i < 5000; i++) { + if (dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_CMD_FIFO_EMPTY, 5)) + return 0; + udelay(1); + } + + return -ETIMEDOUT; +} + +static int dsi_wait_rd_resp_completed(struct dsi_context *ctx) +{ + int i; + + for (i = 0; i < 10000; i++) { + if (dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_RDCMD_DONE, 7)) + return 0; + udelay(10); + } + + return -ETIMEDOUT; +} + +static u16 calc_bytes_per_pixel_x100(int coding) +{ + u16 bpp_x100; + + switch (coding) { + case COLOR_CODE_16BIT_CONFIG1: + case COLOR_CODE_16BIT_CONFIG2: + case COLOR_CODE_16BIT_CONFIG3: + bpp_x100 = 200; + break; + case COLOR_CODE_18BIT_CONFIG1: + case COLOR_CODE_18BIT_CONFIG2: + bpp_x100 = 225; + break; + case COLOR_CODE_24BIT: + bpp_x100 = 300; + break; + case COLOR_CODE_COMPRESSTION: + bpp_x100 = 100; + break; + case COLOR_CODE_20BIT_YCC422_LOOSELY: + bpp_x100 = 250; + break; + case COLOR_CODE_24BIT_YCC422: + bpp_x100 = 300; + break; + case COLOR_CODE_16BIT_YCC422: + bpp_x100 = 200; + break; + case COLOR_CODE_30BIT: + bpp_x100 = 375; + break; + case COLOR_CODE_36BIT: + bpp_x100 = 450; + break; + case COLOR_CODE_12BIT_YCC420: + bpp_x100 = 150; + break; + default: + DRM_ERROR("invalid color coding"); + bpp_x100 = 0; + break; + } + + return bpp_x100; +} + +static u8 calc_video_size_step(int coding) +{ + u8 video_size_step; + + switch (coding) { + case COLOR_CODE_16BIT_CONFIG1: + case COLOR_CODE_16BIT_CONFIG2: + case COLOR_CODE_16BIT_CONFIG3: + case COLOR_CODE_18BIT_CONFIG1: + case COLOR_CODE_18BIT_CONFIG2: + case COLOR_CODE_24BIT: + case COLOR_CODE_COMPRESSTION: + return video_size_step = 1; + case COLOR_CODE_20BIT_YCC422_LOOSELY: + case COLOR_CODE_24BIT_YCC422: + case COLOR_CODE_16BIT_YCC422: + case COLOR_CODE_30BIT: + case COLOR_CODE_36BIT: + case COLOR_CODE_12BIT_YCC420: + return video_size_step = 2; + default: + DRM_ERROR("invalid color coding"); + return 0; + } +} + +static u16 round_video_size(int coding, u16 video_size) +{ + switch (coding) { + case COLOR_CODE_16BIT_YCC422: + case COLOR_CODE_24BIT_YCC422: + case COLOR_CODE_20BIT_YCC422_LOOSELY: + case COLOR_CODE_12BIT_YCC420: + /* round up active H pixels to a multiple of 2 */ + if ((video_size % 2) != 0) + video_size += 1; + break; + default: + break; + } + + return video_size; +} + +#define SPRD_MIPI_DSI_FMT_DSC 0xff +static u32 fmt_to_coding(u32 fmt) +{ + switch (fmt) { + case MIPI_DSI_FMT_RGB565: + return COLOR_CODE_16BIT_CONFIG1; + case MIPI_DSI_FMT_RGB666: + case MIPI_DSI_FMT_RGB666_PACKED: + return COLOR_CODE_18BIT_CONFIG1; + case MIPI_DSI_FMT_RGB888: + return COLOR_CODE_24BIT; + case SPRD_MIPI_DSI_FMT_DSC: + return COLOR_CODE_COMPRESSTION; + default: + DRM_ERROR("Unsupported format (%d)\n", fmt); + return COLOR_CODE_24BIT; + } +} + +#define ns_to_cycle(ns, byte_clk) \ + DIV_ROUND_UP((ns) * (byte_clk), 1000000) + +static void sprd_dsi_init(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + u32 byte_clk = dsi->slave->hs_rate / 8; + u16 data_hs2lp, data_lp2hs, clk_hs2lp, clk_lp2hs; + u16 max_rd_time; + int div; + + writel(0, ctx->base + SOFT_RESET); + writel(0xffffffff, ctx->base + MASK_PROTOCOL_INT); + writel(0xffffffff, ctx->base + MASK_INTERNAL_INT); + writel(1, ctx->base + DSI_MODE_CFG); + dsi_reg_up(ctx, EOTP_EN, RX_EOTP_EN, 0); + dsi_reg_up(ctx, EOTP_EN, TX_EOTP_EN, 0); + dsi_reg_up(ctx, RX_PKT_CHECK_CONFIG, RX_PKT_ECC_EN, RX_PKT_ECC_EN); + dsi_reg_up(ctx, RX_PKT_CHECK_CONFIG, RX_PKT_CRC_EN, RX_PKT_CRC_EN); + writel(1, ctx->base + TA_EN); + dsi_reg_up(ctx, VIRTUAL_CHANNEL_ID, VIDEO_PKT_VCID, 0); + dsi_reg_up(ctx, VIRTUAL_CHANNEL_ID, GEN_RX_VCID, 0); + + div = DIV_ROUND_UP(byte_clk, dsi->slave->lp_rate); + writel(div, ctx->base + TX_ESC_CLK_CONFIG); + + max_rd_time = ns_to_cycle(ctx->max_rd_time, byte_clk); + writel(max_rd_time, ctx->base + MAX_READ_TIME); + + data_hs2lp = ns_to_cycle(ctx->data_hs2lp, byte_clk); + data_lp2hs = ns_to_cycle(ctx->data_lp2hs, byte_clk); + clk_hs2lp = ns_to_cycle(ctx->clk_hs2lp, byte_clk); + clk_lp2hs = ns_to_cycle(ctx->clk_lp2hs, byte_clk); + dsi_reg_wr(ctx, PHY_DATALANE_TIME_CONFIG, + PHY_DATALANE_HS_TO_LP_TIME, 16, data_hs2lp); + dsi_reg_wr(ctx, PHY_DATALANE_TIME_CONFIG, + PHY_DATALANE_LP_TO_HS_TIME, 0, data_lp2hs); + dsi_reg_wr(ctx, PHY_CLKLANE_TIME_CONFIG, + PHY_CLKLANE_HS_TO_LP_TIME, 16, clk_hs2lp); + dsi_reg_wr(ctx, PHY_CLKLANE_TIME_CONFIG, + PHY_CLKLANE_LP_TO_HS_TIME, 0, clk_lp2hs); + + writel(1, ctx->base + SOFT_RESET); +} + +/* + * Free up resources and shutdown host controller and PHY + */ +static void sprd_dsi_fini(struct dsi_context *ctx) +{ + writel(0xffffffff, ctx->base + MASK_PROTOCOL_INT); + writel(0xffffffff, ctx->base + MASK_INTERNAL_INT); + writel(0, ctx->base + SOFT_RESET); +} + +/* + * If not in burst mode, it will compute the video and null packet sizes + * according to necessity. + * Configure timers for data lanes and/or clock lane to return to LP when + * bandwidth is not filled by data. + */ +static int sprd_dsi_dpi_video(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + struct videomode *vm = &ctx->vm; + u32 byte_clk = dsi->slave->hs_rate / 8; + u16 bpp_x100; + u16 video_size; + u32 ratio_x1000; + u16 null_pkt_size = 0; + u8 video_size_step; + u32 hs_to; + u32 total_bytes; + u32 bytes_per_chunk; + u32 chunks = 0; + u32 bytes_left = 0; + u32 chunk_overhead; + const u8 pkt_header = 6; + u8 coding; + int div; + u16 hline; + u16 byte_cycle; + + coding = fmt_to_coding(dsi->slave->format); + video_size = round_video_size(coding, vm->hactive); + bpp_x100 = calc_bytes_per_pixel_x100(coding); + video_size_step = calc_video_size_step(coding); + ratio_x1000 = byte_clk * 1000 / (vm->pixelclock / 1000); + hline = vm->hactive + vm->hsync_len + vm->hfront_porch + + vm->hback_porch; + + writel(0, ctx->base + SOFT_RESET); + dsi_reg_wr(ctx, VID_MODE_CFG, FRAME_BTA_ACK_EN, 15, ctx->frame_ack_en); + dsi_reg_wr(ctx, DPI_VIDEO_FORMAT, DPI_VIDEO_MODE_FORMAT, 0, coding); + dsi_reg_wr(ctx, VID_MODE_CFG, VID_MODE_TYPE, 0, ctx->burst_mode); + byte_cycle = 95 * hline * ratio_x1000 / 100000; + dsi_reg_wr(ctx, VIDEO_SIG_DELAY_CONFIG, VIDEO_SIG_DELAY, 0, byte_cycle); + byte_cycle = hline * ratio_x1000 / 1000; + writel(byte_cycle, ctx->base + VIDEO_LINE_TIME); + byte_cycle = vm->hsync_len * ratio_x1000 / 1000; + dsi_reg_wr(ctx, VIDEO_LINE_HBLK_TIME, VIDEO_LINE_HSA_TIME, 16, byte_cycle); + byte_cycle = vm->hback_porch * ratio_x1000 / 1000; + dsi_reg_wr(ctx, VIDEO_LINE_HBLK_TIME, VIDEO_LINE_HBP_TIME, 0, byte_cycle); + writel(vm->vactive, ctx->base + VIDEO_VACTIVE_LINES); + dsi_reg_wr(ctx, VIDEO_VBLK_LINES, VFP_LINES, 0, vm->vfront_porch); + dsi_reg_wr(ctx, VIDEO_VBLK_LINES, VBP_LINES, 10, vm->vback_porch); + dsi_reg_wr(ctx, VIDEO_VBLK_LINES, VSA_LINES, 20, vm->vsync_len); + dsi_reg_up(ctx, VID_MODE_CFG, LP_HBP_EN | LP_HFP_EN | LP_VACT_EN | + LP_VFP_EN | LP_VBP_EN | LP_VSA_EN, LP_HBP_EN | LP_HFP_EN | + LP_VACT_EN | LP_VFP_EN | LP_VBP_EN | LP_VSA_EN); + + hs_to = (hline * vm->vactive) + (2 * bpp_x100) / 100; + for (div = 0x80; (div < hs_to) && (div > 2); div--) { + if ((hs_to % div) == 0) { + writel(div, ctx->base + TIMEOUT_CNT_CLK_CONFIG); + writel(hs_to / div, ctx->base + LRX_H_TO_CONFIG); + writel(hs_to / div, ctx->base + HTX_TO_CONFIG); + break; + } + } + + if (ctx->burst_mode == VIDEO_BURST_WITH_SYNC_PULSES) { + dsi_reg_wr(ctx, VIDEO_PKT_CONFIG, VIDEO_PKT_SIZE, 0, video_size); + writel(0, ctx->base + VIDEO_NULLPKT_SIZE); + dsi_reg_up(ctx, VIDEO_PKT_CONFIG, VIDEO_LINE_CHUNK_NUM, 0); + } else { + /* non burst transmission */ + null_pkt_size = 0; + + /* bytes to be sent - first as one chunk */ + bytes_per_chunk = vm->hactive * bpp_x100 / 100 + pkt_header; + + /* hline total bytes from the DPI interface */ + total_bytes = (vm->hactive + vm->hfront_porch) * + ratio_x1000 / dsi->slave->lanes / 1000; + + /* check if the pixels actually fit on the DSI link */ + if (total_bytes < bytes_per_chunk) { + drm_err(dsi->drm, "current resolution can not be set\n"); + return -EINVAL; + } + + chunk_overhead = total_bytes - bytes_per_chunk; + + /* overhead higher than 1 -> enable multi packets */ + if (chunk_overhead > 1) { + /* multi packets */ + for (video_size = video_size_step; + video_size < vm->hactive; + video_size += video_size_step) { + if (vm->hactive * 1000 / video_size % 1000) + continue; + + chunks = vm->hactive / video_size; + bytes_per_chunk = bpp_x100 * video_size / 100 + + pkt_header; + if (total_bytes >= (bytes_per_chunk * chunks)) { + bytes_left = total_bytes - + bytes_per_chunk * chunks; + break; + } + } + + /* prevent overflow (unsigned - unsigned) */ + if (bytes_left > (pkt_header * chunks)) { + null_pkt_size = (bytes_left - + pkt_header * chunks) / chunks; + /* avoid register overflow */ + if (null_pkt_size > 1023) + null_pkt_size = 1023; + } + + } else { + /* single packet */ + chunks = 1; + + /* must be a multiple of 4 except 18 loosely */ + for (video_size = vm->hactive; + (video_size % video_size_step) != 0; + video_size++) + ; + } + + dsi_reg_wr(ctx, VIDEO_PKT_CONFIG, VIDEO_PKT_SIZE, 0, video_size); + writel(null_pkt_size, ctx->base + VIDEO_NULLPKT_SIZE); + dsi_reg_wr(ctx, VIDEO_PKT_CONFIG, VIDEO_LINE_CHUNK_NUM, 16, chunks); + } + + writel(ctx->int0_mask, ctx->base + MASK_PROTOCOL_INT); + writel(ctx->int1_mask, ctx->base + MASK_INTERNAL_INT); + writel(1, ctx->base + SOFT_RESET); + + return 0; +} + +static void sprd_dsi_edpi_video(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + const u32 fifo_depth = 1096; + const u32 word_length = 4; + u32 hactive = ctx->vm.hactive; + u32 bpp_x100; + u32 max_fifo_len; + u8 coding; + + coding = fmt_to_coding(dsi->slave->format); + bpp_x100 = calc_bytes_per_pixel_x100(coding); + max_fifo_len = word_length * fifo_depth * 100 / bpp_x100; + + writel(0, ctx->base + SOFT_RESET); + dsi_reg_wr(ctx, DPI_VIDEO_FORMAT, DPI_VIDEO_MODE_FORMAT, 0, coding); + dsi_reg_wr(ctx, CMD_MODE_CFG, TEAR_FX_EN, 0, ctx->te_ack_en); + + if (max_fifo_len > hactive) + writel(hactive, ctx->base + DCS_WM_PKT_SIZE); + else + writel(max_fifo_len, ctx->base + DCS_WM_PKT_SIZE); + + writel(ctx->int0_mask, ctx->base + MASK_PROTOCOL_INT); + writel(ctx->int1_mask, ctx->base + MASK_INTERNAL_INT); + writel(1, ctx->base + SOFT_RESET); +} + +/* + * Send a packet on the generic interface, + * this function has an active delay to wait for the buffer to clear. + * The delay is limited to: + * (param_length / 4) x DSIH_FIFO_ACTIVE_WAIT x register access time + * the controller restricts the sending of. + * + * This function will not be able to send Null and Blanking packets due to + * controller restriction + */ +static int sprd_dsi_wr_pkt(struct dsi_context *ctx, u8 vc, u8 type, + const u8 *param, u16 len) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + u8 wc_lsbyte, wc_msbyte; + u32 payload; + int i, j, ret; + + if (vc > 3) + return -EINVAL; + + /* 1st: for long packet, must config payload first */ + ret = dsi_wait_tx_payload_fifo_empty(ctx); + if (ret) { + drm_err(dsi->drm, "tx payload fifo is not empty\n"); + return ret; + } + + if (len > 2) { + for (i = 0, j = 0; i < len; i += j) { + payload = 0; + for (j = 0; (j < 4) && ((j + i) < (len)); j++) + payload |= param[i + j] << (j * 8); + + writel(payload, ctx->base + GEN_PLD_DATA); + } + wc_lsbyte = len & 0xff; + wc_msbyte = len >> 8; + } else { + wc_lsbyte = (len > 0) ? param[0] : 0; + wc_msbyte = (len > 1) ? param[1] : 0; + } + + /* 2nd: then set packet header */ + ret = dsi_wait_tx_cmd_fifo_empty(ctx); + if (ret) { + drm_err(dsi->drm, "tx cmd fifo is not empty\n"); + return ret; + } + + writel(type | (vc << 6) | (wc_lsbyte << 8) | (wc_msbyte << 16), + ctx->base + GEN_HDR); + + return 0; +} + +/* + * Send READ packet to peripheral using the generic interface, + * this will force command mode and stop video mode (because of BTA). + * + * This function has an active delay to wait for the buffer to clear, + * the delay is limited to 2 x DSIH_FIFO_ACTIVE_WAIT + * (waiting for command buffer, and waiting for receiving) + * @note this function will enable BTA + */ +static int sprd_dsi_rd_pkt(struct dsi_context *ctx, u8 vc, u8 type, + u8 msb_byte, u8 lsb_byte, + u8 *buffer, u8 bytes_to_read) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + int i, ret; + int count = 0; + u32 temp; + + if (vc > 3) + return -EINVAL; + + /* 1st: send read command to peripheral */ + ret = dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_CMD_FIFO_EMPTY, 5); + if (!ret) + return -EIO; + + writel(type | (vc << 6) | (lsb_byte << 8) | (msb_byte << 16), + ctx->base + GEN_HDR); + + /* 2nd: wait peripheral response completed */ + ret = dsi_wait_rd_resp_completed(ctx); + if (ret) { + drm_err(dsi->drm, "wait read response time out\n"); + return ret; + } + + /* 3rd: get data from rx payload fifo */ + ret = dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_RDATA_FIFO_EMPTY, 1); + if (ret) { + drm_err(dsi->drm, "rx payload fifo empty\n"); + return -EIO; + } + + for (i = 0; i < 100; i++) { + temp = readl(ctx->base + GEN_PLD_DATA); + + if (count < bytes_to_read) + buffer[count++] = temp & 0xff; + if (count < bytes_to_read) + buffer[count++] = (temp >> 8) & 0xff; + if (count < bytes_to_read) + buffer[count++] = (temp >> 16) & 0xff; + if (count < bytes_to_read) + buffer[count++] = (temp >> 24) & 0xff; + + ret = dsi_reg_rd(ctx, CMD_MODE_STATUS, GEN_CMD_RDATA_FIFO_EMPTY, 1); + if (ret) + return count; + } + + return 0; +} + +static void sprd_dsi_set_work_mode(struct dsi_context *ctx, u8 mode) +{ + if (mode == DSI_MODE_CMD) + writel(1, ctx->base + DSI_MODE_CFG); + else + writel(0, ctx->base + DSI_MODE_CFG); +} + +static void sprd_dsi_state_reset(struct dsi_context *ctx) +{ + writel(0, ctx->base + SOFT_RESET); + udelay(100); + writel(1, ctx->base + SOFT_RESET); +} + +static int sprd_dphy_init(struct dsi_context *ctx) +{ + struct sprd_dsi *dsi = container_of(ctx, struct sprd_dsi, ctx); + int ret; + + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_RESET_N, 0); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_SHUTDOWN, 0); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_CLK_EN, 0); + + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLR, 0); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLR, PHY_TESTCLR); + dsi_reg_up(ctx, PHY_TST_CTRL0, PHY_TESTCLR, 0); + + dphy_pll_config(ctx); + dphy_timing_config(ctx); + + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_SHUTDOWN, RF_PHY_SHUTDOWN); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_RESET_N, RF_PHY_RESET_N); + writel(0x1C, ctx->base + PHY_MIN_STOP_TIME); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_CLK_EN, RF_PHY_CLK_EN); + writel(dsi->slave->lanes - 1, ctx->base + PHY_LANE_NUM_CONFIG); + + ret = dphy_wait_pll_locked(ctx); + if (ret) { + drm_err(dsi->drm, "dphy initial failed\n"); + return ret; + } + + return 0; +} + +static void sprd_dphy_fini(struct dsi_context *ctx) +{ + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_RESET_N, 0); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_SHUTDOWN, 0); + dsi_reg_up(ctx, PHY_INTERFACE_CTRL, RF_PHY_RESET_N, RF_PHY_RESET_N); +} + +static void sprd_dsi_encoder_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adj_mode) +{ + struct sprd_dsi *dsi = encoder_to_dsi(encoder); + + drm_display_mode_to_videomode(adj_mode, &dsi->ctx.vm); +} + +static void sprd_dsi_encoder_enable(struct drm_encoder *encoder) +{ + struct sprd_dsi *dsi = encoder_to_dsi(encoder); + struct sprd_dpu *dpu = to_sprd_crtc(encoder->crtc); + struct dsi_context *ctx = &dsi->ctx; + + if (ctx->enabled) { + drm_warn(dsi->drm, "dsi is initialized\n"); + return; + } + + sprd_dsi_init(ctx); + if (ctx->work_mode == DSI_MODE_VIDEO) + sprd_dsi_dpi_video(ctx); + else + sprd_dsi_edpi_video(ctx); + + sprd_dphy_init(ctx); + + sprd_dsi_set_work_mode(ctx, ctx->work_mode); + sprd_dsi_state_reset(ctx); + + if (dsi->slave->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) { + dsi_reg_up(ctx, PHY_CLK_LANE_LP_CTRL, AUTO_CLKLANE_CTRL_EN, + AUTO_CLKLANE_CTRL_EN); + } else { + dsi_reg_up(ctx, PHY_CLK_LANE_LP_CTRL, RF_PHY_CLK_EN, RF_PHY_CLK_EN); + dsi_reg_up(ctx, PHY_CLK_LANE_LP_CTRL, PHY_CLKLANE_TX_REQ_HS, + PHY_CLKLANE_TX_REQ_HS); + dphy_wait_pll_locked(ctx); + } + + sprd_dpu_run(dpu); + + ctx->enabled = true; +} + +static void sprd_dsi_encoder_disable(struct drm_encoder *encoder) +{ + struct sprd_dsi *dsi = encoder_to_dsi(encoder); + struct sprd_dpu *dpu = to_sprd_crtc(encoder->crtc); + struct dsi_context *ctx = &dsi->ctx; + + if (!ctx->enabled) { + drm_warn(dsi->drm, "dsi isn't initialized\n"); + return; + } + + sprd_dpu_stop(dpu); + sprd_dphy_fini(ctx); + sprd_dsi_fini(ctx); + + ctx->enabled = false; +} + +static const struct drm_encoder_helper_funcs sprd_encoder_helper_funcs = { + .mode_set = sprd_dsi_encoder_mode_set, + .enable = sprd_dsi_encoder_enable, + .disable = sprd_dsi_encoder_disable +}; + +static const struct drm_encoder_funcs sprd_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static int sprd_dsi_encoder_init(struct sprd_dsi *dsi, + struct device *dev) +{ + struct drm_encoder *encoder = &dsi->encoder; + u32 crtc_mask; + int ret; + + crtc_mask = drm_of_find_possible_crtcs(dsi->drm, dev->of_node); + if (!crtc_mask) { + drm_err(dsi->drm, "failed to find crtc mask\n"); + return -EINVAL; + } + + drm_dbg(dsi->drm, "find possible crtcs: 0x%08x\n", crtc_mask); + + encoder->possible_crtcs = crtc_mask; + ret = drm_encoder_init(dsi->drm, encoder, &sprd_encoder_funcs, + DRM_MODE_ENCODER_DSI, NULL); + if (ret) { + drm_err(dsi->drm, "failed to init dsi encoder\n"); + return ret; + } + + drm_encoder_helper_add(encoder, &sprd_encoder_helper_funcs); + + return 0; +} + +static int sprd_dsi_bridge_init(struct sprd_dsi *dsi, + struct device *dev) +{ + int ret; + + dsi->panel_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 1, 0); + if (IS_ERR(dsi->panel_bridge)) + return PTR_ERR(dsi->panel_bridge); + + ret = drm_bridge_attach(&dsi->encoder, dsi->panel_bridge, NULL, 0); + if (ret) + return ret; + + return 0; +} + +static int sprd_dsi_context_init(struct sprd_dsi *dsi, + struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dsi_context *ctx = &dsi->ctx; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctx->base = devm_ioremap(dev, res->start, resource_size(res)); + if (!ctx->base) { + drm_err(dsi->drm, "failed to map dsi host registers\n"); + return -ENXIO; + } + + ctx->regmap = devm_regmap_init(dev, ®map_tst_io, dsi, &byte_config); + if (IS_ERR(ctx->regmap)) { + drm_err(dsi->drm, "dphy regmap init failed\n"); + return PTR_ERR(ctx->regmap); + } + + ctx->data_hs2lp = 120; + ctx->data_lp2hs = 500; + ctx->clk_hs2lp = 4; + ctx->clk_lp2hs = 15; + ctx->max_rd_time = 6000; + ctx->int0_mask = 0xffffffff; + ctx->int1_mask = 0xffffffff; + ctx->enabled = true; + + return 0; +} + +static int sprd_dsi_bind(struct device *dev, struct device *master, void *data) +{ + struct drm_device *drm = data; + struct sprd_dsi *dsi = dev_get_drvdata(dev); + int ret; + + dsi->drm = drm; + + ret = sprd_dsi_encoder_init(dsi, dev); + if (ret) + return ret; + + ret = sprd_dsi_bridge_init(dsi, dev); + if (ret) + return ret; + + ret = sprd_dsi_context_init(dsi, dev); + if (ret) + return ret; + + return 0; +} + +static void sprd_dsi_unbind(struct device *dev, + struct device *master, void *data) +{ + struct sprd_dsi *dsi = dev_get_drvdata(dev); + + drm_of_panel_bridge_remove(dev->of_node, 1, 0); + + drm_encoder_cleanup(&dsi->encoder); +} + +static const struct component_ops dsi_component_ops = { + .bind = sprd_dsi_bind, + .unbind = sprd_dsi_unbind, +}; + +static int sprd_dsi_host_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *slave) +{ + struct sprd_dsi *dsi = host_to_dsi(host); + struct dsi_context *ctx = &dsi->ctx; + + dsi->slave = slave; + + if (slave->mode_flags & MIPI_DSI_MODE_VIDEO) + ctx->work_mode = DSI_MODE_VIDEO; + else + ctx->work_mode = DSI_MODE_CMD; + + if (slave->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) + ctx->burst_mode = VIDEO_BURST_WITH_SYNC_PULSES; + else if (slave->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + ctx->burst_mode = VIDEO_NON_BURST_WITH_SYNC_PULSES; + else + ctx->burst_mode = VIDEO_NON_BURST_WITH_SYNC_EVENTS; + + return component_add(host->dev, &dsi_component_ops); +} + +static int sprd_dsi_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *slave) +{ + component_del(host->dev, &dsi_component_ops); + + return 0; +} + +static ssize_t sprd_dsi_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct sprd_dsi *dsi = host_to_dsi(host); + const u8 *tx_buf = msg->tx_buf; + + if (msg->rx_buf && msg->rx_len) { + u8 lsb = (msg->tx_len > 0) ? tx_buf[0] : 0; + u8 msb = (msg->tx_len > 1) ? tx_buf[1] : 0; + + return sprd_dsi_rd_pkt(&dsi->ctx, msg->channel, msg->type, + msb, lsb, msg->rx_buf, msg->rx_len); + } + + if (msg->tx_buf && msg->tx_len) + return sprd_dsi_wr_pkt(&dsi->ctx, msg->channel, msg->type, + tx_buf, msg->tx_len); + + return 0; +} + +static const struct mipi_dsi_host_ops sprd_dsi_host_ops = { + .attach = sprd_dsi_host_attach, + .detach = sprd_dsi_host_detach, + .transfer = sprd_dsi_host_transfer, +}; + +static const struct of_device_id dsi_match_table[] = { + { .compatible = "sprd,sharkl3-dsi-host" }, + { /* sentinel */ }, +}; + +static int sprd_dsi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sprd_dsi *dsi; + + dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + + dev_set_drvdata(dev, dsi); + + dsi->host.ops = &sprd_dsi_host_ops; + dsi->host.dev = dev; + + return mipi_dsi_host_register(&dsi->host); +} + +static int sprd_dsi_remove(struct platform_device *pdev) +{ + struct sprd_dsi *dsi = dev_get_drvdata(&pdev->dev); + + mipi_dsi_host_unregister(&dsi->host); + + return 0; +} + +struct platform_driver sprd_dsi_driver = { + .probe = sprd_dsi_probe, + .remove = sprd_dsi_remove, + .driver = { + .name = "sprd-dsi-drv", + .of_match_table = dsi_match_table, + }, +}; + +MODULE_AUTHOR("Leon He <leon.he@unisoc.com>"); +MODULE_AUTHOR("Kevin Tang <kevin.tang@unisoc.com>"); +MODULE_DESCRIPTION("Unisoc MIPI DSI HOST Controller Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/sprd/sprd_dsi.h b/drivers/gpu/drm/sprd/sprd_dsi.h new file mode 100644 index 000000000000..d858ebb11115 --- /dev/null +++ b/drivers/gpu/drm/sprd/sprd_dsi.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Unisoc Inc. + */ + +#ifndef __SPRD_DSI_H__ +#define __SPRD_DSI_H__ + +#include <linux/of.h> +#include <linux/device.h> +#include <linux/regmap.h> +#include <video/videomode.h> + +#include <drm/drm_bridge.h> +#include <drm/drm_connector.h> +#include <drm/drm_encoder.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_print.h> +#include <drm/drm_panel.h> + +#define encoder_to_dsi(encoder) \ + container_of(encoder, struct sprd_dsi, encoder) + +enum dsi_work_mode { + DSI_MODE_CMD = 0, + DSI_MODE_VIDEO +}; + +enum video_burst_mode { + VIDEO_NON_BURST_WITH_SYNC_PULSES = 0, + VIDEO_NON_BURST_WITH_SYNC_EVENTS, + VIDEO_BURST_WITH_SYNC_PULSES +}; + +enum dsi_color_coding { + COLOR_CODE_16BIT_CONFIG1 = 0, + COLOR_CODE_16BIT_CONFIG2, + COLOR_CODE_16BIT_CONFIG3, + COLOR_CODE_18BIT_CONFIG1, + COLOR_CODE_18BIT_CONFIG2, + COLOR_CODE_24BIT, + COLOR_CODE_20BIT_YCC422_LOOSELY, + COLOR_CODE_24BIT_YCC422, + COLOR_CODE_16BIT_YCC422, + COLOR_CODE_30BIT, + COLOR_CODE_36BIT, + COLOR_CODE_12BIT_YCC420, + COLOR_CODE_COMPRESSTION, + COLOR_CODE_MAX +}; + +enum pll_timing { + NONE, + REQUEST_TIME, + PREPARE_TIME, + SETTLE_TIME, + ZERO_TIME, + TRAIL_TIME, + EXIT_TIME, + CLKPOST_TIME, + TA_GET, + TA_GO, + TA_SURE, + TA_WAIT, +}; + +struct dphy_pll { + u8 refin; /* Pre-divider control signal */ + u8 cp_s; /* 00: SDM_EN=1, 10: SDM_EN=0 */ + u8 fdk_s; /* PLL mode control: integer or fraction */ + u8 sdm_en; + u8 div; + u8 int_n; /* integer N PLL */ + u32 ref_clk; /* dphy reference clock, unit: MHz */ + u32 freq; /* panel config, unit: KHz */ + u32 fvco; + u32 potential_fvco; + u32 nint; /* sigma delta modulator NINT control */ + u32 kint; /* sigma delta modulator KINT control */ + u8 lpf_sel; /* low pass filter control */ + u8 out_sel; /* post divider control */ + u8 vco_band; /* vco range */ + u8 det_delay; +}; + +struct dsi_context { + void __iomem *base; + struct regmap *regmap; + struct dphy_pll pll; + struct videomode vm; + bool enabled; + + u8 work_mode; + u8 burst_mode; + u32 int0_mask; + u32 int1_mask; + + /* maximum time (ns) for data lanes from HS to LP */ + u16 data_hs2lp; + /* maximum time (ns) for data lanes from LP to HS */ + u16 data_lp2hs; + /* maximum time (ns) for clk lanes from HS to LP */ + u16 clk_hs2lp; + /* maximum time (ns) for clk lanes from LP to HS */ + u16 clk_lp2hs; + /* maximum time (ns) for BTA operation - REQUIRED */ + u16 max_rd_time; + /* enable receiving frame ack packets - for video mode */ + bool frame_ack_en; + /* enable receiving tear effect ack packets - for cmd mode */ + bool te_ack_en; +}; + +struct sprd_dsi { + struct drm_device *drm; + struct mipi_dsi_host host; + struct mipi_dsi_device *slave; + struct drm_encoder encoder; + struct drm_bridge *panel_bridge; + struct dsi_context ctx; +}; + +int dphy_pll_config(struct dsi_context *ctx); +void dphy_timing_config(struct dsi_context *ctx); + +#endif /* __SPRD_DSI_H__ */ diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig index d0cfdd36b38f..246a94afbe74 100644 --- a/drivers/gpu/drm/sti/Kconfig +++ b/drivers/gpu/drm/sti/Kconfig @@ -5,7 +5,6 @@ config DRM_STI select RESET_CONTROLLER select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER select DRM_PANEL select FW_LOADER select SND_SOC_HDMI_CODEC if SND_SOC diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig index b7d66915a2be..e0379488cd0d 100644 --- a/drivers/gpu/drm/stm/Kconfig +++ b/drivers/gpu/drm/stm/Kconfig @@ -4,7 +4,6 @@ config DRM_STM depends on DRM && (ARCH_STM32 || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER select DRM_PANEL_BRIDGE select VIDEOMODE_HELPERS select FB_PROVIDE_GET_FB_UNMAPPED_AREA if FB diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 8c796de53222..befc5a80222d 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -5,7 +5,6 @@ config DRM_SUN4I depends on ARCH_SUNXI || COMPILE_TEST select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_PANEL select REGMAP_MMIO select VIDEOMODE_HELPERS diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 1650a448eabd..8cf5aeb9db6c 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -12,6 +12,9 @@ config DRM_TEGRA select INTERCONNECT select IOMMU_IOVA select CEC_CORE if CEC_NOTIFIER + select SND_SIMPLE_CARD if SND_SOC_TEGRA20_SPDIF + select SND_SOC_HDMI_CODEC if SND_SOC_TEGRA20_SPDIF + select SND_AUDIO_GRAPH_CARD if SND_SOC_TEGRA20_SPDIF help Choose this option if you have an NVIDIA Tegra SoC. diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index d801909182cf..df6cc986aeba 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -23,7 +23,8 @@ tegra-drm-y := \ gr2d.o \ gr3d.o \ falcon.o \ - vic.o + vic.o \ + nvdec.o tegra-drm-y += trace.o diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index a29d64f87563..eb70eee8992a 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -11,9 +11,12 @@ #include <linux/interconnect.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_opp.h> #include <linux/pm_runtime.h> #include <linux/reset.h> +#include <soc/tegra/common.h> #include <soc/tegra/pmc.h> #include <drm/drm_atomic.h> @@ -890,11 +893,9 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, return 0; } -static void tegra_cursor_atomic_update(struct drm_plane *plane, - struct drm_atomic_state *state) +static void __tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_plane_state *new_state) { - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); struct tegra_dc *dc = to_tegra_dc(new_state->crtc); struct tegra_drm *tegra = plane->dev->dev_private; @@ -990,6 +991,14 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } +static void tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + + __tegra_cursor_atomic_update(plane, new_state); +} + static void tegra_cursor_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -1009,12 +1018,78 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); } +static int tegra_cursor_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc_state *crtc_state; + int min_scale, max_scale; + int err; + + crtc_state = drm_atomic_get_existing_crtc_state(state, new_state->crtc); + if (WARN_ON(!crtc_state)) + return -EINVAL; + + if (!crtc_state->active) + return -EINVAL; + + if (plane->state->crtc != new_state->crtc || + plane->state->src_w != new_state->src_w || + plane->state->src_h != new_state->src_h || + plane->state->crtc_w != new_state->crtc_w || + plane->state->crtc_h != new_state->crtc_h || + plane->state->fb != new_state->fb || + plane->state->fb == NULL) + return -EINVAL; + + min_scale = (1 << 16) / 8; + max_scale = (8 << 16) / 1; + + err = drm_atomic_helper_check_plane_state(new_state, crtc_state, min_scale, max_scale, + true, true); + if (err < 0) + return err; + + if (new_state->visible != plane->state->visible) + return -EINVAL; + + return 0; +} + +static void tegra_cursor_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + struct tegra_dc *dc = to_tegra_dc(new_state->crtc); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + + if (new_state->visible) { + struct tegra_plane *p = to_tegra_plane(plane); + u32 value; + + __tegra_cursor_atomic_update(plane, new_state); + + value = (WIN_A_ACT_REQ << p->index) << 8 | GENERAL_UPDATE; + tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); + + value = (WIN_A_ACT_REQ << p->index) | GENERAL_ACT_REQ; + tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); + } +} + static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = { .prepare_fb = tegra_plane_prepare_fb, .cleanup_fb = tegra_plane_cleanup_fb, .atomic_check = tegra_cursor_atomic_check, .atomic_update = tegra_cursor_atomic_update, .atomic_disable = tegra_cursor_atomic_disable, + .atomic_async_check = tegra_cursor_atomic_async_check, + .atomic_async_update = tegra_cursor_atomic_async_update, }; static const uint64_t linear_modifiers[] = { @@ -1267,9 +1342,9 @@ static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm, err = PTR_ERR(planes[i]); while (i--) - tegra_plane_funcs.destroy(planes[i]); + planes[i]->funcs->destroy(planes[i]); - tegra_plane_funcs.destroy(primary); + primary->funcs->destroy(primary); return ERR_PTR(err); } } @@ -1762,10 +1837,55 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, return 0; } -static void tegra_dc_commit_state(struct tegra_dc *dc, - struct tegra_dc_state *state) +static void tegra_dc_update_voltage_state(struct tegra_dc *dc, + struct tegra_dc_state *state) +{ + unsigned long rate, pstate; + struct dev_pm_opp *opp; + int err; + + if (!dc->has_opp_table) + return; + + /* calculate actual pixel clock rate which depends on internal divider */ + rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2); + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate); + + /* + * Very high resolution modes may results in a clock rate that is + * above the characterized maximum. In this case it's okay to fall + * back to the characterized maximum. + */ + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dc->dev, &rate); + + if (IS_ERR(opp)) { + dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n", + rate, opp); + return; + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + /* + * The minimum core voltage depends on the pixel clock rate (which + * depends on internal clock divider of the CRTC) and not on the + * rate of the display controller clock. This is why we're not using + * dev_pm_opp_set_rate() API and instead controlling the power domain + * directly. + */ + err = dev_pm_genpd_set_performance_state(dc->dev, pstate); + if (err) + dev_err(dc->dev, "failed to set power domain state to %lu: %d\n", + pstate, err); +} + +static void tegra_dc_set_clock_rate(struct tegra_dc *dc, + struct tegra_dc_state *state) { - u32 value; int err; err = clk_set_parent(dc->clk, state->clk); @@ -1797,10 +1917,7 @@ static void tegra_dc_commit_state(struct tegra_dc *dc, state->div); DRM_DEBUG_KMS("pclk: %lu\n", state->pclk); - if (!dc->soc->has_nvdisplay) { - value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1; - tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); - } + tegra_dc_update_voltage_state(dc, state); } static void tegra_dc_stop(struct tegra_dc *dc) @@ -1991,6 +2108,13 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, err = host1x_client_suspend(&dc->client); if (err < 0) dev_err(dc->dev, "failed to suspend: %d\n", err); + + if (dc->has_opp_table) { + err = dev_pm_genpd_set_performance_state(dc->dev, 0); + if (err) + dev_err(dc->dev, + "failed to clear power domain state: %d\n", err); + } } static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, @@ -2002,6 +2126,9 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, u32 value; int err; + /* apply PLL changes */ + tegra_dc_set_clock_rate(dc, crtc_state); + err = host1x_client_resume(&dc->client); if (err < 0) { dev_err(dc->dev, "failed to resume: %d\n", err); @@ -2076,8 +2203,11 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, else tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR); - /* apply PLL and pixel clock changes */ - tegra_dc_commit_state(dc, crtc_state); + /* apply pixel clock changes */ + if (!dc->soc->has_nvdisplay) { + value = SHIFT_CLK_DIVIDER(crtc_state->div) | PIXEL_CLK_DIVIDER_PCD1; + tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); + } /* program display mode */ tegra_dc_set_timings(dc, mode); @@ -2107,6 +2237,12 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW); } + if (dc->rgb) { + /* XXX: parameterize? */ + value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE; + tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS); + } + tegra_dc_commit(dc); drm_crtc_vblank_on(crtc); @@ -2685,6 +2821,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = true, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct tegra_dc_soc_info tegra30_dc_soc_info = { @@ -2707,6 +2844,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra114_dc_soc_info = { @@ -2729,6 +2867,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra124_dc_soc_info = { @@ -2751,6 +2890,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { @@ -2773,6 +2913,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, }; static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { @@ -2823,6 +2964,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .wgrps = tegra186_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { @@ -2873,6 +3015,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .wgrps = tegra194_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct of_device_id tegra_dc_of_match[] = { @@ -2973,6 +3116,23 @@ static int tegra_dc_couple(struct tegra_dc *dc) return 0; } +static int tegra_dc_init_opp_table(struct tegra_dc *dc) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params); + if (err && err != -ENODEV) + return err; + + if (err) + dc->has_opp_table = false; + else + dc->has_opp_table = true; + + return 0; +} + static int tegra_dc_probe(struct platform_device *pdev) { u64 dma_mask = dma_get_mask(pdev->dev.parent); @@ -3038,6 +3198,10 @@ static int tegra_dc_probe(struct platform_device *pdev) tegra_powergate_power_off(dc->powergate); } + err = tegra_dc_init_opp_table(dc); + if (err < 0) + return err; + dc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(dc->regs)) return PTR_ERR(dc->regs); diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 40378308d527..3f91a10ea6c7 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -76,6 +76,7 @@ struct tegra_dc_soc_info { bool has_win_b_vfilter_mem_client; bool has_win_c_without_vert_filter; bool plane_tiled_memory_bandwidth_x2; + bool has_pll_d2_out0; }; struct tegra_dc { @@ -100,6 +101,8 @@ struct tegra_dc { struct drm_info_list *debugfs_files; const struct tegra_dc_soc_info *soc; + + bool has_opp_table; }; static inline struct tegra_dc * diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 8d37d6b00562..e9de91a4e7e8 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -10,6 +10,7 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <drm/drm_aperture.h> #include <drm/drm_atomic.h> @@ -21,6 +22,10 @@ #include <drm/drm_prime.h> #include <drm/drm_vblank.h> +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + #include "dc.h" #include "drm.h" #include "gem.h" @@ -116,6 +121,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) static void tegra_drm_context_free(struct tegra_drm_context *context) { context->client->ops->close_channel(context); + pm_runtime_put(context->client->base.dev); kfree(context); } @@ -427,13 +433,20 @@ static int tegra_client_open(struct tegra_drm_file *fpriv, { int err; + err = pm_runtime_resume_and_get(client->base.dev); + if (err) + return err; + err = client->ops->open_channel(client, context); - if (err < 0) + if (err < 0) { + pm_runtime_put(client->base.dev); return err; + } err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL); if (err < 0) { client->ops->close_channel(context); + pm_runtime_put(client->base.dev); return err; } @@ -936,6 +949,17 @@ int host1x_client_iommu_attach(struct host1x_client *client) struct iommu_group *group = NULL; int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(client->dev); + } +#endif + /* * If the host1x client is already attached to an IOMMU domain that is * not the shared IOMMU domain, don't try to attach it to a different @@ -1344,15 +1368,18 @@ static const struct of_device_id host1x_drm_subdevs[] = { { .compatible = "nvidia,tegra210-sor", }, { .compatible = "nvidia,tegra210-sor1", }, { .compatible = "nvidia,tegra210-vic", }, + { .compatible = "nvidia,tegra210-nvdec", }, { .compatible = "nvidia,tegra186-display", }, { .compatible = "nvidia,tegra186-dc", }, { .compatible = "nvidia,tegra186-sor", }, { .compatible = "nvidia,tegra186-sor1", }, { .compatible = "nvidia,tegra186-vic", }, + { .compatible = "nvidia,tegra186-nvdec", }, { .compatible = "nvidia,tegra194-display", }, { .compatible = "nvidia,tegra194-dc", }, { .compatible = "nvidia,tegra194-sor", }, { .compatible = "nvidia,tegra194-vic", }, + { .compatible = "nvidia,tegra194-nvdec", }, { /* sentinel */ } }; @@ -1376,6 +1403,7 @@ static struct platform_driver * const drivers[] = { &tegra_gr2d_driver, &tegra_gr3d_driver, &tegra_vic_driver, + &tegra_nvdec_driver, }; static int __init host1x_drm_init(void) diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 8b28327c931c..fc0a19554eac 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -202,5 +202,6 @@ extern struct platform_driver tegra_sor_driver; extern struct platform_driver tegra_gr2d_driver; extern struct platform_driver tegra_gr3d_driver; extern struct platform_driver tegra_vic_driver; +extern struct platform_driver tegra_nvdec_driver; #endif /* HOST1X_DRM_H */ diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index d38fd7e12b57..fce0e52973c2 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -23,86 +23,97 @@ MODULE_IMPORT_NS(DMA_BUF); -static void tegra_bo_put(struct host1x_bo *bo) +static unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents) { - struct tegra_bo *obj = host1x_to_tegra_bo(bo); + dma_addr_t next = ~(dma_addr_t)0; + unsigned int count = 0, i; + struct scatterlist *s; - drm_gem_object_put(&obj->gem); -} + for_each_sg(sgl, s, nents, i) { + /* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */ + if (!sg_dma_len(s)) + continue; -/* XXX move this into lib/scatterlist.c? */ -static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg, - unsigned int nents, gfp_t gfp_mask) -{ - struct scatterlist *dst; - unsigned int i; - int err; + if (sg_dma_address(s) != next) { + next = sg_dma_address(s) + sg_dma_len(s); + count++; + } + } - err = sg_alloc_table(sgt, nents, gfp_mask); - if (err < 0) - return err; + return count; +} - dst = sgt->sgl; +static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt) +{ + return sg_dma_count_chunks(sgt->sgl, sgt->nents); +} - for (i = 0; i < nents; i++) { - sg_set_page(dst, sg_page(sg), sg->length, 0); - dst = sg_next(dst); - sg = sg_next(sg); - } +static void tegra_bo_put(struct host1x_bo *bo) +{ + struct tegra_bo *obj = host1x_to_tegra_bo(bo); - return 0; + drm_gem_object_put(&obj->gem); } -static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys) +static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction direction) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); - struct sg_table *sgt; + struct drm_gem_object *gem = &obj->gem; + struct host1x_bo_mapping *map; int err; + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + kref_init(&map->ref); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + /* - * If we've manually mapped the buffer object through the IOMMU, make - * sure to return the IOVA address of our mapping. - * - * Similarly, for buffers that have been allocated by the DMA API the - * physical address can be used for devices that are not attached to - * an IOMMU. For these devices, callers must pass a valid pointer via - * the @phys argument. - * - * Imported buffers were also already mapped at import time, so the - * existing mapping can be reused. + * Imported buffers need special treatment to satisfy the semantics of DMA-BUF. */ - if (phys) { - *phys = obj->iova; - return NULL; + if (gem->import_attach) { + struct dma_buf *buf = gem->import_attach->dmabuf; + + map->attach = dma_buf_attach(buf, dev); + if (IS_ERR(map->attach)) { + err = PTR_ERR(map->attach); + goto free; + } + + map->sgt = dma_buf_map_attachment(map->attach, direction); + if (IS_ERR(map->sgt)) { + dma_buf_detach(buf, map->attach); + err = PTR_ERR(map->sgt); + goto free; + } + + err = sgt_dma_count_chunks(map->sgt); + map->size = gem->size; + + goto out; } /* * If we don't have a mapping for this buffer yet, return an SG table * so that host1x can do the mapping for us via the DMA API. */ - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; + } if (obj->pages) { /* * If the buffer object was allocated from the explicit IOMMU * API code paths, construct an SG table from the pages. */ - err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, - 0, obj->gem.size, GFP_KERNEL); - if (err < 0) - goto free; - } else if (obj->sgt) { - /* - * If the buffer object already has an SG table but no pages - * were allocated for it, it means the buffer was imported and - * the SG table needs to be copied to avoid overwriting any - * other potential users of the original SG table. - */ - err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, - obj->sgt->orig_nents, GFP_KERNEL); + err = sg_alloc_table_from_pages(map->sgt, obj->pages, obj->num_pages, 0, gem->size, + GFP_KERNEL); if (err < 0) goto free; } else { @@ -111,25 +122,53 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, * not imported, it had to be allocated with the DMA API, so * the DMA API helper can be used. */ - err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, - obj->gem.size); + err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova, gem->size); if (err < 0) goto free; } - return sgt; + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + +out: + /* + * If we've manually mapped the buffer object through the IOMMU, make sure to return the + * existing IOVA address of our mapping. + */ + if (!obj->mm) { + map->phys = sg_dma_address(map->sgt->sgl); + map->chunks = err; + } else { + map->phys = obj->iova; + map->chunks = 1; + } + + map->size = gem->size; + return map; + +free_sgt: + sg_free_table(map->sgt); free: - kfree(sgt); + kfree(map->sgt); + kfree(map); return ERR_PTR(err); } -static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt) +static void tegra_bo_unpin(struct host1x_bo_mapping *map) { - if (sgt) { - sg_free_table(sgt); - kfree(sgt); + if (map->attach) { + dma_buf_unmap_attachment(map->attach, map->sgt, map->direction); + dma_buf_detach(map->attach->dmabuf, map->attach); + } else { + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); } + + host1x_bo_put(map->bo); + kfree(map); } static void *tegra_bo_mmap(struct host1x_bo *bo) @@ -452,8 +491,18 @@ free: void tegra_bo_free_object(struct drm_gem_object *gem) { struct tegra_drm *tegra = gem->dev->dev_private; + struct host1x_bo_mapping *mapping, *tmp; struct tegra_bo *bo = to_tegra_bo(gem); + /* remove all mappings of this buffer object from any caches */ + list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) { + if (mapping->cache) + host1x_bo_unpin(mapping); + else + dev_err(gem->dev->dev, "mapping %p stale for device %s\n", mapping, + dev_name(mapping->dev)); + } + if (tegra->domain) tegra_bo_iommu_unmap(tegra, bo); diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index de288cba3905..e3bb4c99ed39 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -4,14 +4,25 @@ */ #include <linux/clk.h> +#include <linux/delay.h> #include <linux/iommu.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> + +#include <soc/tegra/common.h> #include "drm.h" #include "gem.h" #include "gr2d.h" +enum { + RST_MC, + RST_GR2D, + RST_GR2D_MAX, +}; + struct gr2d_soc { unsigned int version; }; @@ -21,6 +32,9 @@ struct gr2d { struct host1x_channel *channel; struct clk *clk; + struct reset_control_bulk_data resets[RST_GR2D_MAX]; + unsigned int nresets; + const struct gr2d_soc *soc; DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS); @@ -56,15 +70,22 @@ static int gr2d_init(struct host1x_client *client) goto free; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 200); + err = tegra_drm_register_client(dev->dev_private, drm); if (err < 0) { dev_err(client->dev, "failed to register client: %d\n", err); - goto detach; + goto disable_rpm; } return 0; -detach: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); free: host1x_syncpt_put(client->syncpts[0]); @@ -85,10 +106,15 @@ static int gr2d_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr2d->channel); + gr2d->channel = NULL; + return 0; } @@ -190,6 +216,27 @@ static const u32 gr2d_addr_regs[] = { GR2D_VA_BASE_ADDR_SB, }; +static int gr2d_get_resets(struct device *dev, struct gr2d *gr2d) +{ + int err; + + gr2d->resets[RST_MC].id = "mc"; + gr2d->resets[RST_GR2D].id = "2d"; + gr2d->nresets = RST_GR2D_MAX; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to get reset: %d\n", err); + return err; + } + + if (WARN_ON(!gr2d->resets[RST_GR2D].rstc)) + return -ENOENT; + + return 0; +} + static int gr2d_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -202,6 +249,8 @@ static int gr2d_probe(struct platform_device *pdev) if (!gr2d) return -ENOMEM; + platform_set_drvdata(pdev, gr2d); + gr2d->soc = of_device_get_match_data(dev); syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); @@ -214,11 +263,9 @@ static int gr2d_probe(struct platform_device *pdev) return PTR_ERR(gr2d->clk); } - err = clk_prepare_enable(gr2d->clk); - if (err) { - dev_err(dev, "cannot turn on clock\n"); + err = gr2d_get_resets(dev, gr2d); + if (err) return err; - } INIT_LIST_HEAD(&gr2d->client.base.list); gr2d->client.base.ops = &gr2d_client_ops; @@ -231,10 +278,13 @@ static int gr2d_probe(struct platform_device *pdev) gr2d->client.version = gr2d->soc->version; gr2d->client.ops = &gr2d_ops; + err = devm_tegra_core_dev_init_opp_table_common(dev); + if (err) + return err; + err = host1x_client_register(&gr2d->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - clk_disable_unprepare(gr2d->clk); return err; } @@ -242,8 +292,6 @@ static int gr2d_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); i++) set_bit(gr2d_addr_regs[i], gr2d->addr_regs); - platform_set_drvdata(pdev, gr2d); - return 0; } @@ -259,15 +307,100 @@ static int gr2d_remove(struct platform_device *pdev) return err; } + return 0; +} + +static int __maybe_unused gr2d_runtime_suspend(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr2d->channel); + reset_control_bulk_release(gr2d->nresets, gr2d->resets); + + /* + * GR2D module shouldn't be reset while hardware is idling, otherwise + * host1x's cmdproc will stuck on trying to access any G2 register + * after reset. GR2D module could be either hot-reset or reset after + * power-gating of the HEG partition. Hence we will put in reset only + * the memory client part of the module, the HEG GENPD will take care + * of resetting GR2D module across power-gating. + * + * On Tegra20 there is no HEG partition, but it's okay to have + * undetermined h/w state since userspace is expected to reprogram + * the state on each job submission anyways. + */ + err = reset_control_acquire(gr2d->resets[RST_MC].rstc); + if (err) { + dev_err(dev, "failed to acquire MC reset: %d\n", err); + goto acquire_reset; + } + + err = reset_control_assert(gr2d->resets[RST_MC].rstc); + reset_control_release(gr2d->resets[RST_MC].rstc); + if (err) { + dev_err(dev, "failed to assert MC reset: %d\n", err); + goto acquire_reset; + } + clk_disable_unprepare(gr2d->clk); return 0; + +acquire_reset: + reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + + return err; } +static int __maybe_unused gr2d_runtime_resume(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(gr2d->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + usleep_range(2000, 4000); + + /* this is a reset array which deasserts both 2D MC and 2D itself */ + err = reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_disable_unprepare(gr2d->clk); +release_reset: + reset_control_bulk_release(gr2d->nresets, gr2d->resets); + + return err; +} + +static const struct dev_pm_ops tegra_gr2d_pm = { + SET_RUNTIME_PM_OPS(gr2d_runtime_suspend, gr2d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr2d_driver = { .driver = { .name = "tegra-gr2d", .of_match_table = gr2d_match, + .pm = &tegra_gr2d_pm, }, .probe = gr2d_probe, .remove = gr2d_remove, diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 24442ade0da3..a1fd3113ea96 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -5,32 +5,47 @@ */ #include <linux/clk.h> +#include <linux/delay.h> #include <linux/host1x.h> #include <linux/iommu.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_opp.h> +#include <linux/pm_runtime.h> #include <linux/reset.h> +#include <soc/tegra/common.h> #include <soc/tegra/pmc.h> #include "drm.h" #include "gem.h" #include "gr3d.h" +enum { + RST_MC, + RST_GR3D, + RST_MC2, + RST_GR3D2, + RST_GR3D_MAX, +}; + struct gr3d_soc { unsigned int version; + unsigned int num_clocks; + unsigned int num_resets; }; struct gr3d { struct tegra_drm_client client; struct host1x_channel *channel; - struct clk *clk_secondary; - struct clk *clk; - struct reset_control *rst_secondary; - struct reset_control *rst; const struct gr3d_soc *soc; + struct clk_bulk_data *clocks; + unsigned int nclocks; + struct reset_control_bulk_data resets[RST_GR3D_MAX]; + unsigned int nresets; DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS); }; @@ -65,15 +80,22 @@ static int gr3d_init(struct host1x_client *client) goto free; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 200); + err = tegra_drm_register_client(dev->dev_private, drm); if (err < 0) { dev_err(client->dev, "failed to register client: %d\n", err); - goto detach; + goto disable_rpm; } return 0; -detach: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); free: host1x_syncpt_put(client->syncpts[0]); @@ -93,10 +115,15 @@ static int gr3d_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr3d->channel); + gr3d->channel = NULL; + return 0; } @@ -155,14 +182,20 @@ static const struct tegra_drm_client_ops gr3d_ops = { static const struct gr3d_soc tegra20_gr3d_soc = { .version = 0x20, + .num_clocks = 1, + .num_resets = 2, }; static const struct gr3d_soc tegra30_gr3d_soc = { .version = 0x30, + .num_clocks = 2, + .num_resets = 4, }; static const struct gr3d_soc tegra114_gr3d_soc = { .version = 0x35, + .num_clocks = 1, + .num_resets = 2, }; static const struct of_device_id tegra_gr3d_match[] = { @@ -278,69 +311,216 @@ static const u32 gr3d_addr_regs[] = { GR3D_GLOBAL_SAMP23SURFADDR(15), }; -static int gr3d_probe(struct platform_device *pdev) +static int gr3d_power_up_legacy_domain(struct device *dev, const char *name, + unsigned int id) { - struct device_node *np = pdev->dev.of_node; - struct host1x_syncpt **syncpts; - struct gr3d *gr3d; + struct gr3d *gr3d = dev_get_drvdata(dev); + struct reset_control *reset; + struct clk *clk; unsigned int i; int err; - gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL); - if (!gr3d) - return -ENOMEM; - - gr3d->soc = of_device_get_match_data(&pdev->dev); + /* + * Tegra20 device-tree doesn't specify 3d clock name and there is only + * one clock for Tegra20. Tegra30+ device-trees always specified names + * for the clocks. + */ + if (gr3d->nclocks == 1) { + if (id == TEGRA_POWERGATE_3D1) + return 0; + + clk = gr3d->clocks[0].clk; + } else { + for (i = 0; i < gr3d->nclocks; i++) { + if (WARN_ON(!gr3d->clocks[i].id)) + continue; + + if (!strcmp(gr3d->clocks[i].id, name)) { + clk = gr3d->clocks[i].clk; + break; + } + } - syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); - if (!syncpts) - return -ENOMEM; + if (WARN_ON(i == gr3d->nclocks)) + return -EINVAL; + } - gr3d->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(gr3d->clk)) { - dev_err(&pdev->dev, "cannot get clock\n"); - return PTR_ERR(gr3d->clk); + /* + * We use array of resets, which includes MC resets, and MC + * reset shouldn't be asserted while hardware is gated because + * MC flushing will fail for gated hardware. Hence for legacy + * PD we request the individual reset separately. + */ + reset = reset_control_get_exclusive_released(dev, name); + if (IS_ERR(reset)) + return PTR_ERR(reset); + + err = reset_control_acquire(reset); + if (err) { + dev_err(dev, "failed to acquire %s reset: %d\n", name, err); + } else { + err = tegra_powergate_sequence_power_up(id, clk, reset); + reset_control_release(reset); } - gr3d->rst = devm_reset_control_get(&pdev->dev, "3d"); - if (IS_ERR(gr3d->rst)) { - dev_err(&pdev->dev, "cannot get reset\n"); - return PTR_ERR(gr3d->rst); + reset_control_put(reset); + if (err) + return err; + + /* + * tegra_powergate_sequence_power_up() leaves clocks enabled, + * while GENPD not. Hence keep clock-enable balanced. + */ + clk_disable_unprepare(clk); + + return 0; +} + +static void gr3d_del_link(void *link) +{ + device_link_del(link); +} + +static int gr3d_init_power(struct device *dev, struct gr3d *gr3d) +{ + static const char * const opp_genpd_names[] = { "3d0", "3d1", NULL }; + const u32 link_flags = DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; + struct device **opp_virt_devs, *pd_dev; + struct device_link *link; + unsigned int i; + int err; + + err = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (err < 0) { + if (err != -ENOENT) + return err; + + /* + * Older device-trees don't use GENPD. In this case we should + * toggle power domain manually. + */ + err = gr3d_power_up_legacy_domain(dev, "3d", + TEGRA_POWERGATE_3D); + if (err) + return err; + + err = gr3d_power_up_legacy_domain(dev, "3d2", + TEGRA_POWERGATE_3D1); + if (err) + return err; + + return 0; } - if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) { - gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2"); - if (IS_ERR(gr3d->clk_secondary)) { - dev_err(&pdev->dev, "cannot get secondary clock\n"); - return PTR_ERR(gr3d->clk_secondary); + /* + * The PM domain core automatically attaches a single power domain, + * otherwise it skips attaching completely. We have a single domain + * on Tegra20 and two domains on Tegra30+. + */ + if (dev->pm_domain) + return 0; + + err = devm_pm_opp_attach_genpd(dev, opp_genpd_names, &opp_virt_devs); + if (err) + return err; + + for (i = 0; opp_genpd_names[i]; i++) { + pd_dev = opp_virt_devs[i]; + if (!pd_dev) { + dev_err(dev, "failed to get %s power domain\n", + opp_genpd_names[i]); + return -EINVAL; } - gr3d->rst_secondary = devm_reset_control_get(&pdev->dev, - "3d2"); - if (IS_ERR(gr3d->rst_secondary)) { - dev_err(&pdev->dev, "cannot get secondary reset\n"); - return PTR_ERR(gr3d->rst_secondary); + link = device_link_add(dev, pd_dev, link_flags); + if (!link) { + dev_err(dev, "failed to link to %s\n", dev_name(pd_dev)); + return -EINVAL; } + + err = devm_add_action_or_reset(dev, gr3d_del_link, link); + if (err) + return err; } - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk, - gr3d->rst); + return 0; +} + +static int gr3d_get_clocks(struct device *dev, struct gr3d *gr3d) +{ + int err; + + err = devm_clk_bulk_get_all(dev, &gr3d->clocks); if (err < 0) { - dev_err(&pdev->dev, "failed to power up 3D unit\n"); + dev_err(dev, "failed to get clock: %d\n", err); return err; } + gr3d->nclocks = err; - if (gr3d->clk_secondary) { - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1, - gr3d->clk_secondary, - gr3d->rst_secondary); - if (err < 0) { - dev_err(&pdev->dev, - "failed to power up secondary 3D unit\n"); - return err; - } + if (gr3d->nclocks != gr3d->soc->num_clocks) { + dev_err(dev, "invalid number of clocks: %u\n", gr3d->nclocks); + return -ENOENT; + } + + return 0; +} + +static int gr3d_get_resets(struct device *dev, struct gr3d *gr3d) +{ + int err; + + gr3d->resets[RST_MC].id = "mc"; + gr3d->resets[RST_MC2].id = "mc2"; + gr3d->resets[RST_GR3D].id = "3d"; + gr3d->resets[RST_GR3D2].id = "3d2"; + gr3d->nresets = gr3d->soc->num_resets; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to get reset: %d\n", err); + return err; } + if (WARN_ON(!gr3d->resets[RST_GR3D].rstc) || + WARN_ON(!gr3d->resets[RST_GR3D2].rstc && gr3d->nresets == 4)) + return -ENOENT; + + return 0; +} + +static int gr3d_probe(struct platform_device *pdev) +{ + struct host1x_syncpt **syncpts; + struct gr3d *gr3d; + unsigned int i; + int err; + + gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL); + if (!gr3d) + return -ENOMEM; + + platform_set_drvdata(pdev, gr3d); + + gr3d->soc = of_device_get_match_data(&pdev->dev); + + syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); + if (!syncpts) + return -ENOMEM; + + err = gr3d_get_clocks(&pdev->dev, gr3d); + if (err) + return err; + + err = gr3d_get_resets(&pdev->dev, gr3d); + if (err) + return err; + + err = gr3d_init_power(&pdev->dev, gr3d); + if (err) + return err; + INIT_LIST_HEAD(&gr3d->client.base.list); gr3d->client.base.ops = &gr3d_client_ops; gr3d->client.base.dev = &pdev->dev; @@ -352,6 +532,10 @@ static int gr3d_probe(struct platform_device *pdev) gr3d->client.version = gr3d->soc->version; gr3d->client.ops = &gr3d_ops; + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; + err = host1x_client_register(&gr3d->client.base); if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", @@ -363,8 +547,6 @@ static int gr3d_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++) set_bit(gr3d_addr_regs[i], gr3d->addr_regs); - platform_set_drvdata(pdev, gr3d); - return 0; } @@ -380,23 +562,80 @@ static int gr3d_remove(struct platform_device *pdev) return err; } - if (gr3d->clk_secondary) { - reset_control_assert(gr3d->rst_secondary); - tegra_powergate_power_off(TEGRA_POWERGATE_3D1); - clk_disable_unprepare(gr3d->clk_secondary); + return 0; +} + +static int __maybe_unused gr3d_runtime_suspend(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr3d->channel); + + err = reset_control_bulk_assert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + usleep_range(10, 20); + + /* + * Older device-trees don't specify MC resets and power-gating can't + * be done safely in that case. Hence we will keep the power ungated + * for older DTBs. For newer DTBs, GENPD will perform the power-gating. + */ + + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); + reset_control_bulk_release(gr3d->nresets, gr3d->resets); + + return 0; +} + +static int __maybe_unused gr3d_runtime_resume(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; } - reset_control_assert(gr3d->rst); - tegra_powergate_power_off(TEGRA_POWERGATE_3D); - clk_disable_unprepare(gr3d->clk); + err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } return 0; + +disable_clk: + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); +release_reset: + reset_control_bulk_release(gr3d->nresets, gr3d->resets); + + return err; } +static const struct dev_pm_ops tegra_gr3d_pm = { + SET_RUNTIME_PM_OPS(gr3d_runtime_suspend, gr3d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr3d_driver = { .driver = { .name = "tegra-gr3d", .of_match_table = tegra_gr3d_match, + .pm = &tegra_gr3d_pm, }, .probe = gr3d_probe, .remove = gr3d_remove, diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index e5d2a4026028..8845af5d325f 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -11,10 +11,14 @@ #include <linux/math64.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/pm_opp.h> #include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> #include <linux/reset.h> +#include <soc/tegra/common.h> +#include <sound/hdmi-codec.h> + #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_debugfs.h> @@ -78,6 +82,9 @@ struct tegra_hdmi { bool dvi; struct drm_info_list *debugfs_files; + + struct platform_device *audio_pdev; + struct mutex audio_lock; }; static inline struct tegra_hdmi * @@ -360,6 +367,18 @@ static const struct tmds_config tegra124_tmds_config[] = { }, }; +static void tegra_hdmi_audio_lock(struct tegra_hdmi *hdmi) +{ + mutex_lock(&hdmi->audio_lock); + disable_irq(hdmi->irq); +} + +static void tegra_hdmi_audio_unlock(struct tegra_hdmi *hdmi) +{ + enable_irq(hdmi->irq); + mutex_unlock(&hdmi->audio_lock); +} + static int tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pix_clock, struct tegra_hdmi_audio_config *config) @@ -829,6 +848,23 @@ static void tegra_hdmi_setup_tmds(struct tegra_hdmi *hdmi, HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT); } +static int tegra_hdmi_reconfigure_audio(struct tegra_hdmi *hdmi) +{ + int err; + + err = tegra_hdmi_setup_audio(hdmi); + if (err < 0) { + tegra_hdmi_disable_audio_infoframe(hdmi); + tegra_hdmi_disable_audio(hdmi); + } else { + tegra_hdmi_setup_audio_infoframe(hdmi); + tegra_hdmi_enable_audio_infoframe(hdmi); + tegra_hdmi_enable_audio(hdmi); + } + + return err; +} + static bool tegra_output_is_hdmi(struct tegra_output *output) { struct edid *edid; @@ -1135,6 +1171,8 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder) u32 value; int err; + tegra_hdmi_audio_lock(hdmi); + /* * The following accesses registers of the display controller, so make * sure it's only executed when the output is attached to one. @@ -1159,6 +1197,10 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder) tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE); tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK); + hdmi->pixel_clock = 0; + + tegra_hdmi_audio_unlock(hdmi); + err = host1x_client_suspend(&hdmi->client); if (err < 0) dev_err(hdmi->dev, "failed to suspend: %d\n", err); @@ -1182,6 +1224,8 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) return; } + tegra_hdmi_audio_lock(hdmi); + /* * Enable and unmask the HDA codec SCRATCH0 register interrupt. This * is used for interoperability between the HDA codec driver and the @@ -1195,7 +1239,7 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) h_back_porch = mode->htotal - mode->hsync_end; h_front_porch = mode->hsync_start - mode->hdisplay; - err = clk_set_rate(hdmi->clk, hdmi->pixel_clock); + err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock); if (err < 0) { dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n", err); @@ -1387,6 +1431,8 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) } /* TODO: add HDCP support */ + + tegra_hdmi_audio_unlock(hdmi); } static int @@ -1416,6 +1462,91 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = { .atomic_check = tegra_hdmi_encoder_atomic_check, }; +static int tegra_hdmi_hw_params(struct device *dev, void *data, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms) +{ + struct tegra_hdmi *hdmi = data; + int ret = 0; + + tegra_hdmi_audio_lock(hdmi); + + hdmi->format.sample_rate = hparms->sample_rate; + hdmi->format.channels = hparms->channels; + + if (hdmi->pixel_clock && !hdmi->dvi) + ret = tegra_hdmi_reconfigure_audio(hdmi); + + tegra_hdmi_audio_unlock(hdmi); + + return ret; +} + +static int tegra_hdmi_audio_startup(struct device *dev, void *data) +{ + struct tegra_hdmi *hdmi = data; + int ret; + + ret = host1x_client_resume(&hdmi->client); + if (ret < 0) + dev_err(hdmi->dev, "failed to resume: %d\n", ret); + + return ret; +} + +static void tegra_hdmi_audio_shutdown(struct device *dev, void *data) +{ + struct tegra_hdmi *hdmi = data; + int ret; + + tegra_hdmi_audio_lock(hdmi); + + hdmi->format.sample_rate = 0; + hdmi->format.channels = 0; + + tegra_hdmi_audio_unlock(hdmi); + + ret = host1x_client_suspend(&hdmi->client); + if (ret < 0) + dev_err(hdmi->dev, "failed to suspend: %d\n", ret); +} + +static const struct hdmi_codec_ops tegra_hdmi_codec_ops = { + .hw_params = tegra_hdmi_hw_params, + .audio_startup = tegra_hdmi_audio_startup, + .audio_shutdown = tegra_hdmi_audio_shutdown, +}; + +static int tegra_hdmi_codec_register(struct tegra_hdmi *hdmi) +{ + struct hdmi_codec_pdata codec_data = {}; + + if (hdmi->config->has_hda) + return 0; + + codec_data.ops = &tegra_hdmi_codec_ops; + codec_data.data = hdmi; + codec_data.spdif = 1; + + hdmi->audio_pdev = platform_device_register_data(hdmi->dev, + HDMI_CODEC_DRV_NAME, + PLATFORM_DEVID_AUTO, + &codec_data, + sizeof(codec_data)); + if (IS_ERR(hdmi->audio_pdev)) + return PTR_ERR(hdmi->audio_pdev); + + hdmi->format.channels = 2; + + return 0; +} + +static void tegra_hdmi_codec_unregister(struct tegra_hdmi *hdmi) +{ + if (hdmi->audio_pdev) + platform_device_unregister(hdmi->audio_pdev); +} + static int tegra_hdmi_init(struct host1x_client *client) { struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client); @@ -1453,28 +1584,47 @@ static int tegra_hdmi_init(struct host1x_client *client) if (err < 0) { dev_err(client->dev, "failed to enable HDMI regulator: %d\n", err); - return err; + goto output_exit; } err = regulator_enable(hdmi->pll); if (err < 0) { dev_err(hdmi->dev, "failed to enable PLL regulator: %d\n", err); - return err; + goto disable_hdmi; } err = regulator_enable(hdmi->vdd); if (err < 0) { dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err); - return err; + goto disable_pll; + } + + err = tegra_hdmi_codec_register(hdmi); + if (err < 0) { + dev_err(hdmi->dev, "failed to register audio codec: %d\n", err); + goto disable_vdd; } return 0; + +disable_vdd: + regulator_disable(hdmi->vdd); +disable_pll: + regulator_disable(hdmi->pll); +disable_hdmi: + regulator_disable(hdmi->hdmi); +output_exit: + tegra_output_exit(&hdmi->output); + + return err; } static int tegra_hdmi_exit(struct host1x_client *client) { struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client); + tegra_hdmi_codec_unregister(hdmi); + tegra_output_exit(&hdmi->output); regulator_disable(hdmi->vdd); @@ -1599,7 +1749,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) { struct tegra_hdmi *hdmi = data; u32 value; - int err; value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_INT_STATUS); tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_INT_STATUS); @@ -1614,16 +1763,7 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK; tegra_hda_parse_format(format, &hdmi->format); - - err = tegra_hdmi_setup_audio(hdmi); - if (err < 0) { - tegra_hdmi_disable_audio_infoframe(hdmi); - tegra_hdmi_disable_audio(hdmi); - } else { - tegra_hdmi_setup_audio_infoframe(hdmi); - tegra_hdmi_enable_audio_infoframe(hdmi); - tegra_hdmi_enable_audio(hdmi); - } + tegra_hdmi_reconfigure_audio(hdmi); } else { tegra_hdmi_disable_audio_infoframe(hdmi); tegra_hdmi_disable_audio(hdmi); @@ -1651,6 +1791,8 @@ static int tegra_hdmi_probe(struct platform_device *pdev) hdmi->stereo = false; hdmi->dvi = false; + mutex_init(&hdmi->audio_lock); + hdmi->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(hdmi->clk)) { dev_err(&pdev->dev, "failed to get clock\n"); @@ -1732,7 +1874,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, hdmi); - pm_runtime_enable(&pdev->dev); + + err = devm_pm_runtime_enable(&pdev->dev); + if (err) + return err; + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; INIT_LIST_HEAD(&hdmi->client.list); hdmi->client.ops = &hdmi_client_ops; @@ -1753,8 +1902,6 @@ static int tegra_hdmi_remove(struct platform_device *pdev) struct tegra_hdmi *hdmi = platform_get_drvdata(pdev); int err; - pm_runtime_disable(&pdev->dev); - err = host1x_client_unregister(&hdmi->client); if (err < 0) { dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h index 3efa1be07ff8..23c4b2115ed1 100644 --- a/drivers/gpu/drm/tegra/hub.h +++ b/drivers/gpu/drm/tegra/hub.h @@ -72,7 +72,6 @@ to_tegra_display_hub_state(struct drm_private_state *priv) return container_of(priv, struct tegra_display_hub_state, base); } -struct tegra_dc; struct tegra_plane; int tegra_display_hub_prepare(struct tegra_display_hub *hub); diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c new file mode 100644 index 000000000000..79e1e88203cf --- /dev/null +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2015-2021, NVIDIA Corporation. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> + +#include <soc/tegra/pmc.h> + +#include "drm.h" +#include "falcon.h" +#include "vic.h" + +struct nvdec_config { + const char *firmware; + unsigned int version; + bool supports_sid; +}; + +struct nvdec { + struct falcon falcon; + + void __iomem *regs; + struct tegra_drm_client client; + struct host1x_channel *channel; + struct device *dev; + struct clk *clk; + + /* Platform configuration */ + const struct nvdec_config *config; +}; + +static inline struct nvdec *to_nvdec(struct tegra_drm_client *client) +{ + return container_of(client, struct nvdec, client); +} + +static inline void nvdec_writel(struct nvdec *nvdec, u32 value, + unsigned int offset) +{ + writel(value, nvdec->regs + offset); +} + +static int nvdec_boot(struct nvdec *nvdec) +{ +#ifdef CONFIG_IOMMU_API + struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev); +#endif + int err; + +#ifdef CONFIG_IOMMU_API + if (nvdec->config->supports_sid && spec) { + u32 value; + + value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW); + nvdec_writel(nvdec, value, VIC_TFBIF_TRANSCFG); + + if (spec->num_ids > 0) { + value = spec->ids[0] & 0xffff; + + nvdec_writel(nvdec, value, VIC_THI_STREAMID0); + nvdec_writel(nvdec, value, VIC_THI_STREAMID1); + } + } +#endif + + err = falcon_boot(&nvdec->falcon); + if (err < 0) + return err; + + err = falcon_wait_idle(&nvdec->falcon); + if (err < 0) { + dev_err(nvdec->dev, "falcon boot timed out\n"); + return err; + } + + return 0; +} + +static int nvdec_init(struct host1x_client *client) +{ + struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct drm_device *dev = dev_get_drvdata(client->host); + struct tegra_drm *tegra = dev->dev_private; + struct nvdec *nvdec = to_nvdec(drm); + int err; + + err = host1x_client_iommu_attach(client); + if (err < 0 && err != -ENODEV) { + dev_err(nvdec->dev, "failed to attach to domain: %d\n", err); + return err; + } + + nvdec->channel = host1x_channel_request(client); + if (!nvdec->channel) { + err = -ENOMEM; + goto detach; + } + + client->syncpts[0] = host1x_syncpt_request(client, 0); + if (!client->syncpts[0]) { + err = -ENOMEM; + goto free_channel; + } + + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 500); + + err = tegra_drm_register_client(tegra, drm); + if (err < 0) + goto disable_rpm; + + /* + * Inherit the DMA parameters (such as maximum segment size) from the + * parent host1x device. + */ + client->dev->dma_parms = client->host->dma_parms; + + return 0; + +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + + host1x_syncpt_put(client->syncpts[0]); +free_channel: + host1x_channel_put(nvdec->channel); +detach: + host1x_client_iommu_detach(client); + + return err; +} + +static int nvdec_exit(struct host1x_client *client) +{ + struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct drm_device *dev = dev_get_drvdata(client->host); + struct tegra_drm *tegra = dev->dev_private; + struct nvdec *nvdec = to_nvdec(drm); + int err; + + /* avoid a dangling pointer just in case this disappears */ + client->dev->dma_parms = NULL; + + err = tegra_drm_unregister_client(tegra, drm); + if (err < 0) + return err; + + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + + host1x_syncpt_put(client->syncpts[0]); + host1x_channel_put(nvdec->channel); + host1x_client_iommu_detach(client); + + nvdec->channel = NULL; + + if (client->group) { + dma_unmap_single(nvdec->dev, nvdec->falcon.firmware.phys, + nvdec->falcon.firmware.size, DMA_TO_DEVICE); + tegra_drm_free(tegra, nvdec->falcon.firmware.size, + nvdec->falcon.firmware.virt, + nvdec->falcon.firmware.iova); + } else { + dma_free_coherent(nvdec->dev, nvdec->falcon.firmware.size, + nvdec->falcon.firmware.virt, + nvdec->falcon.firmware.iova); + } + + return 0; +} + +static const struct host1x_client_ops nvdec_client_ops = { + .init = nvdec_init, + .exit = nvdec_exit, +}; + +static int nvdec_load_firmware(struct nvdec *nvdec) +{ + struct host1x_client *client = &nvdec->client.base; + struct tegra_drm *tegra = nvdec->client.drm; + dma_addr_t iova; + size_t size; + void *virt; + int err; + + if (nvdec->falcon.firmware.virt) + return 0; + + err = falcon_read_firmware(&nvdec->falcon, nvdec->config->firmware); + if (err < 0) + return err; + + size = nvdec->falcon.firmware.size; + + if (!client->group) { + virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); + + err = dma_mapping_error(nvdec->dev, iova); + if (err < 0) + return err; + } else { + virt = tegra_drm_alloc(tegra, size, &iova); + } + + nvdec->falcon.firmware.virt = virt; + nvdec->falcon.firmware.iova = iova; + + err = falcon_load_firmware(&nvdec->falcon); + if (err < 0) + goto cleanup; + + /* + * In this case we have received an IOVA from the shared domain, so we + * need to make sure to get the physical address so that the DMA API + * knows what memory pages to flush the cache for. + */ + if (client->group) { + dma_addr_t phys; + + phys = dma_map_single(nvdec->dev, virt, size, DMA_TO_DEVICE); + + err = dma_mapping_error(nvdec->dev, phys); + if (err < 0) + goto cleanup; + + nvdec->falcon.firmware.phys = phys; + } + + return 0; + +cleanup: + if (!client->group) + dma_free_coherent(nvdec->dev, size, virt, iova); + else + tegra_drm_free(tegra, size, virt, iova); + + return err; +} + + +static __maybe_unused int nvdec_runtime_resume(struct device *dev) +{ + struct nvdec *nvdec = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(nvdec->clk); + if (err < 0) + return err; + + usleep_range(10, 20); + + err = nvdec_load_firmware(nvdec); + if (err < 0) + goto disable; + + err = nvdec_boot(nvdec); + if (err < 0) + goto disable; + + return 0; + +disable: + clk_disable_unprepare(nvdec->clk); + return err; +} + +static __maybe_unused int nvdec_runtime_suspend(struct device *dev) +{ + struct nvdec *nvdec = dev_get_drvdata(dev); + + host1x_channel_stop(nvdec->channel); + + clk_disable_unprepare(nvdec->clk); + + return 0; +} + +static int nvdec_open_channel(struct tegra_drm_client *client, + struct tegra_drm_context *context) +{ + struct nvdec *nvdec = to_nvdec(client); + + context->channel = host1x_channel_get(nvdec->channel); + if (!context->channel) + return -ENOMEM; + + return 0; +} + +static void nvdec_close_channel(struct tegra_drm_context *context) +{ + host1x_channel_put(context->channel); +} + +static const struct tegra_drm_client_ops nvdec_ops = { + .open_channel = nvdec_open_channel, + .close_channel = nvdec_close_channel, + .submit = tegra_drm_submit, +}; + +#define NVIDIA_TEGRA_210_NVDEC_FIRMWARE "nvidia/tegra210/nvdec.bin" + +static const struct nvdec_config nvdec_t210_config = { + .firmware = NVIDIA_TEGRA_210_NVDEC_FIRMWARE, + .version = 0x21, + .supports_sid = false, +}; + +#define NVIDIA_TEGRA_186_NVDEC_FIRMWARE "nvidia/tegra186/nvdec.bin" + +static const struct nvdec_config nvdec_t186_config = { + .firmware = NVIDIA_TEGRA_186_NVDEC_FIRMWARE, + .version = 0x18, + .supports_sid = true, +}; + +#define NVIDIA_TEGRA_194_NVDEC_FIRMWARE "nvidia/tegra194/nvdec.bin" + +static const struct nvdec_config nvdec_t194_config = { + .firmware = NVIDIA_TEGRA_194_NVDEC_FIRMWARE, + .version = 0x19, + .supports_sid = true, +}; + +static const struct of_device_id tegra_nvdec_of_match[] = { + { .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config }, + { .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config }, + { .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match); + +static int nvdec_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct host1x_syncpt **syncpts; + struct nvdec *nvdec; + u32 host_class; + int err; + + /* inherit DMA mask from host1x parent */ + err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + nvdec = devm_kzalloc(dev, sizeof(*nvdec), GFP_KERNEL); + if (!nvdec) + return -ENOMEM; + + nvdec->config = of_device_get_match_data(dev); + + syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); + if (!syncpts) + return -ENOMEM; + + nvdec->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(nvdec->regs)) + return PTR_ERR(nvdec->regs); + + nvdec->clk = devm_clk_get(dev, NULL); + if (IS_ERR(nvdec->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + return PTR_ERR(nvdec->clk); + } + + err = clk_set_rate(nvdec->clk, ULONG_MAX); + if (err < 0) { + dev_err(&pdev->dev, "failed to set clock rate\n"); + return err; + } + + err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class); + if (err < 0) + host_class = HOST1X_CLASS_NVDEC; + + nvdec->falcon.dev = dev; + nvdec->falcon.regs = nvdec->regs; + + err = falcon_init(&nvdec->falcon); + if (err < 0) + return err; + + platform_set_drvdata(pdev, nvdec); + + INIT_LIST_HEAD(&nvdec->client.base.list); + nvdec->client.base.ops = &nvdec_client_ops; + nvdec->client.base.dev = dev; + nvdec->client.base.class = host_class; + nvdec->client.base.syncpts = syncpts; + nvdec->client.base.num_syncpts = 1; + nvdec->dev = dev; + + INIT_LIST_HEAD(&nvdec->client.list); + nvdec->client.version = nvdec->config->version; + nvdec->client.ops = &nvdec_ops; + + err = host1x_client_register(&nvdec->client.base); + if (err < 0) { + dev_err(dev, "failed to register host1x client: %d\n", err); + goto exit_falcon; + } + + return 0; + +exit_falcon: + falcon_exit(&nvdec->falcon); + + return err; +} + +static int nvdec_remove(struct platform_device *pdev) +{ + struct nvdec *nvdec = platform_get_drvdata(pdev); + int err; + + err = host1x_client_unregister(&nvdec->client.base); + if (err < 0) { + dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", + err); + return err; + } + + falcon_exit(&nvdec->falcon); + + return 0; +} + +static const struct dev_pm_ops nvdec_pm_ops = { + SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +struct platform_driver tegra_nvdec_driver = { + .driver = { + .name = "tegra-nvdec", + .of_match_table = tegra_nvdec_of_match, + .pm = &nvdec_pm_ops + }, + .probe = nvdec_probe, + .remove = nvdec_remove, +}; + +#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_210_NVDEC_FIRMWARE); +#endif +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_186_NVDEC_FIRMWARE); +#endif +#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_194_NVDEC_FIRMWARE); +#endif diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 16a1cdc28657..321cb1f13da6 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -74,7 +74,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) for (i = 0; i < 3; i++) { copy->iova[i] = DMA_MAPPING_ERROR; - copy->sgt[i] = NULL; + copy->map[i] = NULL; } return ©->base; @@ -138,55 +138,37 @@ const struct drm_plane_funcs tegra_plane_funcs = { static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev); unsigned int i; int err; for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - dma_addr_t phys_addr, *phys; - struct sg_table *sgt; + struct host1x_bo_mapping *map; - /* - * If we're not attached to a domain, we already stored the - * physical address when the buffer was allocated. If we're - * part of a group that's shared between all display - * controllers, we've also already mapped the framebuffer - * through the SMMU. In both cases we can short-circuit the - * code below and retrieve the stored IOV address. - */ - if (!domain || dc->client.group) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(dc->dev, &bo->base, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); + map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE, &dc->client.cache); + if (IS_ERR(map)) { + err = PTR_ERR(map); goto unpin; } - if (sgt) { - err = dma_map_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - if (err) - goto unpin; - + if (!dc->client.group) { /* * The display controller needs contiguous memory, so * fail if the buffer is discontiguous and we fail to * map its SG table to a single contiguous chunk of * I/O virtual memory. */ - if (sgt->nents > 1) { + if (map->chunks > 1) { err = -EINVAL; goto unpin; } - state->iova[i] = sg_dma_address(sgt->sgl); - state->sgt[i] = sgt; + state->iova[i] = map->phys; } else { - state->iova[i] = phys_addr; + state->iova[i] = bo->iova; } + + state->map[i] = map; } return 0; @@ -195,15 +177,9 @@ unpin: dev_err(dc->dev, "failed to map plane %u: %d\n", i, err); while (i--) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - struct sg_table *sgt = state->sgt[i]; - - if (sgt) - dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - - host1x_bo_unpin(dc->dev, &bo->base, sgt); + host1x_bo_unpin(state->map[i]); state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; + state->map[i] = NULL; } return err; @@ -214,15 +190,9 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) unsigned int i; for (i = 0; i < state->base.fb->format->num_planes; i++) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - struct sg_table *sgt = state->sgt[i]; - - if (sgt) - dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - - host1x_bo_unpin(dc->dev, &bo->base, sgt); + host1x_bo_unpin(state->map[i]); state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; + state->map[i] = NULL; } } @@ -230,11 +200,14 @@ int tegra_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { struct tegra_dc *dc = to_tegra_dc(state->crtc); + int err; if (!state->fb) return 0; - drm_gem_plane_helper_prepare_fb(plane, state); + err = drm_gem_plane_helper_prepare_fb(plane, state); + if (err < 0) + return err; return tegra_dc_pin(dc, to_tegra_plane_state(state)); } diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index d9470780c803..dfb20712fbd7 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -43,7 +43,7 @@ struct tegra_plane_legacy_blending_state { struct tegra_plane_state { struct drm_plane_state base; - struct sg_table *sgt[3]; + struct host1x_bo_mapping *map[3]; dma_addr_t iova[3]; struct tegra_bo_tiling tiling; diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 606c78a2b988..ff8fce36d2aa 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -17,6 +17,8 @@ struct tegra_rgb { struct tegra_output output; struct tegra_dc *dc; + struct clk *pll_d_out0; + struct clk *pll_d2_out0; struct clk *clk_parent; struct clk *clk; }; @@ -116,13 +118,21 @@ static void tegra_rgb_encoder_enable(struct drm_encoder *encoder) DISP_ORDER_RED_BLUE; tegra_dc_writel(rgb->dc, value, DC_DISP_DISP_INTERFACE_CONTROL); - /* XXX: parameterize? */ - value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE; - tegra_dc_writel(rgb->dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS); - tegra_dc_commit(rgb->dc); } +static bool tegra_rgb_pll_rate_change_allowed(struct tegra_rgb *rgb) +{ + if (!rgb->pll_d2_out0) + return false; + + if (!clk_is_match(rgb->clk_parent, rgb->pll_d_out0) && + !clk_is_match(rgb->clk_parent, rgb->pll_d2_out0)) + return false; + + return true; +} + static int tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -151,8 +161,17 @@ tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder, * and hope that the desired frequency can be matched (or at least * matched sufficiently close that the panel will still work). */ - div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2; - pclk = 0; + if (tegra_rgb_pll_rate_change_allowed(rgb)) { + /* + * Set display controller clock to x2 of PCLK in order to + * produce higher resolution pulse positions. + */ + div = 2; + pclk *= 2; + } else { + div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2; + pclk = 0; + } err = tegra_dc_state_setup_clock(dc, crtc_state, rgb->clk_parent, pclk, div); @@ -210,6 +229,22 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) return err; } + rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0"); + if (IS_ERR(rgb->pll_d_out0)) { + err = PTR_ERR(rgb->pll_d_out0); + dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err); + return err; + } + + if (dc->soc->has_pll_d2_out0) { + rgb->pll_d2_out0 = clk_get_sys(NULL, "pll_d2_out0"); + if (IS_ERR(rgb->pll_d2_out0)) { + err = PTR_ERR(rgb->pll_d2_out0); + dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); + return err; + } + } + dc->rgb = &rgb->output; return 0; @@ -217,9 +252,15 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) int tegra_dc_rgb_remove(struct tegra_dc *dc) { + struct tegra_rgb *rgb; + if (!dc->rgb) return 0; + rgb = to_rgb(dc->rgb); + clk_put(rgb->pll_d2_out0); + clk_put(rgb->pll_d_out0); + tegra_output_remove(dc->rgb); dc->rgb = NULL; diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 776f825df52f..6d6dd8c35475 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -64,33 +64,62 @@ static void gather_bo_put(struct host1x_bo *host_bo) kref_put(&bo->ref, gather_bo_release); } -static struct sg_table * -gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) +static struct host1x_bo_mapping * +gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction) { - struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); - struct sg_table *sgt; + struct gather_bo *gather = container_of(bo, struct gather_bo, base); + struct host1x_bo_mapping *map; int err; - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) return ERR_PTR(-ENOMEM); - err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma, - bo->gather_data_words * 4); - if (err) { - kfree(sgt); - return ERR_PTR(err); + kref_init(&map->ref); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; } - return sgt; + err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma, + gather->gather_data_words * 4); + if (err) + goto free_sgt; + + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + + map->phys = sg_dma_address(map->sgt->sgl); + map->size = gather->gather_data_words * 4; + map->chunks = err; + + return map; + +free_sgt: + sg_free_table(map->sgt); + kfree(map->sgt); +free: + kfree(map); + return ERR_PTR(err); } -static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) +static void gather_bo_unpin(struct host1x_bo_mapping *map) { - if (sgt) { - sg_free_table(sgt); - kfree(sgt); - } + if (!map) + return; + + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); + host1x_bo_put(map->bo); + + kfree(map); } static void *gather_bo_mmap(struct host1x_bo *host_bo) @@ -475,8 +504,8 @@ static void release_job(struct host1x_job *job) kfree(job_data->used_mappings); kfree(job_data); - if (pm_runtime_enabled(client->base.dev)) - pm_runtime_put_autosuspend(client->base.dev); + pm_runtime_mark_last_busy(client->base.dev); + pm_runtime_put_autosuspend(client->base.dev); } int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, @@ -560,12 +589,10 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, } /* Boot engine. */ - if (pm_runtime_enabled(context->client->base.dev)) { - err = pm_runtime_resume_and_get(context->client->base.dev); - if (err < 0) { - SUBMIT_ERR(context, "could not power up engine: %d", err); - goto unpin_job; - } + err = pm_runtime_resume_and_get(context->client->base.dev); + if (err < 0) { + SUBMIT_ERR(context, "could not power up engine: %d", err); + goto unpin_job; } job->user_data = job_data; diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index 690a339c52ec..9ab9179d2026 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -17,11 +17,7 @@ static void tegra_drm_mapping_release(struct kref *ref) struct tegra_drm_mapping *mapping = container_of(ref, struct tegra_drm_mapping, ref); - if (mapping->sgt) - dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - - host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_unpin(mapping->map); host1x_bo_put(mapping->bo); kfree(mapping); @@ -159,6 +155,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f struct drm_tegra_channel_map *args = data; struct tegra_drm_mapping *mapping; struct tegra_drm_context *context; + enum dma_data_direction direction; int err = 0; if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE) @@ -180,68 +177,53 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f kref_init(&mapping->ref); - mapping->dev = context->client->base.dev; mapping->bo = tegra_gem_lookup(file, args->handle); if (!mapping->bo) { err = -EINVAL; - goto unlock; + goto free; } - if (context->client->base.group) { - /* IOMMU domain managed directly using IOMMU API */ - host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova); - } else { - switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { - case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: - mapping->direction = DMA_BIDIRECTIONAL; - break; - - case DRM_TEGRA_CHANNEL_MAP_WRITE: - mapping->direction = DMA_FROM_DEVICE; - break; - - case DRM_TEGRA_CHANNEL_MAP_READ: - mapping->direction = DMA_TO_DEVICE; - break; + switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { + case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: + direction = DMA_BIDIRECTIONAL; + break; - default: - return -EINVAL; - } + case DRM_TEGRA_CHANNEL_MAP_WRITE: + direction = DMA_FROM_DEVICE; + break; - mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL); - if (IS_ERR(mapping->sgt)) { - err = PTR_ERR(mapping->sgt); - goto put_gem; - } + case DRM_TEGRA_CHANNEL_MAP_READ: + direction = DMA_TO_DEVICE; + break; - err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - if (err) - goto unpin; + default: + err = -EINVAL; + goto put_gem; + } - mapping->iova = sg_dma_address(mapping->sgt->sgl); + mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction, NULL); + if (IS_ERR(mapping->map)) { + err = PTR_ERR(mapping->map); + goto put_gem; } + mapping->iova = mapping->map->phys; mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->gem.size; err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), GFP_KERNEL); if (err < 0) - goto unmap; + goto unpin; mutex_unlock(&fpriv->lock); return 0; -unmap: - if (mapping->sgt) { - dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - } unpin: - host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_unpin(mapping->map); put_gem: host1x_bo_put(mapping->bo); +free: kfree(mapping); unlock: mutex_unlock(&fpriv->lock); diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h index 12adad770ad3..92ff1e44ff15 100644 --- a/drivers/gpu/drm/tegra/uapi.h +++ b/drivers/gpu/drm/tegra/uapi.h @@ -27,10 +27,9 @@ struct tegra_drm_file { struct tegra_drm_mapping { struct kref ref; - struct device *dev; + struct host1x_bo_mapping *map; struct host1x_bo *bo; - struct sg_table *sgt; - enum dma_data_direction direction; + dma_addr_t iova; dma_addr_t iova_end; }; diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index c02010ff2b7f..1e342fa3d27b 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -5,6 +5,7 @@ #include <linux/clk.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/host1x.h> #include <linux/iommu.h> #include <linux/module.h> @@ -151,9 +152,13 @@ static int vic_init(struct host1x_client *client) goto free_channel; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 500); + err = tegra_drm_register_client(tegra, drm); if (err < 0) - goto free_syncpt; + goto disable_rpm; /* * Inherit the DMA parameters (such as maximum segment size) from the @@ -163,7 +168,10 @@ static int vic_init(struct host1x_client *client) return 0; -free_syncpt: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); free_channel: host1x_channel_put(vic->channel); @@ -188,10 +196,15 @@ static int vic_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(vic->channel); host1x_client_iommu_detach(client); + vic->channel = NULL; + if (client->group) { dma_unmap_single(vic->dev, vic->falcon.firmware.phys, vic->falcon.firmware.size, DMA_TO_DEVICE); @@ -232,12 +245,12 @@ static int vic_load_firmware(struct vic *vic) if (!client->group) { virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(vic->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); + if (IS_ERR(virt)) + return PTR_ERR(virt); } vic->falcon.firmware.virt = virt; @@ -315,6 +328,8 @@ static int vic_runtime_suspend(struct device *dev) struct vic *vic = dev_get_drvdata(dev); int err; + host1x_channel_stop(vic->channel); + err = reset_control_assert(vic->rst); if (err < 0) return err; @@ -330,27 +345,17 @@ static int vic_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { struct vic *vic = to_vic(client); - int err; - - err = pm_runtime_resume_and_get(vic->dev); - if (err < 0) - return err; context->channel = host1x_channel_get(vic->channel); - if (!context->channel) { - pm_runtime_put(vic->dev); + if (!context->channel) return -ENOMEM; - } return 0; } static void vic_close_channel(struct tegra_drm_context *context) { - struct vic *vic = to_vic(context->client); - host1x_channel_put(context->channel); - pm_runtime_put(vic->dev); } static const struct tegra_drm_client_ops vic_ops = { @@ -441,6 +446,12 @@ static int vic_probe(struct platform_device *pdev) return PTR_ERR(vic->clk); } + err = clk_set_rate(vic->clk, ULONG_MAX); + if (err < 0) { + dev_err(&pdev->dev, "failed to set clock rate\n"); + return err; + } + if (!dev->pm_domain) { vic->rst = devm_reset_control_get(dev, "vic"); if (IS_ERR(vic->rst)) { @@ -476,17 +487,8 @@ static int vic_probe(struct platform_device *pdev) goto exit_falcon; } - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - err = vic_runtime_resume(&pdev->dev); - if (err < 0) - goto unregister_client; - } - return 0; -unregister_client: - host1x_client_unregister(&vic->client.base); exit_falcon: falcon_exit(&vic->falcon); @@ -505,11 +507,6 @@ static int vic_remove(struct platform_device *pdev) return err; } - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_disable(&pdev->dev); - else - vic_runtime_suspend(&pdev->dev); - falcon_exit(&vic->falcon); return 0; @@ -517,6 +514,8 @@ static int vic_remove(struct platform_device *pdev) static const struct dev_pm_ops vic_pm_ops = { SET_RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; struct platform_driver tegra_vic_driver = { diff --git a/drivers/gpu/drm/tidss/Kconfig b/drivers/gpu/drm/tidss/Kconfig index f790a5215302..bc4fa59b6fa9 100644 --- a/drivers/gpu/drm/tidss/Kconfig +++ b/drivers/gpu/drm/tidss/Kconfig @@ -3,7 +3,6 @@ config DRM_TIDSS depends on DRM && OF depends on ARM || ARM64 || COMPILE_TEST select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER help The TI Keystone family SoCs introduced a new generation of diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c index 4366b5c798e0..7c784e90e40e 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.c +++ b/drivers/gpu/drm/tidss/tidss_drv.c @@ -88,7 +88,7 @@ static int __maybe_unused tidss_resume(struct device *dev) return drm_mode_config_helper_resume(&tidss->ddev); } -static const struct dev_pm_ops tidss_pm_ops = { +static __maybe_unused const struct dev_pm_ops tidss_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(tidss_suspend, tidss_resume) SET_RUNTIME_PM_OPS(tidss_pm_runtime_suspend, tidss_pm_runtime_resume, NULL) }; diff --git a/drivers/gpu/drm/tilcdc/Kconfig b/drivers/gpu/drm/tilcdc/Kconfig index 9f505a149990..e315591eb36b 100644 --- a/drivers/gpu/drm/tilcdc/Kconfig +++ b/drivers/gpu/drm/tilcdc/Kconfig @@ -3,7 +3,6 @@ config DRM_TILCDC tristate "DRM Support for TI LCDC Display Controller" depends on DRM && OF && ARM select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_BRIDGE select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 4ca309f80dee..712e0004e96e 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -3,7 +3,7 @@ config DRM_ARCPGU tristate "ARC PGU" depends on DRM && OF - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER help Choose this option if you have an ARC PGU controller. @@ -71,7 +71,7 @@ config TINYDRM_HX8357D tristate "DRM support for HX8357D display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI select BACKLIGHT_CLASS_DEVICE help @@ -84,7 +84,7 @@ config TINYDRM_ILI9163 tristate "DRM support for ILI9163 display panels" depends on DRM && SPI select BACKLIGHT_CLASS_DEVICE - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DBI help @@ -97,7 +97,7 @@ config TINYDRM_ILI9225 tristate "DRM support for ILI9225 display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI help DRM driver for the following Ilitek ILI9225 panels: @@ -109,7 +109,7 @@ config TINYDRM_ILI9341 tristate "DRM support for ILI9341 display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI select BACKLIGHT_CLASS_DEVICE help @@ -122,7 +122,7 @@ config TINYDRM_ILI9486 tristate "DRM support for ILI9486 display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI select BACKLIGHT_CLASS_DEVICE help @@ -136,7 +136,7 @@ config TINYDRM_MI0283QT tristate "DRM support for MI0283QT" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI select BACKLIGHT_CLASS_DEVICE help @@ -147,7 +147,7 @@ config TINYDRM_REPAPER tristate "DRM support for Pervasive Displays RePaper panels (V231)" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER help DRM driver for the following Pervasive Displays panels: 1.44" TFT EPD Panel (E1144CS021) @@ -161,7 +161,7 @@ config TINYDRM_ST7586 tristate "DRM support for Sitronix ST7586 display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI help DRM driver for the following Sitronix ST7586 panels: @@ -173,7 +173,7 @@ config TINYDRM_ST7735R tristate "DRM support for Sitronix ST7715R/ST7735R display panels" depends on DRM && SPI select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER select DRM_MIPI_DBI select BACKLIGHT_CLASS_DEVICE help diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 2ce3bd903b70..fc26a1ce11ee 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include <linux/console.h> #include <linux/pci.h> #include <drm/drm_aperture.h> @@ -719,7 +718,7 @@ static struct pci_driver bochs_pci_driver = { static int __init bochs_init(void) { - if (vgacon_text_force() && bochs_modeset == -1) + if (drm_firmware_drivers_only() && bochs_modeset == -1) return -EINVAL; if (bochs_modeset == 0) diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c index 9327001d35dd..c95d9ff7d600 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus.c @@ -16,7 +16,6 @@ * Copyright 1999-2001 Jeff Garzik <jgarzik@pobox.com> */ -#include <linux/console.h> #include <linux/dma-buf-map.h> #include <linux/module.h> #include <linux/pci.h> @@ -636,8 +635,9 @@ static struct pci_driver cirrus_pci_driver = { static int __init cirrus_init(void) { - if (vgacon_text_force()) + if (drm_firmware_drivers_only()) return -EINVAL; + return pci_register_driver(&cirrus_pci_driver); } diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 2f999915b9aa..b977f5c94562 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -571,8 +571,8 @@ static const uint32_t simpledrm_default_formats[] = { //DRM_FORMAT_XRGB1555, //DRM_FORMAT_ARGB1555, DRM_FORMAT_RGB888, - //DRM_FORMAT_XRGB2101010, - //DRM_FORMAT_ARGB2101010, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, }; static const uint64_t simpledrm_format_modifiers[] = { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e4a20a3a5d16..db3dc7ef5382 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1086,7 +1086,6 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, if (timeout == 0) return -EBUSY; - dma_resv_add_excl_fence(bo->base.resv, NULL); return 0; } EXPORT_SYMBOL(ttm_bo_wait); @@ -1105,7 +1104,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, * as an indication that we're about to swap out. */ memset(&place, 0, sizeof(place)); - place.mem_type = TTM_PL_SYSTEM; + place.mem_type = bo->resource->mem_type; if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY; @@ -1137,6 +1136,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, struct ttm_place hop; memset(&hop, 0, sizeof(hop)); + place.mem_type = TTM_PL_SYSTEM; ret = ttm_resource_alloc(bo, &place, &evict_mem); if (unlikely(ret)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 67d68a4a8640..072e0baf2ab4 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -128,15 +128,17 @@ static const struct ttm_resource_manager_func ttm_range_manager_func = { }; /** - * ttm_range_man_init + * ttm_range_man_init_nocheck - Initialise a generic range manager for the + * selected memory type. * * @bdev: ttm device * @type: memory manager type * @use_tt: if the memory manager uses tt * @p_size: size of area to be managed in pages. * - * Initialise a generic range manager for the selected memory type. * The range manager is installed for this device in the type slot. + * + * Return: %0 on success or a negative error code on failure */ int ttm_range_man_init_nocheck(struct ttm_device *bdev, unsigned type, bool use_tt, @@ -166,12 +168,13 @@ int ttm_range_man_init_nocheck(struct ttm_device *bdev, EXPORT_SYMBOL(ttm_range_man_init_nocheck); /** - * ttm_range_man_fini + * ttm_range_man_fini_nocheck - Remove the generic range manager from a slot + * and tear it down. * * @bdev: ttm device * @type: memory manager type * - * Remove the generic range manager from a slot and tear it down. + * Return: %0 on success or a negative error code on failure */ int ttm_range_man_fini_nocheck(struct ttm_device *bdev, unsigned type) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7e83c00a3f48..79c870a3bef8 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,6 +34,7 @@ #include <linux/sched.h> #include <linux/shmem_fs.h> #include <linux/file.h> +#include <linux/module.h> #include <drm/drm_cache.h> #include <drm/ttm/ttm_bo_driver.h> diff --git a/drivers/gpu/drm/tve200/Kconfig b/drivers/gpu/drm/tve200/Kconfig index e2d163c74ed6..47a7dbe6c114 100644 --- a/drivers/gpu/drm/tve200/Kconfig +++ b/drivers/gpu/drm/tve200/Kconfig @@ -8,7 +8,6 @@ config DRM_TVE200 select DRM_BRIDGE select DRM_PANEL_BRIDGE select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE help diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 0d9af62f69ad..6e3113f419f4 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -70,11 +70,11 @@ struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size) struct drm_gem_object *obj; if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) - return NULL; + return ERR_PTR(-ENOMEM); obj = &bo->base.base; obj->funcs = &v3d_gem_funcs; diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index a6c81af37345..f35d9e44c6b7 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -7,7 +7,6 @@ * Michael Thayer <michael.thayer@oracle.com, * Hans de Goede <hdegoede@redhat.com> */ -#include <linux/console.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/vt_kern.h> @@ -193,10 +192,8 @@ static const struct drm_driver driver = { static int __init vbox_init(void) { -#ifdef CONFIG_VGA_CONSOLE - if (vgacon_text_force() && vbox_modeset == -1) + if (drm_firmware_drivers_only() && vbox_modeset == -1) return -EINVAL; -#endif if (vbox_modeset == 0) return -EINVAL; diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c index f28779715ccd..c9e8b3a63c62 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_main.c +++ b/drivers/gpu/drm/vboxvideo/vbox_main.c @@ -127,8 +127,8 @@ int vbox_hw_init(struct vbox_private *vbox) /* Create guest-heap mem-pool use 2^4 = 16 byte chunks */ vbox->guest_pool = devm_gen_pool_create(vbox->ddev.dev, 4, -1, "vboxvideo-accel"); - if (!vbox->guest_pool) - return -ENOMEM; + if (IS_ERR(vbox->guest_pool)) + return PTR_ERR(vbox->guest_pool); ret = gen_pool_add_virt(vbox->guest_pool, (unsigned long)vbox->guest_heap, diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 345a5570a3da..de3424fed2fc 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -6,7 +6,6 @@ config DRM_VC4 depends on SND && SND_SOC depends on COMMON_CLK select DRM_KMS_HELPER - select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_PANEL_BRIDGE select SND_PCM diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index fddaeb0b09c1..6d1281a343e9 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -177,7 +177,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) bo->validated_shader = NULL; } - drm_gem_cma_free_object(obj); + drm_gem_cma_free(&bo->base); } static void vc4_bo_remove_from_cache(struct vc4_bo *bo) @@ -720,7 +720,7 @@ static int vc4_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct return -EINVAL; } - return drm_gem_cma_mmap(obj, vma); + return drm_gem_cma_mmap(&bo->base, vma); } static const struct vm_operations_struct vc4_vm_ops = { @@ -732,8 +732,8 @@ static const struct vm_operations_struct vc4_vm_ops = { static const struct drm_gem_object_funcs vc4_gem_object_funcs = { .free = vc4_free_object, .export = vc4_prime_export, - .get_sg_table = drm_gem_cma_get_sg_table, - .vmap = drm_gem_cma_vmap, + .get_sg_table = drm_gem_cma_object_get_sg_table, + .vmap = drm_gem_cma_object_vmap, .mmap = vc4_gem_object_mmap, .vm_ops = &vc4_vm_ops, }; diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 79d4d9dd1394..24de29bc1cda 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -340,19 +340,19 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_hvs *hvs = vc4->hvs; - struct drm_crtc_state *old_crtc_state; struct drm_crtc_state *new_crtc_state; struct vc4_hvs_state *new_hvs_state; struct drm_crtc *crtc; struct vc4_hvs_state *old_hvs_state; + unsigned int channel; int i; old_hvs_state = vc4_hvs_get_old_global_state(state); - if (WARN_ON(!old_hvs_state)) + if (WARN_ON(IS_ERR(old_hvs_state))) return; new_hvs_state = vc4_hvs_get_new_global_state(state); - if (WARN_ON(!new_hvs_state)) + if (WARN_ON(IS_ERR(new_hvs_state))) return; for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { @@ -365,31 +365,32 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) vc4_hvs_mask_underrun(dev, vc4_crtc_state->assigned_channel); } - if (vc4->hvs->hvs5) { - unsigned long core_rate = max_t(unsigned long, - 500000000, - new_hvs_state->core_clock_rate); - - clk_set_min_rate(hvs->core_clk, core_rate); - } - - for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { - struct vc4_crtc_state *vc4_crtc_state = - to_vc4_crtc_state(old_crtc_state); - unsigned int channel = vc4_crtc_state->assigned_channel; + for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) { + struct drm_crtc_commit *commit; int ret; - if (channel == VC4_HVS_CHANNEL_DISABLED) + if (!old_hvs_state->fifo_state[channel].in_use) continue; - if (!old_hvs_state->fifo_state[channel].in_use) + commit = old_hvs_state->fifo_state[channel].pending_commit; + if (!commit) continue; - ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[channel].pending_commit); + ret = drm_crtc_commit_wait(commit); if (ret) drm_err(dev, "Timed out waiting for commit\n"); + + drm_crtc_commit_put(commit); + old_hvs_state->fifo_state[channel].pending_commit = NULL; } + if (vc4->hvs->hvs5) { + unsigned long core_rate = max_t(unsigned long, + 500000000, + new_hvs_state->core_clock_rate); + + clk_set_min_rate(hvs->core_clk, core_rate); + } drm_atomic_helper_commit_modeset_disables(dev, state); vc4_ctm_commit(vc4, state); @@ -427,8 +428,8 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state) unsigned int i; hvs_state = vc4_hvs_get_new_global_state(state); - if (!hvs_state) - return -EINVAL; + if (WARN_ON(IS_ERR(hvs_state))) + return PTR_ERR(hvs_state); for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct vc4_crtc_state *vc4_crtc_state = @@ -676,12 +677,6 @@ vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj) for (i = 0; i < HVS_NUM_CHANNELS; i++) { state->fifo_state[i].in_use = old_state->fifo_state[i].in_use; state->fifo_state[i].fifo_load = old_state->fifo_state[i].fifo_load; - - if (!old_state->fifo_state[i].pending_commit) - continue; - - state->fifo_state[i].pending_commit = - drm_crtc_commit_get(old_state->fifo_state[i].pending_commit); } state->core_clock_rate = old_state->core_clock_rate; @@ -772,8 +767,8 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev, unsigned int i; hvs_new_state = vc4_hvs_get_global_state(state); - if (!hvs_new_state) - return -EINVAL; + if (IS_ERR(hvs_new_state)) + return PTR_ERR(hvs_new_state); for (i = 0; i < ARRAY_SIZE(hvs_new_state->fifo_state); i++) if (!hvs_new_state->fifo_state[i].in_use) @@ -862,8 +857,8 @@ vc4_core_clock_atomic_check(struct drm_atomic_state *state) load_state = to_vc4_load_tracker_state(priv_state); hvs_new_state = vc4_hvs_get_global_state(state); - if (!hvs_new_state) - return -EINVAL; + if (IS_ERR(hvs_new_state)) + return PTR_ERR(hvs_new_state); for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ac761c683663..920a9eefe426 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -33,6 +33,7 @@ static const struct hvs_format { u32 hvs; /* HVS_FORMAT_* */ u32 pixel_order; u32 pixel_order_hvs5; + bool hvs5_only; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, @@ -128,6 +129,12 @@ static const struct hvs_format { .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, .pixel_order = HVS_PIXEL_ORDER_XYCRCB, }, + { + .drm = DRM_FORMAT_P030, + .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT, + .pixel_order = HVS_PIXEL_ORDER_XYCBCR, + .hvs5_only = true, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) @@ -616,6 +623,51 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state) return 0; } +/* + * The colorspace conversion matrices are held in 3 entries in the dlist. + * Create an array of them, with entries for each full and limited mode, and + * each supported colorspace. + */ +static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = { + { + /* Limited range */ + { + /* BT601 */ + SCALER_CSC0_ITR_R_601_5, + SCALER_CSC1_ITR_R_601_5, + SCALER_CSC2_ITR_R_601_5, + }, { + /* BT709 */ + SCALER_CSC0_ITR_R_709_3, + SCALER_CSC1_ITR_R_709_3, + SCALER_CSC2_ITR_R_709_3, + }, { + /* BT2020 */ + SCALER_CSC0_ITR_R_2020, + SCALER_CSC1_ITR_R_2020, + SCALER_CSC2_ITR_R_2020, + } + }, { + /* Full range */ + { + /* JFIF */ + SCALER_CSC0_JPEG_JFIF, + SCALER_CSC1_JPEG_JFIF, + SCALER_CSC2_JPEG_JFIF, + }, { + /* BT709 */ + SCALER_CSC0_ITR_R_709_3_FR, + SCALER_CSC1_ITR_R_709_3_FR, + SCALER_CSC2_ITR_R_709_3_FR, + }, { + /* BT2020 */ + SCALER_CSC0_ITR_R_2020_FR, + SCALER_CSC1_ITR_R_2020_FR, + SCALER_CSC2_ITR_R_2020_FR, + } + } +}; + /* Writes out a full display list for an active plane to the plane's * private dlist state. */ @@ -762,47 +814,90 @@ static int vc4_plane_mode_set(struct drm_plane *plane, case DRM_FORMAT_MOD_BROADCOM_SAND128: case DRM_FORMAT_MOD_BROADCOM_SAND256: { uint32_t param = fourcc_mod_broadcom_param(fb->modifier); - u32 tile_w, tile, x_off, pix_per_tile; - - hvs_format = HVS_PIXEL_FORMAT_H264; - - switch (base_format_mod) { - case DRM_FORMAT_MOD_BROADCOM_SAND64: - tiling = SCALER_CTL0_TILING_64B; - tile_w = 64; - break; - case DRM_FORMAT_MOD_BROADCOM_SAND128: - tiling = SCALER_CTL0_TILING_128B; - tile_w = 128; - break; - case DRM_FORMAT_MOD_BROADCOM_SAND256: - tiling = SCALER_CTL0_TILING_256B_OR_T; - tile_w = 256; - break; - default: - break; - } if (param > SCALER_TILE_HEIGHT_MASK) { - DRM_DEBUG_KMS("SAND height too large (%d)\n", param); + DRM_DEBUG_KMS("SAND height too large (%d)\n", + param); return -EINVAL; } - pix_per_tile = tile_w / fb->format->cpp[0]; - tile = vc4_state->src_x / pix_per_tile; - x_off = vc4_state->src_x % pix_per_tile; + if (fb->format->format == DRM_FORMAT_P030) { + hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; + tiling = SCALER_CTL0_TILING_128B; + } else { + hvs_format = HVS_PIXEL_FORMAT_H264; + + switch (base_format_mod) { + case DRM_FORMAT_MOD_BROADCOM_SAND64: + tiling = SCALER_CTL0_TILING_64B; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND128: + tiling = SCALER_CTL0_TILING_128B; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND256: + tiling = SCALER_CTL0_TILING_256B_OR_T; + break; + default: + return -EINVAL; + } + } /* Adjust the base pointer to the first pixel to be scanned * out. + * + * For P030, y_ptr [31:4] is the 128bit word for the start pixel + * y_ptr [3:0] is the pixel (0-11) contained within that 128bit + * word that should be taken as the first pixel. + * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the + * element within the 128bit word, eg for pixel 3 the value + * should be 6. */ for (i = 0; i < num_planes; i++) { + u32 tile_w, tile, x_off, pix_per_tile; + + if (fb->format->format == DRM_FORMAT_P030) { + /* + * Spec says: bits [31:4] of the given address + * should point to the 128-bit word containing + * the desired starting pixel, and bits[3:0] + * should be between 0 and 11, indicating which + * of the 12-pixels in that 128-bit word is the + * first pixel to be used + */ + u32 remaining_pixels = vc4_state->src_x % 96; + u32 aligned = remaining_pixels / 12; + u32 last_bits = remaining_pixels % 12; + + x_off = aligned * 16 + last_bits; + tile_w = 128; + pix_per_tile = 96; + } else { + switch (base_format_mod) { + case DRM_FORMAT_MOD_BROADCOM_SAND64: + tile_w = 64; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND128: + tile_w = 128; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND256: + tile_w = 256; + break; + default: + return -EINVAL; + } + pix_per_tile = tile_w / fb->format->cpp[0]; + x_off = (vc4_state->src_x % pix_per_tile) / + (i ? h_subsample : 1) * + fb->format->cpp[i]; + } + + tile = vc4_state->src_x / pix_per_tile; + vc4_state->offsets[i] += param * tile_w * tile; vc4_state->offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; - vc4_state->offsets[i] += x_off / - (i ? h_subsample : 1) * - fb->format->cpp[i]; + vc4_state->offsets[i] += x_off & ~(i ? 1 : 0); } pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); @@ -955,7 +1050,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Pitch word 1/2 */ for (i = 1; i < num_planes; i++) { - if (hvs_format != HVS_PIXEL_FORMAT_H264) { + if (hvs_format != HVS_PIXEL_FORMAT_H264 && + hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); @@ -966,9 +1062,20 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Colorspace conversion words */ if (vc4_state->is_yuv) { - vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); - vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5); - vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); + enum drm_color_encoding color_encoding = state->color_encoding; + enum drm_color_range color_range = state->color_range; + const u32 *ccm; + + if (color_encoding >= DRM_COLOR_ENCODING_MAX) + color_encoding = DRM_COLOR_YCBCR_BT601; + if (color_range >= DRM_COLOR_RANGE_MAX) + color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; + + ccm = colorspace_coeffs[color_range][color_encoding]; + + vc4_dlist_write(vc4_state, ccm[0]); + vc4_dlist_write(vc4_state, ccm[1]); + vc4_dlist_write(vc4_state, ccm[2]); } vc4_state->lbm_offset = 0; @@ -1315,6 +1422,13 @@ static bool vc4_format_mod_supported(struct drm_plane *plane, default: return false; } + case DRM_FORMAT_P030: + switch (fourcc_mod_broadcom_mod(modifier)) { + case DRM_FORMAT_MOD_BROADCOM_SAND128: + return true; + default: + return false; + } case DRM_FORMAT_RGBX1010102: case DRM_FORMAT_BGRX1010102: case DRM_FORMAT_RGBA1010102: @@ -1347,8 +1461,11 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, struct drm_plane *plane = NULL; struct vc4_plane *vc4_plane; u32 formats[ARRAY_SIZE(hvs_formats)]; + int num_formats = 0; int ret = 0; unsigned i; + bool hvs5 = of_device_is_compatible(dev->dev->of_node, + "brcm,bcm2711-vc5"); static const uint64_t modifiers[] = { DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, DRM_FORMAT_MOD_BROADCOM_SAND128, @@ -1363,13 +1480,17 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, if (!vc4_plane) return ERR_PTR(-ENOMEM); - for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) - formats[i] = hvs_formats[i].drm; + for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { + if (!hvs_formats[i].hvs5_only || hvs5) { + formats[num_formats] = hvs_formats[i].drm; + num_formats++; + } + } plane = &vc4_plane->base; ret = drm_universal_plane_init(dev, plane, 0, &vc4_plane_funcs, - formats, ARRAY_SIZE(formats), + formats, num_formats, modifiers, type, NULL); if (ret) return ERR_PTR(ret); @@ -1383,6 +1504,15 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y); + drm_plane_create_color_properties(plane, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709) | + BIT(DRM_COLOR_YCBCR_BT2020), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, + DRM_COLOR_YCBCR_LIMITED_RANGE); + return plane; } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 489f921ef44d..7538b84a6dca 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -975,7 +975,10 @@ enum hvs_pixel_format { #define SCALER_CSC0_COEF_CR_OFS_SHIFT 0 #define SCALER_CSC0_ITR_R_601_5 0x00f00000 #define SCALER_CSC0_ITR_R_709_3 0x00f00000 +#define SCALER_CSC0_ITR_R_2020 0x00f00000 #define SCALER_CSC0_JPEG_JFIF 0x00000000 +#define SCALER_CSC0_ITR_R_709_3_FR 0x00000000 +#define SCALER_CSC0_ITR_R_2020_FR 0x00000000 /* S2.8 contribution of Cb to Green */ #define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22) @@ -990,8 +993,11 @@ enum hvs_pixel_format { #define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0) #define SCALER_CSC1_COEF_CR_BLU_SHIFT 0 #define SCALER_CSC1_ITR_R_601_5 0xe73304a8 -#define SCALER_CSC1_ITR_R_709_3 0xf2b784a8 -#define SCALER_CSC1_JPEG_JFIF 0xea34a400 +#define SCALER_CSC1_ITR_R_709_3 0xf27784a8 +#define SCALER_CSC1_ITR_R_2020 0xf43594a8 +#define SCALER_CSC1_JPEG_JFIF 0xea349400 +#define SCALER_CSC1_ITR_R_709_3_FR 0xf4388400 +#define SCALER_CSC1_ITR_R_2020_FR 0xf5b6d400 /* S2.8 contribution of Cb to Red */ #define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20) @@ -1002,9 +1008,12 @@ enum hvs_pixel_format { /* S2.8 contribution of Cb to Blue */ #define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(19, 10) #define SCALER_CSC2_COEF_CB_BLU_SHIFT 10 -#define SCALER_CSC2_ITR_R_601_5 0x00066204 -#define SCALER_CSC2_ITR_R_709_3 0x00072a1c -#define SCALER_CSC2_JPEG_JFIF 0x000599c5 +#define SCALER_CSC2_ITR_R_601_5 0x00066604 +#define SCALER_CSC2_ITR_R_709_3 0x00072e1d +#define SCALER_CSC2_ITR_R_2020 0x0006b624 +#define SCALER_CSC2_JPEG_JFIF 0x00059dc6 +#define SCALER_CSC2_ITR_R_709_3_FR 0x00064ddb +#define SCALER_CSC2_ITR_R_2020_FR 0x0005e5e2 #define SCALER_TPZ0_VERT_RECALC BIT(31) #define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index a87eafa89e9f..c5e3e5457737 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -97,7 +97,7 @@ static struct drm_gem_object *vgem_gem_create_object(struct drm_device *dev, siz obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) - return NULL; + return ERR_PTR(-ENOMEM); /* * vgem doesn't have any begin/end cpu access ioctls, therefore must use diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index d86e1ad4a972..5f25a8d15464 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -27,7 +27,6 @@ */ #include <linux/module.h> -#include <linux/console.h> #include <linux/pci.h> #include <linux/poll.h> #include <linux/wait.h> @@ -104,7 +103,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) struct drm_device *dev; int ret; - if (vgacon_text_force() && virtio_gpu_modeset == -1) + if (drm_firmware_drivers_only() && virtio_gpu_modeset == -1) return -EINVAL; if (virtio_gpu_modeset == 0) @@ -157,36 +156,6 @@ static void virtio_gpu_config_changed(struct virtio_device *vdev) schedule_work(&vgdev->config_changed_work); } -static __poll_t virtio_gpu_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct drm_file *drm_file = filp->private_data; - struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv; - struct drm_device *dev = drm_file->minor->dev; - struct virtio_gpu_device *vgdev = dev->dev_private; - struct drm_pending_event *e = NULL; - __poll_t mask = 0; - - if (!vgdev->has_virgl_3d || !vfpriv || !vfpriv->ring_idx_mask) - return drm_poll(filp, wait); - - poll_wait(filp, &drm_file->event_wait, wait); - - if (!list_empty(&drm_file->event_list)) { - spin_lock_irq(&dev->event_lock); - e = list_first_entry(&drm_file->event_list, - struct drm_pending_event, link); - drm_file->event_space += e->event->length; - list_del(&e->link); - spin_unlock_irq(&dev->event_lock); - - kfree(e); - mask |= EPOLLIN | EPOLLRDNORM; - } - - return mask; -} - static struct virtio_device_id id_table[] = { { VIRTIO_ID_GPU, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -226,17 +195,7 @@ MODULE_AUTHOR("Dave Airlie <airlied@redhat.com>"); MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>"); MODULE_AUTHOR("Alon Levy"); -static const struct file_operations virtio_gpu_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = virtio_gpu_poll, - .read = drm_read, - .llseek = noop_llseek, - .mmap = drm_gem_mmap -}; +DEFINE_DRM_GEM_FOPS(virtio_gpu_driver_fops); static const struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index e0265fe74aa5..0a194aaad419 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -138,7 +138,6 @@ struct virtio_gpu_fence_driver { spinlock_t lock; }; -#define VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL 0x10000000 struct virtio_gpu_fence_event { struct drm_pending_event base; struct drm_event event; diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 0007e423d885..c708bab555c6 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -54,7 +54,7 @@ static int virtio_gpu_fence_event_create(struct drm_device *dev, if (!e) return -ENOMEM; - e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL; + e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED; e->event.length = sizeof(e->event); ret = drm_event_reserve_init(dev, file, &e->base, &e->event); diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 187e10da2f17..baef2c5f2aaf 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -139,7 +139,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); if (!shmem) - return NULL; + return ERR_PTR(-ENOMEM); dshmem = &shmem->base.base; dshmem->base.funcs = &virtio_gpu_shmem_funcs; diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index c9ce47c448e0..a4fabe208d9f 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -4,6 +4,7 @@ config DRM_VMWGFX depends on DRM && PCI && MMU depends on X86 || ARM64 select DRM_TTM + select DRM_TTM_HELPER select MAPPING_DIRTY_HELPERS # Only needed for the transitional use of drm_crtc_init - can be removed # again once vmwgfx sets up the primary plane itself. diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index bc323f7d4032..eee73b9aa404 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ +vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_hashtab.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \ vmwgfx_cmd.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ vmwgfx_overlay.o vmwgfx_gmrid_manager.o vmwgfx_fence.o \ @@ -9,9 +9,9 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \ vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \ - vmwgfx_devcaps.o ttm_object.o ttm_memory.o + vmwgfx_devcaps.o ttm_object.o vmwgfx_system_manager.o \ + vmwgfx_gem.o vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o -vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h index 945c84b27e81..d90d940ad3f4 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2012-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_cmd.h -- diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h index 379ec15c7758..815d0ab0053f 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 1998-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_devcaps.h -- @@ -347,6 +347,10 @@ typedef uint32 SVGA3dDevCapIndex; #define SVGA3D_DEVCAP_SM5 258 #define SVGA3D_DEVCAP_MULTISAMPLE_8X 259 +#define SVGA3D_DEVCAP_MAX_FORCED_SAMPLE_COUNT 260 + +#define SVGA3D_DEVCAP_GL43 261 + #define SVGA3D_DEVCAP_MAX 262 #define SVGA3D_DXFMT_SUPPORTED (1 << 0) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h index 5af442dad542..925bf4b93f01 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2012-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_dx.h -- @@ -508,11 +508,11 @@ typedef struct SVGA3dCmdDXSetPredication { #pragma pack(pop) #pragma pack(push, 1) -typedef struct MKS3dDXSOState { +typedef struct SVGA3dDXSOState { uint32 offset; uint32 intOffset; - uint32 vertexCount; - uint32 dead; + uint32 dead1; + uint32 dead2; } SVGA3dDXSOState; #pragma pack(pop) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h index 35494a728c7a..6103b41fe92b 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2012-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_limits.h -- @@ -82,4 +82,6 @@ #define SVGA3D_MIN_SBX_DATA_SIZE (GBYTES_2_BYTES(1)) #define SVGA3D_MAX_SBX_DATA_SIZE (GBYTES_2_BYTES(4)) +#define SVGA3D_MIN_SBX_DATA_SIZE_DVM (MBYTES_2_BYTES(900)) +#define SVGA3D_MAX_SBX_DATA_SIZE_DVM (MBYTES_2_BYTES(910)) #endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h index 988d8509c472..b24b4f55c941 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 1998-2015 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_reg.h -- diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h index 70b88ee16cf6..e9219eb380a2 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2012-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga3d_types.h -- @@ -370,7 +370,6 @@ typedef enum SVGA3dSurfaceFormat { #define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (CONST64U(1) << 30) #define SVGA3D_SURFACE_RESERVED1 (CONST64U(1) << 31) -#define SVGA3D_SURFACE_VADECODE SVGA3D_SURFACE_RESERVED1 #define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h b/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h index bf242c21f352..405f20fc26c2 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2007,2020 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga_escape.h -- diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h b/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h index aec17c3c6c29..691f48f77e33 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 2007-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga_overlay.h -- diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h index b3602557de2e..acabdb550c10 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h @@ -1,6 +1,6 @@ -/********************************************************** +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* * Copyright 1998-2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -22,7 +22,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - **********************************************************/ + */ /* * svga_reg.h -- @@ -442,6 +442,7 @@ typedef struct { #define SVGA_CAP2_TRACE_FULL_FB 0x00002000 #define SVGA_CAP2_EXTRA_REGS 0x00004000 #define SVGA_CAP2_LO_STAGING 0x00008000 +#define SVGA_CAP2_VIDEO_BLT 0x00010000 #define SVGA_CAP2_RESERVED 0x80000000 typedef enum { @@ -450,9 +451,10 @@ typedef enum { SVGABackdoorCap3dHWVersion = 2, SVGABackdoorCapDeviceCaps2 = 3, SVGABackdoorCapDevelCaps = 4, - SVGABackdoorDevelRenderer = 5, - SVGABackdoorDevelUsingISB = 6, - SVGABackdoorCapMax = 7, + SVGABackdoorCapDevCaps = 5, + SVGABackdoorDevelRenderer = 6, + SVGABackdoorDevelUsingISB = 7, + SVGABackdoorCapMax = 8, } SVGABackdoorCapType; enum { diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c deleted file mode 100644 index 7f7fe35fc21d..000000000000 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ /dev/null @@ -1,683 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/************************************************************************** - * - * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#define pr_fmt(fmt) "[TTM] " fmt - -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/swap.h> - -#include <drm/drm_device.h> -#include <drm/drm_file.h> -#include <drm/ttm/ttm_device.h> - -#include "ttm_memory.h" - -#define TTM_MEMORY_ALLOC_RETRIES 4 - -struct ttm_mem_global ttm_mem_glob; -EXPORT_SYMBOL(ttm_mem_glob); - -struct ttm_mem_zone { - struct kobject kobj; - struct ttm_mem_global *glob; - const char *name; - uint64_t zone_mem; - uint64_t emer_mem; - uint64_t max_mem; - uint64_t swap_limit; - uint64_t used_mem; -}; - -static struct attribute ttm_mem_sys = { - .name = "zone_memory", - .mode = S_IRUGO -}; -static struct attribute ttm_mem_emer = { - .name = "emergency_memory", - .mode = S_IRUGO | S_IWUSR -}; -static struct attribute ttm_mem_max = { - .name = "available_memory", - .mode = S_IRUGO | S_IWUSR -}; -static struct attribute ttm_mem_swap = { - .name = "swap_limit", - .mode = S_IRUGO | S_IWUSR -}; -static struct attribute ttm_mem_used = { - .name = "used_memory", - .mode = S_IRUGO -}; - -static void ttm_mem_zone_kobj_release(struct kobject *kobj) -{ - struct ttm_mem_zone *zone = - container_of(kobj, struct ttm_mem_zone, kobj); - - pr_info("Zone %7s: Used memory at exit: %llu KiB\n", - zone->name, (unsigned long long)zone->used_mem >> 10); - kfree(zone); -} - -static ssize_t ttm_mem_zone_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) -{ - struct ttm_mem_zone *zone = - container_of(kobj, struct ttm_mem_zone, kobj); - uint64_t val = 0; - - spin_lock(&zone->glob->lock); - if (attr == &ttm_mem_sys) - val = zone->zone_mem; - else if (attr == &ttm_mem_emer) - val = zone->emer_mem; - else if (attr == &ttm_mem_max) - val = zone->max_mem; - else if (attr == &ttm_mem_swap) - val = zone->swap_limit; - else if (attr == &ttm_mem_used) - val = zone->used_mem; - spin_unlock(&zone->glob->lock); - - return snprintf(buffer, PAGE_SIZE, "%llu\n", - (unsigned long long) val >> 10); -} - -static void ttm_check_swapping(struct ttm_mem_global *glob); - -static ssize_t ttm_mem_zone_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t size) -{ - struct ttm_mem_zone *zone = - container_of(kobj, struct ttm_mem_zone, kobj); - int chars; - unsigned long val; - uint64_t val64; - - chars = sscanf(buffer, "%lu", &val); - if (chars == 0) - return size; - - val64 = val; - val64 <<= 10; - - spin_lock(&zone->glob->lock); - if (val64 > zone->zone_mem) - val64 = zone->zone_mem; - if (attr == &ttm_mem_emer) { - zone->emer_mem = val64; - if (zone->max_mem > val64) - zone->max_mem = val64; - } else if (attr == &ttm_mem_max) { - zone->max_mem = val64; - if (zone->emer_mem < val64) - zone->emer_mem = val64; - } else if (attr == &ttm_mem_swap) - zone->swap_limit = val64; - spin_unlock(&zone->glob->lock); - - ttm_check_swapping(zone->glob); - - return size; -} - -static struct attribute *ttm_mem_zone_attrs[] = { - &ttm_mem_sys, - &ttm_mem_emer, - &ttm_mem_max, - &ttm_mem_swap, - &ttm_mem_used, - NULL -}; - -static const struct sysfs_ops ttm_mem_zone_ops = { - .show = &ttm_mem_zone_show, - .store = &ttm_mem_zone_store -}; - -static struct kobj_type ttm_mem_zone_kobj_type = { - .release = &ttm_mem_zone_kobj_release, - .sysfs_ops = &ttm_mem_zone_ops, - .default_attrs = ttm_mem_zone_attrs, -}; - -static struct attribute ttm_mem_global_lower_mem_limit = { - .name = "lower_mem_limit", - .mode = S_IRUGO | S_IWUSR -}; - -static ssize_t ttm_mem_global_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) -{ - struct ttm_mem_global *glob = - container_of(kobj, struct ttm_mem_global, kobj); - uint64_t val = 0; - - spin_lock(&glob->lock); - val = glob->lower_mem_limit; - spin_unlock(&glob->lock); - /* convert from number of pages to KB */ - val <<= (PAGE_SHIFT - 10); - return snprintf(buffer, PAGE_SIZE, "%llu\n", - (unsigned long long) val); -} - -static ssize_t ttm_mem_global_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t size) -{ - int chars; - uint64_t val64; - unsigned long val; - struct ttm_mem_global *glob = - container_of(kobj, struct ttm_mem_global, kobj); - - chars = sscanf(buffer, "%lu", &val); - if (chars == 0) - return size; - - val64 = val; - /* convert from KB to number of pages */ - val64 >>= (PAGE_SHIFT - 10); - - spin_lock(&glob->lock); - glob->lower_mem_limit = val64; - spin_unlock(&glob->lock); - - return size; -} - -static struct attribute *ttm_mem_global_attrs[] = { - &ttm_mem_global_lower_mem_limit, - NULL -}; - -static const struct sysfs_ops ttm_mem_global_ops = { - .show = &ttm_mem_global_show, - .store = &ttm_mem_global_store, -}; - -static struct kobj_type ttm_mem_glob_kobj_type = { - .sysfs_ops = &ttm_mem_global_ops, - .default_attrs = ttm_mem_global_attrs, -}; - -static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob, - bool from_wq, uint64_t extra) -{ - unsigned int i; - struct ttm_mem_zone *zone; - uint64_t target; - - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - - if (from_wq) - target = zone->swap_limit; - else if (capable(CAP_SYS_ADMIN)) - target = zone->emer_mem; - else - target = zone->max_mem; - - target = (extra > target) ? 0ULL : target; - - if (zone->used_mem > target) - return true; - } - return false; -} - -/* - * At this point we only support a single shrink callback. - * Extend this if needed, perhaps using a linked list of callbacks. - * Note that this function is reentrant: - * many threads may try to swap out at any given time. - */ - -static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, - uint64_t extra, struct ttm_operation_ctx *ctx) -{ - int ret; - - spin_lock(&glob->lock); - - while (ttm_zones_above_swap_target(glob, from_wq, extra)) { - spin_unlock(&glob->lock); - ret = ttm_global_swapout(ctx, GFP_KERNEL); - spin_lock(&glob->lock); - if (unlikely(ret <= 0)) - break; - } - - spin_unlock(&glob->lock); -} - -static void ttm_shrink_work(struct work_struct *work) -{ - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; - struct ttm_mem_global *glob = - container_of(work, struct ttm_mem_global, work); - - ttm_shrink(glob, true, 0ULL, &ctx); -} - -static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob, - const struct sysinfo *si) -{ - struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); - uint64_t mem; - int ret; - - if (unlikely(!zone)) - return -ENOMEM; - - mem = si->totalram - si->totalhigh; - mem *= si->mem_unit; - - zone->name = "kernel"; - zone->zone_mem = mem; - zone->max_mem = mem >> 1; - zone->emer_mem = (mem >> 1) + (mem >> 2); - zone->swap_limit = zone->max_mem - (mem >> 3); - zone->used_mem = 0; - zone->glob = glob; - glob->zone_kernel = zone; - ret = kobject_init_and_add( - &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, zone->name); - if (unlikely(ret != 0)) { - kobject_put(&zone->kobj); - return ret; - } - glob->zones[glob->num_zones++] = zone; - return 0; -} - -#ifdef CONFIG_HIGHMEM -static int ttm_mem_init_highmem_zone(struct ttm_mem_global *glob, - const struct sysinfo *si) -{ - struct ttm_mem_zone *zone; - uint64_t mem; - int ret; - - if (si->totalhigh == 0) - return 0; - - zone = kzalloc(sizeof(*zone), GFP_KERNEL); - if (unlikely(!zone)) - return -ENOMEM; - - mem = si->totalram; - mem *= si->mem_unit; - - zone->name = "highmem"; - zone->zone_mem = mem; - zone->max_mem = mem >> 1; - zone->emer_mem = (mem >> 1) + (mem >> 2); - zone->swap_limit = zone->max_mem - (mem >> 3); - zone->used_mem = 0; - zone->glob = glob; - glob->zone_highmem = zone; - ret = kobject_init_and_add( - &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, "%s", - zone->name); - if (unlikely(ret != 0)) { - kobject_put(&zone->kobj); - return ret; - } - glob->zones[glob->num_zones++] = zone; - return 0; -} -#else -static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob, - const struct sysinfo *si) -{ - struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); - uint64_t mem; - int ret; - - if (unlikely(!zone)) - return -ENOMEM; - - mem = si->totalram; - mem *= si->mem_unit; - - /** - * No special dma32 zone needed. - */ - - if (mem <= ((uint64_t) 1ULL << 32)) { - kfree(zone); - return 0; - } - - /* - * Limit max dma32 memory to 4GB for now - * until we can figure out how big this - * zone really is. - */ - - mem = ((uint64_t) 1ULL << 32); - zone->name = "dma32"; - zone->zone_mem = mem; - zone->max_mem = mem >> 1; - zone->emer_mem = (mem >> 1) + (mem >> 2); - zone->swap_limit = zone->max_mem - (mem >> 3); - zone->used_mem = 0; - zone->glob = glob; - glob->zone_dma32 = zone; - ret = kobject_init_and_add( - &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, zone->name); - if (unlikely(ret != 0)) { - kobject_put(&zone->kobj); - return ret; - } - glob->zones[glob->num_zones++] = zone; - return 0; -} -#endif - -int ttm_mem_global_init(struct ttm_mem_global *glob, struct device *dev) -{ - struct sysinfo si; - int ret; - int i; - struct ttm_mem_zone *zone; - - spin_lock_init(&glob->lock); - glob->swap_queue = create_singlethread_workqueue("ttm_swap"); - INIT_WORK(&glob->work, ttm_shrink_work); - - ret = kobject_init_and_add(&glob->kobj, &ttm_mem_glob_kobj_type, - &dev->kobj, "memory_accounting"); - if (unlikely(ret != 0)) { - kobject_put(&glob->kobj); - return ret; - } - - si_meminfo(&si); - - spin_lock(&glob->lock); - /* set it as 0 by default to keep original behavior of OOM */ - glob->lower_mem_limit = 0; - spin_unlock(&glob->lock); - - ret = ttm_mem_init_kernel_zone(glob, &si); - if (unlikely(ret != 0)) - goto out_no_zone; -#ifdef CONFIG_HIGHMEM - ret = ttm_mem_init_highmem_zone(glob, &si); - if (unlikely(ret != 0)) - goto out_no_zone; -#else - ret = ttm_mem_init_dma32_zone(glob, &si); - if (unlikely(ret != 0)) - goto out_no_zone; -#endif - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - pr_info("Zone %7s: Available graphics memory: %llu KiB\n", - zone->name, (unsigned long long)zone->max_mem >> 10); - } - return 0; -out_no_zone: - ttm_mem_global_release(glob); - return ret; -} - -void ttm_mem_global_release(struct ttm_mem_global *glob) -{ - struct ttm_mem_zone *zone; - unsigned int i; - - destroy_workqueue(glob->swap_queue); - glob->swap_queue = NULL; - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - kobject_del(&zone->kobj); - kobject_put(&zone->kobj); - } - kobject_del(&glob->kobj); - kobject_put(&glob->kobj); - memset(glob, 0, sizeof(*glob)); -} - -static void ttm_check_swapping(struct ttm_mem_global *glob) -{ - bool needs_swapping = false; - unsigned int i; - struct ttm_mem_zone *zone; - - spin_lock(&glob->lock); - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - if (zone->used_mem > zone->swap_limit) { - needs_swapping = true; - break; - } - } - - spin_unlock(&glob->lock); - - if (unlikely(needs_swapping)) - (void)queue_work(glob->swap_queue, &glob->work); - -} - -static void ttm_mem_global_free_zone(struct ttm_mem_global *glob, - struct ttm_mem_zone *single_zone, - uint64_t amount) -{ - unsigned int i; - struct ttm_mem_zone *zone; - - spin_lock(&glob->lock); - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - if (single_zone && zone != single_zone) - continue; - zone->used_mem -= amount; - } - spin_unlock(&glob->lock); -} - -void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount) -{ - return ttm_mem_global_free_zone(glob, glob->zone_kernel, amount); -} -EXPORT_SYMBOL(ttm_mem_global_free); - -/* - * check if the available mem is under lower memory limit - * - * a. if no swap disk at all or free swap space is under swap_mem_limit - * but available system mem is bigger than sys_mem_limit, allow TTM - * allocation; - * - * b. if the available system mem is less than sys_mem_limit but free - * swap disk is bigger than swap_mem_limit, allow TTM allocation. - */ -bool -ttm_check_under_lowerlimit(struct ttm_mem_global *glob, - uint64_t num_pages, - struct ttm_operation_ctx *ctx) -{ - int64_t available; - - /* We allow over commit during suspend */ - if (ctx->force_alloc) - return false; - - available = get_nr_swap_pages() + si_mem_available(); - available -= num_pages; - if (available < glob->lower_mem_limit) - return true; - - return false; -} - -static int ttm_mem_global_reserve(struct ttm_mem_global *glob, - struct ttm_mem_zone *single_zone, - uint64_t amount, bool reserve) -{ - uint64_t limit; - int ret = -ENOMEM; - unsigned int i; - struct ttm_mem_zone *zone; - - spin_lock(&glob->lock); - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - if (single_zone && zone != single_zone) - continue; - - limit = (capable(CAP_SYS_ADMIN)) ? - zone->emer_mem : zone->max_mem; - - if (zone->used_mem > limit) - goto out_unlock; - } - - if (reserve) { - for (i = 0; i < glob->num_zones; ++i) { - zone = glob->zones[i]; - if (single_zone && zone != single_zone) - continue; - zone->used_mem += amount; - } - } - - ret = 0; -out_unlock: - spin_unlock(&glob->lock); - ttm_check_swapping(glob); - - return ret; -} - - -static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob, - struct ttm_mem_zone *single_zone, - uint64_t memory, - struct ttm_operation_ctx *ctx) -{ - int count = TTM_MEMORY_ALLOC_RETRIES; - - while (unlikely(ttm_mem_global_reserve(glob, - single_zone, - memory, true) - != 0)) { - if (ctx->no_wait_gpu) - return -ENOMEM; - if (unlikely(count-- == 0)) - return -ENOMEM; - ttm_shrink(glob, false, memory + (memory >> 2) + 16, ctx); - } - - return 0; -} - -int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - struct ttm_operation_ctx *ctx) -{ - /** - * Normal allocations of kernel memory are registered in - * the kernel zone. - */ - - return ttm_mem_global_alloc_zone(glob, glob->zone_kernel, memory, ctx); -} -EXPORT_SYMBOL(ttm_mem_global_alloc); - -int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, - struct page *page, uint64_t size, - struct ttm_operation_ctx *ctx) -{ - struct ttm_mem_zone *zone = NULL; - - /** - * Page allocations may be registed in a single zone - * only if highmem or !dma32. - */ - -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page) && glob->zone_highmem != NULL) - zone = glob->zone_highmem; -#else - if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) - zone = glob->zone_kernel; -#endif - return ttm_mem_global_alloc_zone(glob, zone, size, ctx); -} - -void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page, - uint64_t size) -{ - struct ttm_mem_zone *zone = NULL; - -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page) && glob->zone_highmem != NULL) - zone = glob->zone_highmem; -#else - if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) - zone = glob->zone_kernel; -#endif - ttm_mem_global_free_zone(glob, zone, size); -} - -size_t ttm_round_pot(size_t size) -{ - if ((size & (size - 1)) == 0) - return size; - else if (size > PAGE_SIZE) - return PAGE_ALIGN(size); - else { - size_t tmp_size = 4; - - while (tmp_size < size) - tmp_size <<= 1; - - return tmp_size; - } - return 0; -} -EXPORT_SYMBOL(ttm_round_pot); diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.h b/drivers/gpu/drm/vmwgfx/ttm_memory.h deleted file mode 100644 index c50dba774485..000000000000 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.h +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************************************** - * - * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TTM_MEMORY_H -#define TTM_MEMORY_H - -#include <linux/workqueue.h> -#include <linux/spinlock.h> -#include <linux/bug.h> -#include <linux/wait.h> -#include <linux/errno.h> -#include <linux/kobject.h> -#include <linux/mm.h> - -#include <drm/ttm/ttm_bo_api.h> - -/** - * struct ttm_mem_global - Global memory accounting structure. - * - * @shrink: A single callback to shrink TTM memory usage. Extend this - * to a linked list to be able to handle multiple callbacks when needed. - * @swap_queue: A workqueue to handle shrinking in low memory situations. We - * need a separate workqueue since it will spend a lot of time waiting - * for the GPU, and this will otherwise block other workqueue tasks(?) - * At this point we use only a single-threaded workqueue. - * @work: The workqueue callback for the shrink queue. - * @lock: Lock to protect the @shrink - and the memory accounting members, - * that is, essentially the whole structure with some exceptions. - * @lower_mem_limit: include lower limit of swap space and lower limit of - * system memory. - * @zones: Array of pointers to accounting zones. - * @num_zones: Number of populated entries in the @zones array. - * @zone_kernel: Pointer to the kernel zone. - * @zone_highmem: Pointer to the highmem zone if there is one. - * @zone_dma32: Pointer to the dma32 zone if there is one. - * - * Note that this structure is not per device. It should be global for all - * graphics devices. - */ - -#define TTM_MEM_MAX_ZONES 2 -struct ttm_mem_zone; -extern struct ttm_mem_global { - struct kobject kobj; - struct workqueue_struct *swap_queue; - struct work_struct work; - spinlock_t lock; - uint64_t lower_mem_limit; - struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES]; - unsigned int num_zones; - struct ttm_mem_zone *zone_kernel; -#ifdef CONFIG_HIGHMEM - struct ttm_mem_zone *zone_highmem; -#else - struct ttm_mem_zone *zone_dma32; -#endif -} ttm_mem_glob; - -int ttm_mem_global_init(struct ttm_mem_global *glob, struct device *dev); -void ttm_mem_global_release(struct ttm_mem_global *glob); -int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - struct ttm_operation_ctx *ctx); -void ttm_mem_global_free(struct ttm_mem_global *glob, uint64_t amount); -int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, - struct page *page, uint64_t size, - struct ttm_operation_ctx *ctx); -void ttm_mem_global_free_page(struct ttm_mem_global *glob, - struct page *page, uint64_t size); -size_t ttm_round_pot(size_t size); -bool ttm_check_under_lowerlimit(struct ttm_mem_global *glob, uint64_t num_pages, - struct ttm_operation_ctx *ctx); -#endif diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 899945f54dc7..26a55fef1ab5 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -50,6 +50,7 @@ #include <linux/atomic.h> #include <linux/module.h> #include "ttm_object.h" +#include "vmwgfx_drv.h" MODULE_IMPORT_NS(DMA_BUF); @@ -73,7 +74,7 @@ struct ttm_object_file { struct ttm_object_device *tdev; spinlock_t lock; struct list_head ref_list; - struct drm_open_hash ref_hash[TTM_REF_NUM]; + struct vmwgfx_open_hash ref_hash; struct kref refcount; }; @@ -91,12 +92,10 @@ struct ttm_object_file { struct ttm_object_device { spinlock_t object_lock; - struct drm_open_hash object_hash; + struct vmwgfx_open_hash object_hash; atomic_t object_count; - struct ttm_mem_global *mem_glob; struct dma_buf_ops ops; void (*dmabuf_release)(struct dma_buf *dma_buf); - size_t dma_buf_size; struct idr idr; }; @@ -123,10 +122,9 @@ struct ttm_object_device { struct ttm_ref_object { struct rcu_head rcu_head; - struct drm_hash_item hash; + struct vmwgfx_hash_item hash; struct list_head head; struct kref kref; - enum ttm_ref_type ref_type; struct ttm_base_object *obj; struct ttm_object_file *tfile; }; @@ -162,9 +160,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, struct ttm_base_object *base, bool shareable, enum ttm_object_type object_type, - void (*refcount_release) (struct ttm_base_object **), - void (*ref_obj_release) (struct ttm_base_object *, - enum ttm_ref_type ref_type)) + void (*refcount_release) (struct ttm_base_object **)) { struct ttm_object_device *tdev = tfile->tdev; int ret; @@ -172,7 +168,6 @@ int ttm_base_object_init(struct ttm_object_file *tfile, base->shareable = shareable; base->tfile = ttm_object_file_ref(tfile); base->refcount_release = refcount_release; - base->ref_obj_release = ref_obj_release; base->object_type = object_type; kref_init(&base->refcount); idr_preload(GFP_KERNEL); @@ -184,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, return ret; base->handle = ret; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); + ret = ttm_ref_object_add(tfile, base, NULL, false); if (unlikely(ret != 0)) goto out_err1; @@ -247,12 +242,12 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) struct ttm_base_object * ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key) { - struct drm_hash_item *hash; - struct drm_open_hash *ht = &tfile->ref_hash[TTM_REF_USAGE]; + struct vmwgfx_hash_item *hash; + struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = drm_ht_find_item_rcu(ht, key, &hash); + ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); if (ret) { rcu_read_unlock(); return NULL; @@ -267,12 +262,12 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint32_t key) { struct ttm_base_object *base = NULL; - struct drm_hash_item *hash; - struct drm_open_hash *ht = &tfile->ref_hash[TTM_REF_USAGE]; + struct vmwgfx_hash_item *hash; + struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = drm_ht_find_item_rcu(ht, key, &hash); + ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); if (likely(ret == 0)) { base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; @@ -299,64 +294,14 @@ ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key) return base; } -/** - * ttm_ref_object_exists - Check whether a caller has a valid ref object - * (has opened) a base object. - * - * @tfile: Pointer to a struct ttm_object_file identifying the caller. - * @base: Pointer to a struct base object. - * - * Checks wether the caller identified by @tfile has put a valid USAGE - * reference object on the base object identified by @base. - */ -bool ttm_ref_object_exists(struct ttm_object_file *tfile, - struct ttm_base_object *base) -{ - struct drm_open_hash *ht = &tfile->ref_hash[TTM_REF_USAGE]; - struct drm_hash_item *hash; - struct ttm_ref_object *ref; - - rcu_read_lock(); - if (unlikely(drm_ht_find_item_rcu(ht, base->handle, &hash) != 0)) - goto out_false; - - /* - * Verify that the ref object is really pointing to our base object. - * Our base object could actually be dead, and the ref object pointing - * to another base object with the same handle. - */ - ref = drm_hash_entry(hash, struct ttm_ref_object, hash); - if (unlikely(base != ref->obj)) - goto out_false; - - /* - * Verify that the ref->obj pointer was actually valid! - */ - rmb(); - if (unlikely(kref_read(&ref->kref) == 0)) - goto out_false; - - rcu_read_unlock(); - return true; - - out_false: - rcu_read_unlock(); - return false; -} - int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed, + bool *existed, bool require_existed) { - struct drm_open_hash *ht = &tfile->ref_hash[ref_type]; + struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; - struct drm_hash_item *hash; - struct ttm_mem_global *mem_glob = tfile->tdev->mem_glob; - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; + struct vmwgfx_hash_item *hash; int ret = -EINVAL; if (base->tfile != tfile && !base->shareable) @@ -367,7 +312,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, while (ret == -EINVAL) { rcu_read_lock(); - ret = drm_ht_find_item_rcu(ht, base->handle, &hash); + ret = vmwgfx_ht_find_item_rcu(ht, base->handle, &hash); if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); @@ -381,24 +326,18 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, if (require_existed) return -EPERM; - ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref), - &ctx); - if (unlikely(ret != 0)) - return ret; ref = kmalloc(sizeof(*ref), GFP_KERNEL); if (unlikely(ref == NULL)) { - ttm_mem_global_free(mem_glob, sizeof(*ref)); return -ENOMEM; } ref->hash.key = base->handle; ref->obj = base; ref->tfile = tfile; - ref->ref_type = ref_type; kref_init(&ref->kref); spin_lock(&tfile->lock); - ret = drm_ht_insert_item_rcu(ht, &ref->hash); + ret = vmwgfx_ht_insert_item_rcu(ht, &ref->hash); if (likely(ret == 0)) { list_add_tail(&ref->head, &tfile->ref_list); @@ -412,7 +351,6 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, spin_unlock(&tfile->lock); BUG_ON(ret != -EINVAL); - ttm_mem_global_free(mem_glob, sizeof(*ref)); kfree(ref); } @@ -424,35 +362,29 @@ ttm_ref_object_release(struct kref *kref) { struct ttm_ref_object *ref = container_of(kref, struct ttm_ref_object, kref); - struct ttm_base_object *base = ref->obj; struct ttm_object_file *tfile = ref->tfile; - struct drm_open_hash *ht; - struct ttm_mem_global *mem_glob = tfile->tdev->mem_glob; + struct vmwgfx_open_hash *ht; - ht = &tfile->ref_hash[ref->ref_type]; - (void)drm_ht_remove_item_rcu(ht, &ref->hash); + ht = &tfile->ref_hash; + (void)vmwgfx_ht_remove_item_rcu(ht, &ref->hash); list_del(&ref->head); spin_unlock(&tfile->lock); - if (ref->ref_type != TTM_REF_USAGE && base->ref_obj_release) - base->ref_obj_release(base, ref->ref_type); - ttm_base_object_unref(&ref->obj); - ttm_mem_global_free(mem_glob, sizeof(*ref)); kfree_rcu(ref, rcu_head); spin_lock(&tfile->lock); } int ttm_ref_object_base_unref(struct ttm_object_file *tfile, - unsigned long key, enum ttm_ref_type ref_type) + unsigned long key) { - struct drm_open_hash *ht = &tfile->ref_hash[ref_type]; + struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; - struct drm_hash_item *hash; + struct vmwgfx_hash_item *hash; int ret; spin_lock(&tfile->lock); - ret = drm_ht_find_item(ht, key, &hash); + ret = vmwgfx_ht_find_item(ht, key, &hash); if (unlikely(ret != 0)) { spin_unlock(&tfile->lock); return -EINVAL; @@ -467,7 +399,6 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) { struct ttm_ref_object *ref; struct list_head *list; - unsigned int i; struct ttm_object_file *tfile = *p_tfile; *p_tfile = NULL; @@ -485,8 +416,7 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) } spin_unlock(&tfile->lock); - for (i = 0; i < TTM_REF_NUM; ++i) - drm_ht_remove(&tfile->ref_hash[i]); + vmwgfx_ht_remove(&tfile->ref_hash); ttm_object_file_unref(&tfile); } @@ -495,8 +425,6 @@ struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, unsigned int hash_order) { struct ttm_object_file *tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); - unsigned int i; - unsigned int j = 0; int ret; if (unlikely(tfile == NULL)) @@ -507,18 +435,13 @@ struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, kref_init(&tfile->refcount); INIT_LIST_HEAD(&tfile->ref_list); - for (i = 0; i < TTM_REF_NUM; ++i) { - ret = drm_ht_create(&tfile->ref_hash[i], hash_order); - if (ret) { - j = i; - goto out_err; - } - } + ret = vmwgfx_ht_create(&tfile->ref_hash, hash_order); + if (ret) + goto out_err; return tfile; out_err: - for (i = 0; i < j; ++i) - drm_ht_remove(&tfile->ref_hash[i]); + vmwgfx_ht_remove(&tfile->ref_hash); kfree(tfile); @@ -526,8 +449,7 @@ out_err: } struct ttm_object_device * -ttm_object_device_init(struct ttm_mem_global *mem_glob, - unsigned int hash_order, +ttm_object_device_init(unsigned int hash_order, const struct dma_buf_ops *ops) { struct ttm_object_device *tdev = kmalloc(sizeof(*tdev), GFP_KERNEL); @@ -536,19 +458,24 @@ ttm_object_device_init(struct ttm_mem_global *mem_glob, if (unlikely(tdev == NULL)) return NULL; - tdev->mem_glob = mem_glob; spin_lock_init(&tdev->object_lock); atomic_set(&tdev->object_count, 0); - ret = drm_ht_create(&tdev->object_hash, hash_order); + ret = vmwgfx_ht_create(&tdev->object_hash, hash_order); if (ret != 0) goto out_no_object_hash; - idr_init_base(&tdev->idr, 1); + /* + * Our base is at VMWGFX_NUM_MOB + 1 because we want to create + * a seperate namespace for GEM handles (which are + * 1..VMWGFX_NUM_MOB) and the surface handles. Some ioctl's + * can take either handle as an argument so we want to + * easily be able to tell whether the handle refers to a + * GEM buffer or a surface. + */ + idr_init_base(&tdev->idr, VMWGFX_NUM_MOB + 1); tdev->ops = *ops; tdev->dmabuf_release = tdev->ops.release; tdev->ops.release = ttm_prime_dmabuf_release; - tdev->dma_buf_size = ttm_round_pot(sizeof(struct dma_buf)) + - ttm_round_pot(sizeof(struct file)); return tdev; out_no_object_hash: @@ -564,7 +491,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) WARN_ON_ONCE(!idr_is_empty(&tdev->idr)); idr_destroy(&tdev->idr); - drm_ht_remove(&tdev->object_hash); + vmwgfx_ht_remove(&tdev->object_hash); kfree(tdev); } @@ -633,7 +560,6 @@ static void ttm_prime_dmabuf_release(struct dma_buf *dma_buf) if (prime->dma_buf == dma_buf) prime->dma_buf = NULL; mutex_unlock(&prime->mutex); - ttm_mem_global_free(tdev->mem_glob, tdev->dma_buf_size); ttm_base_object_unref(&base); } @@ -667,7 +593,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, prime = (struct ttm_prime_object *) dma_buf->priv; base = &prime->base; *handle = base->handle; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); + ret = ttm_ref_object_add(tfile, base, NULL, false); dma_buf_put(dma_buf); @@ -715,30 +641,18 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, dma_buf = prime->dma_buf; if (!dma_buf || !get_dma_buf_unless_doomed(dma_buf)) { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; exp_info.ops = &tdev->ops; exp_info.size = prime->size; exp_info.flags = flags; exp_info.priv = prime; /* - * Need to create a new dma_buf, with memory accounting. + * Need to create a new dma_buf */ - ret = ttm_mem_global_alloc(tdev->mem_glob, tdev->dma_buf_size, - &ctx); - if (unlikely(ret != 0)) { - mutex_unlock(&prime->mutex); - goto out_unref; - } dma_buf = dma_buf_export(&exp_info); if (IS_ERR(dma_buf)) { ret = PTR_ERR(dma_buf); - ttm_mem_global_free(tdev->mem_glob, - tdev->dma_buf_size); mutex_unlock(&prime->mutex); goto out_unref; } @@ -773,7 +687,6 @@ out_unref: * @shareable: See ttm_base_object_init * @type: See ttm_base_object_init * @refcount_release: See ttm_base_object_init - * @ref_obj_release: See ttm_base_object_init * * Initializes an object which is compatible with the drm_prime model * for data sharing between processes and devices. @@ -781,9 +694,7 @@ out_unref: int ttm_prime_object_init(struct ttm_object_file *tfile, size_t size, struct ttm_prime_object *prime, bool shareable, enum ttm_object_type type, - void (*refcount_release) (struct ttm_base_object **), - void (*ref_obj_release) (struct ttm_base_object *, - enum ttm_ref_type ref_type)) + void (*refcount_release) (struct ttm_base_object **)) { mutex_init(&prime->mutex); prime->size = PAGE_ALIGN(size); @@ -792,6 +703,5 @@ int ttm_prime_object_init(struct ttm_object_file *tfile, size_t size, prime->refcount_release = refcount_release; return ttm_base_object_init(tfile, &prime->base, shareable, ttm_prime_type, - ttm_prime_refcount_release, - ref_obj_release); + ttm_prime_refcount_release); } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 49b064f0cb19..4c8700027c6d 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -42,31 +42,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> -#include <drm/drm_hashtab.h> - -#include "ttm_memory.h" - -/** - * enum ttm_ref_type - * - * Describes what type of reference a ref object holds. - * - * TTM_REF_USAGE is a simple refcount on a base object. - * - * TTM_REF_SYNCCPU_READ is a SYNCCPU_READ reference on a - * buffer object. - * - * TTM_REF_SYNCCPU_WRITE is a SYNCCPU_WRITE reference on a - * buffer object. - * - */ - -enum ttm_ref_type { - TTM_REF_USAGE, - TTM_REF_SYNCCPU_READ, - TTM_REF_SYNCCPU_WRITE, - TTM_REF_NUM -}; +#include "vmwgfx_hashtab.h" /** * enum ttm_object_type @@ -78,7 +54,6 @@ enum ttm_ref_type { enum ttm_object_type { ttm_fence_type, - ttm_buffer_type, ttm_lock_type, ttm_prime_type, ttm_driver_type0 = 256, @@ -129,8 +104,6 @@ struct ttm_base_object { struct ttm_object_file *tfile; struct kref refcount; void (*refcount_release) (struct ttm_base_object **base); - void (*ref_obj_release) (struct ttm_base_object *base, - enum ttm_ref_type ref_type); u32 handle; enum ttm_object_type object_type; u32 shareable; @@ -179,11 +152,7 @@ extern int ttm_base_object_init(struct ttm_object_file *tfile, bool shareable, enum ttm_object_type type, void (*refcount_release) (struct ttm_base_object - **), - void (*ref_obj_release) (struct ttm_base_object - *, - enum ttm_ref_type - ref_type)); + **)); /** * ttm_base_object_lookup @@ -247,12 +216,9 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); */ extern int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed, + bool *existed, bool require_existed); -extern bool ttm_ref_object_exists(struct ttm_object_file *tfile, - struct ttm_base_object *base); - /** * ttm_ref_object_base_unref * @@ -265,8 +231,7 @@ extern bool ttm_ref_object_exists(struct ttm_object_file *tfile, * will be unreferenced. */ extern int ttm_ref_object_base_unref(struct ttm_object_file *tfile, - unsigned long key, - enum ttm_ref_type ref_type); + unsigned long key); /** * ttm_object_file_init - initialize a struct ttm_object file @@ -297,7 +262,6 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); /** * ttm_object device init - initialize a struct ttm_object_device * - * @mem_glob: struct ttm_mem_global for memory accounting. * @hash_order: Order of hash table used to hash the base objects. * @ops: DMA buf ops for prime objects of this device. * @@ -306,8 +270,7 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); */ extern struct ttm_object_device * -ttm_object_device_init(struct ttm_mem_global *mem_glob, - unsigned int hash_order, +ttm_object_device_init(unsigned int hash_order, const struct dma_buf_ops *ops); /** @@ -332,10 +295,7 @@ extern int ttm_prime_object_init(struct ttm_object_file *tfile, bool shareable, enum ttm_object_type type, void (*refcount_release) - (struct ttm_base_object **), - void (*ref_obj_release) - (struct ttm_base_object *, - enum ttm_ref_type ref_type)); + (struct ttm_base_object **)); static inline enum ttm_object_type ttm_base_object_type(struct ttm_base_object *base) @@ -353,13 +313,6 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, #define ttm_prime_object_kfree(__obj, __prime) \ kfree_rcu(__obj, __prime.base.rhead) -/* - * Extra memory required by the base object's idr storage, which is allocated - * separately from the base object itself. We estimate an on-average 128 bytes - * per idr. - */ -#define TTM_OBJ_EXTRA_SIZE 128 - struct ttm_base_object * ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c index 6f27d69bad0e..ae2de914eb89 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c @@ -354,6 +354,27 @@ void vmw_binding_add(struct vmw_ctx_binding_state *cbs, } /** + * vmw_binding_cb_offset_update: Update the offset of a cb binding + * + * @cbs: Pointer to the context binding state tracker. + * @shader_slot: The shader slot of the binding. + * @slot: The slot of the binding. + * @offsetInBytes: The new offset of the binding. + * + * Updates the offset of an existing cb binding in the context binding + * state structure @cbs. + */ +void vmw_binding_cb_offset_update(struct vmw_ctx_binding_state *cbs, + u32 shader_slot, u32 slot, u32 offsetInBytes) +{ + struct vmw_ctx_bindinfo *loc = + vmw_binding_loc(cbs, vmw_ctx_binding_cb, shader_slot, slot); + struct vmw_ctx_bindinfo_cb *loc_cb = + (struct vmw_ctx_bindinfo_cb *)((u8 *) loc); + loc_cb->offset = offsetInBytes; +} + +/** * vmw_binding_add_uav_index - Add UAV index for tracking. * @cbs: Pointer to the context binding state tracker. * @slot: UAV type to which bind this index. @@ -1070,7 +1091,7 @@ static int vmw_emit_set_uav(struct vmw_ctx_binding_state *cbs) size_t cmd_size, view_id_size; const struct vmw_resource *ctx = vmw_cbs_context(cbs); - vmw_collect_view_ids(cbs, loc, SVGA3D_MAX_UAVIEWS); + vmw_collect_view_ids(cbs, loc, vmw_max_num_uavs(cbs->dev_priv)); view_id_size = cbs->bind_cmd_count*sizeof(uint32); cmd_size = sizeof(*cmd) + view_id_size; cmd = VMW_CMD_CTX_RESERVE(ctx->dev_priv, cmd_size, ctx->id); @@ -1100,7 +1121,7 @@ static int vmw_emit_set_cs_uav(struct vmw_ctx_binding_state *cbs) size_t cmd_size, view_id_size; const struct vmw_resource *ctx = vmw_cbs_context(cbs); - vmw_collect_view_ids(cbs, loc, SVGA3D_MAX_UAVIEWS); + vmw_collect_view_ids(cbs, loc, vmw_max_num_uavs(cbs->dev_priv)); view_id_size = cbs->bind_cmd_count*sizeof(uint32); cmd_size = sizeof(*cmd) + view_id_size; cmd = VMW_CMD_CTX_RESERVE(ctx->dev_priv, cmd_size, ctx->id); @@ -1327,8 +1348,7 @@ static int vmw_binding_scrub_so(struct vmw_ctx_bindinfo *bi, bool rebind) } /** - * vmw_binding_state_alloc - Allocate a struct vmw_ctx_binding_state with - * memory accounting. + * vmw_binding_state_alloc - Allocate a struct vmw_ctx_binding_state. * * @dev_priv: Pointer to a device private structure. * @@ -1338,20 +1358,9 @@ struct vmw_ctx_binding_state * vmw_binding_state_alloc(struct vmw_private *dev_priv) { struct vmw_ctx_binding_state *cbs; - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; - int ret; - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), sizeof(*cbs), - &ctx); - if (ret) - return ERR_PTR(ret); cbs = vzalloc(sizeof(*cbs)); if (!cbs) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), sizeof(*cbs)); return ERR_PTR(-ENOMEM); } @@ -1362,17 +1371,13 @@ vmw_binding_state_alloc(struct vmw_private *dev_priv) } /** - * vmw_binding_state_free - Free a struct vmw_ctx_binding_state and its - * memory accounting info. + * vmw_binding_state_free - Free a struct vmw_ctx_binding_state. * * @cbs: Pointer to the struct vmw_ctx_binding_state to be freed. */ void vmw_binding_state_free(struct vmw_ctx_binding_state *cbs) { - struct vmw_private *dev_priv = cbs->dev_priv; - vfree(cbs); - ttm_mem_global_free(vmw_mem_glob(dev_priv), sizeof(*cbs)); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.h b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.h index dcb71fd0bb3b..85b90f7d398d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.h @@ -200,7 +200,7 @@ struct vmw_dx_shader_bindings { * @splice_index: The device splice index set by user-space. */ struct vmw_ctx_bindinfo_uav { - struct vmw_ctx_bindinfo_view views[SVGA3D_MAX_UAVIEWS]; + struct vmw_ctx_bindinfo_view views[SVGA3D_DX11_1_MAX_UAVIEWS]; uint32 index; }; @@ -217,6 +217,8 @@ struct vmw_ctx_bindinfo_so { extern void vmw_binding_add(struct vmw_ctx_binding_state *cbs, const struct vmw_ctx_bindinfo *ci, u32 shader_slot, u32 slot); +extern void vmw_binding_cb_offset_update(struct vmw_ctx_binding_state *cbs, + u32 shader_slot, u32 slot, u32 offsetInBytes); extern void vmw_binding_add_uav_index(struct vmw_ctx_binding_state *cbs, uint32 slot, uint32 splice_index); extern void diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index fd007f1c1776..31aecc46624b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -33,18 +33,6 @@ /** - * struct vmw_user_buffer_object - User-space-visible buffer object - * - * @prime: The prime object providing user visibility. - * @vbo: The struct vmw_buffer_object - */ -struct vmw_user_buffer_object { - struct ttm_prime_object prime; - struct vmw_buffer_object vbo; -}; - - -/** * vmw_buffer_object - Convert a struct ttm_buffer_object to a struct * vmw_buffer_object. * @@ -60,23 +48,6 @@ vmw_buffer_object(struct ttm_buffer_object *bo) /** - * vmw_user_buffer_object - Convert a struct ttm_buffer_object to a struct - * vmw_user_buffer_object. - * - * @bo: Pointer to the TTM buffer object. - * Return: Pointer to the struct vmw_buffer_object embedding the TTM buffer - * object. - */ -static struct vmw_user_buffer_object * -vmw_user_buffer_object(struct ttm_buffer_object *bo) -{ - struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); - - return container_of(vmw_bo, struct vmw_user_buffer_object, vbo); -} - - -/** * vmw_bo_pin_in_placement - Validate a buffer to placement. * * @dev_priv: Driver private. @@ -392,39 +363,6 @@ void vmw_bo_unmap(struct vmw_buffer_object *vbo) /** - * vmw_bo_acc_size - Calculate the pinned memory usage of buffers - * - * @dev_priv: Pointer to a struct vmw_private identifying the device. - * @size: The requested buffer size. - * @user: Whether this is an ordinary dma buffer or a user dma buffer. - */ -static size_t vmw_bo_acc_size(struct vmw_private *dev_priv, size_t size, - bool user) -{ - static size_t struct_size, user_struct_size; - size_t num_pages = PFN_UP(size); - size_t page_array_size = ttm_round_pot(num_pages * sizeof(void *)); - - if (unlikely(struct_size == 0)) { - size_t backend_size = ttm_round_pot(vmw_tt_size); - - struct_size = backend_size + - ttm_round_pot(sizeof(struct vmw_buffer_object)); - user_struct_size = backend_size + - ttm_round_pot(sizeof(struct vmw_user_buffer_object)) + - TTM_OBJ_EXTRA_SIZE; - } - - if (dev_priv->map_mode == vmw_dma_alloc_coherent) - page_array_size += - ttm_round_pot(num_pages * sizeof(dma_addr_t)); - - return ((user) ? user_struct_size : struct_size) + - page_array_size; -} - - -/** * vmw_bo_bo_free - vmw buffer object destructor * * @bo: Pointer to the embedded struct ttm_buffer_object @@ -436,27 +374,10 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) WARN_ON(vmw_bo->dirty); WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); - dma_resv_fini(&bo->base._resv); + drm_gem_object_release(&bo->base); kfree(vmw_bo); } - -/** - * vmw_user_bo_destroy - vmw buffer object destructor - * - * @bo: Pointer to the embedded struct ttm_buffer_object - */ -static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) -{ - struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo); - struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; - - WARN_ON(vbo->dirty); - WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); - vmw_bo_unmap(vbo); - ttm_prime_object_kfree(vmw_user_bo, prime); -} - /** * vmw_bo_create_kernel - Create a pinned BO for internal kernel use. * @@ -471,33 +392,27 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size, struct ttm_placement *placement, struct ttm_buffer_object **p_bo) { - struct ttm_operation_ctx ctx = { false, false }; + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; struct ttm_buffer_object *bo; - size_t acc_size; + struct drm_device *vdev = &dev_priv->drm; int ret; bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (unlikely(!bo)) return -ENOMEM; - acc_size = ttm_round_pot(sizeof(*bo)); - acc_size += ttm_round_pot(PFN_UP(size) * sizeof(void *)); - acc_size += ttm_round_pot(sizeof(struct ttm_tt)); - - ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); - if (unlikely(ret)) - goto error_free; - + size = ALIGN(size, PAGE_SIZE); - bo->base.size = size; - dma_resv_init(&bo->base._resv); - drm_vma_node_reset(&bo->base.vma_node); + drm_gem_private_object_init(vdev, &bo->base, size); ret = ttm_bo_init_reserved(&dev_priv->bdev, bo, size, - ttm_bo_type_device, placement, 0, + ttm_bo_type_kernel, placement, 0, &ctx, NULL, NULL, NULL); if (unlikely(ret)) - goto error_account; + goto error_free; ttm_bo_pin(bo); ttm_bo_unreserve(bo); @@ -505,14 +420,38 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size, return 0; -error_account: - ttm_mem_global_free(&ttm_mem_glob, acc_size); - error_free: kfree(bo); return ret; } +int vmw_bo_create(struct vmw_private *vmw, + size_t size, struct ttm_placement *placement, + bool interruptible, bool pin, + void (*bo_free)(struct ttm_buffer_object *bo), + struct vmw_buffer_object **p_bo) +{ + int ret; + + *p_bo = kmalloc(sizeof(**p_bo), GFP_KERNEL); + if (unlikely(!*p_bo)) { + DRM_ERROR("Failed to allocate a buffer.\n"); + return -ENOMEM; + } + + ret = vmw_bo_init(vmw, *p_bo, size, + placement, interruptible, pin, + bo_free); + if (unlikely(ret != 0)) + goto out_error; + + return ret; +out_error: + kfree(*p_bo); + *p_bo = NULL; + return ret; +} + /** * vmw_bo_init - Initialize a vmw buffer object * @@ -533,192 +472,44 @@ int vmw_bo_init(struct vmw_private *dev_priv, bool interruptible, bool pin, void (*bo_free)(struct ttm_buffer_object *bo)) { - struct ttm_operation_ctx ctx = { interruptible, false }; + struct ttm_operation_ctx ctx = { + .interruptible = interruptible, + .no_wait_gpu = false + }; struct ttm_device *bdev = &dev_priv->bdev; - size_t acc_size; + struct drm_device *vdev = &dev_priv->drm; int ret; - bool user = (bo_free == &vmw_user_bo_destroy); - - WARN_ON_ONCE(!bo_free && (!user && (bo_free != vmw_bo_bo_free))); - acc_size = vmw_bo_acc_size(dev_priv, size, user); + WARN_ON_ONCE(!bo_free); memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; vmw_bo->res_tree = RB_ROOT; - ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); - if (unlikely(ret)) - return ret; - - vmw_bo->base.base.size = size; - dma_resv_init(&vmw_bo->base.base._resv); - drm_vma_node_reset(&vmw_bo->base.base.vma_node); + size = ALIGN(size, PAGE_SIZE); + drm_gem_private_object_init(vdev, &vmw_bo->base.base, size); ret = ttm_bo_init_reserved(bdev, &vmw_bo->base, size, - ttm_bo_type_device, placement, + ttm_bo_type_device, + placement, 0, &ctx, NULL, NULL, bo_free); if (unlikely(ret)) { - ttm_mem_global_free(&ttm_mem_glob, acc_size); return ret; } if (pin) ttm_bo_pin(&vmw_bo->base); ttm_bo_unreserve(&vmw_bo->base); - return 0; -} - - -/** - * vmw_user_bo_release - TTM reference base object release callback for - * vmw user buffer objects - * - * @p_base: The TTM base object pointer about to be unreferenced. - * - * Clears the TTM base object pointer and drops the reference the - * base object has on the underlying struct vmw_buffer_object. - */ -static void vmw_user_bo_release(struct ttm_base_object **p_base) -{ - struct vmw_user_buffer_object *vmw_user_bo; - struct ttm_base_object *base = *p_base; - - *p_base = NULL; - - if (unlikely(base == NULL)) - return; - - vmw_user_bo = container_of(base, struct vmw_user_buffer_object, - prime.base); - ttm_bo_put(&vmw_user_bo->vbo.base); -} - - -/** - * vmw_user_bo_ref_obj_release - TTM synccpu reference object release callback - * for vmw user buffer objects - * - * @base: Pointer to the TTM base object - * @ref_type: Reference type of the reference reaching zero. - * - * Called when user-space drops its last synccpu reference on the buffer - * object, Either explicitly or as part of a cleanup file close. - */ -static void vmw_user_bo_ref_obj_release(struct ttm_base_object *base, - enum ttm_ref_type ref_type) -{ - struct vmw_user_buffer_object *user_bo; - - user_bo = container_of(base, struct vmw_user_buffer_object, prime.base); - - switch (ref_type) { - case TTM_REF_SYNCCPU_WRITE: - atomic_dec(&user_bo->vbo.cpu_writers); - break; - default: - WARN_ONCE(true, "Undefined buffer object reference release.\n"); - } -} - - -/** - * vmw_user_bo_alloc - Allocate a user buffer object - * - * @dev_priv: Pointer to a struct device private. - * @tfile: Pointer to a struct ttm_object_file on which to register the user - * object. - * @size: Size of the buffer object. - * @shareable: Boolean whether the buffer is shareable with other open files. - * @handle: Pointer to where the handle value should be assigned. - * @p_vbo: Pointer to where the refcounted struct vmw_buffer_object pointer - * should be assigned. - * @p_base: The TTM base object pointer about to be allocated. - * Return: Zero on success, negative error code on error. - */ -int vmw_user_bo_alloc(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t size, - bool shareable, - uint32_t *handle, - struct vmw_buffer_object **p_vbo, - struct ttm_base_object **p_base) -{ - struct vmw_user_buffer_object *user_bo; - int ret; - - user_bo = kzalloc(sizeof(*user_bo), GFP_KERNEL); - if (unlikely(!user_bo)) { - DRM_ERROR("Failed to allocate a buffer.\n"); - return -ENOMEM; - } - - ret = vmw_bo_init(dev_priv, &user_bo->vbo, size, - (dev_priv->has_mob) ? - &vmw_sys_placement : - &vmw_vram_sys_placement, true, false, - &vmw_user_bo_destroy); - if (unlikely(ret != 0)) - return ret; - - ttm_bo_get(&user_bo->vbo.base); - ret = ttm_prime_object_init(tfile, - size, - &user_bo->prime, - shareable, - ttm_buffer_type, - &vmw_user_bo_release, - &vmw_user_bo_ref_obj_release); - if (unlikely(ret != 0)) { - ttm_bo_put(&user_bo->vbo.base); - goto out_no_base_object; - } - - *p_vbo = &user_bo->vbo; - if (p_base) { - *p_base = &user_bo->prime.base; - kref_get(&(*p_base)->refcount); - } - *handle = user_bo->prime.base.handle; - -out_no_base_object: - return ret; -} - -/** - * vmw_user_bo_verify_access - verify access permissions on this - * buffer object. - * - * @bo: Pointer to the buffer object being accessed - * @tfile: Identifying the caller. - */ -int vmw_user_bo_verify_access(struct ttm_buffer_object *bo, - struct ttm_object_file *tfile) -{ - struct vmw_user_buffer_object *vmw_user_bo; - - if (unlikely(bo->destroy != vmw_user_bo_destroy)) - return -EPERM; - - vmw_user_bo = vmw_user_buffer_object(bo); - - /* Check that the caller has opened the object. */ - if (likely(ttm_ref_object_exists(tfile, &vmw_user_bo->prime.base))) - return 0; - - DRM_ERROR("Could not grant buffer access.\n"); - return -EPERM; + return 0; } - /** - * vmw_user_bo_synccpu_grab - Grab a struct vmw_user_buffer_object for cpu + * vmw_user_bo_synccpu_grab - Grab a struct vmw_buffer_object for cpu * access, idling previous GPU operations on the buffer and optionally * blocking it for further command submissions. * - * @user_bo: Pointer to the buffer object being grabbed for CPU access - * @tfile: Identifying the caller. + * @vmw_bo: Pointer to the buffer object being grabbed for CPU access * @flags: Flags indicating how the grab should be performed. * Return: Zero on success, Negative error code on error. In particular, * -EBUSY will be returned if a dontblock operation is requested and the @@ -727,13 +518,11 @@ int vmw_user_bo_verify_access(struct ttm_buffer_object *bo, * * A blocking grab will be automatically released when @tfile is closed. */ -static int vmw_user_bo_synccpu_grab(struct vmw_user_buffer_object *user_bo, - struct ttm_object_file *tfile, +static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo, uint32_t flags) { bool nonblock = !!(flags & drm_vmw_synccpu_dontblock); - struct ttm_buffer_object *bo = &user_bo->vbo.base; - bool existed; + struct ttm_buffer_object *bo = &vmw_bo->base; int ret; if (flags & drm_vmw_synccpu_allow_cs) { @@ -755,17 +544,12 @@ static int vmw_user_bo_synccpu_grab(struct vmw_user_buffer_object *user_bo, ret = ttm_bo_wait(bo, true, nonblock); if (likely(ret == 0)) - atomic_inc(&user_bo->vbo.cpu_writers); + atomic_inc(&vmw_bo->cpu_writers); ttm_bo_unreserve(bo); if (unlikely(ret != 0)) return ret; - ret = ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_SYNCCPU_WRITE, &existed, false); - if (ret != 0 || existed) - atomic_dec(&user_bo->vbo.cpu_writers); - return ret; } @@ -773,19 +557,25 @@ static int vmw_user_bo_synccpu_grab(struct vmw_user_buffer_object *user_bo, * vmw_user_bo_synccpu_release - Release a previous grab for CPU access, * and unblock command submission on the buffer if blocked. * + * @filp: Identifying the caller. * @handle: Handle identifying the buffer object. - * @tfile: Identifying the caller. * @flags: Flags indicating the type of release. */ -static int vmw_user_bo_synccpu_release(uint32_t handle, - struct ttm_object_file *tfile, - uint32_t flags) +static int vmw_user_bo_synccpu_release(struct drm_file *filp, + uint32_t handle, + uint32_t flags) { - if (!(flags & drm_vmw_synccpu_allow_cs)) - return ttm_ref_object_base_unref(tfile, handle, - TTM_REF_SYNCCPU_WRITE); + struct vmw_buffer_object *vmw_bo; + int ret = vmw_user_bo_lookup(filp, handle, &vmw_bo); - return 0; + if (!ret) { + if (!(flags & drm_vmw_synccpu_allow_cs)) { + atomic_dec(&vmw_bo->cpu_writers); + } + ttm_bo_put(&vmw_bo->base); + } + + return ret; } @@ -807,9 +597,6 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, struct drm_vmw_synccpu_arg *arg = (struct drm_vmw_synccpu_arg *) data; struct vmw_buffer_object *vbo; - struct vmw_user_buffer_object *user_bo; - struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct ttm_base_object *buffer_base; int ret; if ((arg->flags & (drm_vmw_synccpu_read | drm_vmw_synccpu_write)) == 0 @@ -822,16 +609,12 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, switch (arg->op) { case drm_vmw_synccpu_grab: - ret = vmw_user_bo_lookup(tfile, arg->handle, &vbo, - &buffer_base); + ret = vmw_user_bo_lookup(file_priv, arg->handle, &vbo); if (unlikely(ret != 0)) return ret; - user_bo = container_of(vbo, struct vmw_user_buffer_object, - vbo); - ret = vmw_user_bo_synccpu_grab(user_bo, tfile, arg->flags); + ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); vmw_bo_unreference(&vbo); - ttm_base_object_unref(&buffer_base); if (unlikely(ret != 0 && ret != -ERESTARTSYS && ret != -EBUSY)) { DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n", @@ -840,7 +623,8 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, } break; case drm_vmw_synccpu_release: - ret = vmw_user_bo_synccpu_release(arg->handle, tfile, + ret = vmw_user_bo_synccpu_release(file_priv, + arg->handle, arg->flags); if (unlikely(ret != 0)) { DRM_ERROR("Failed synccpu release on handle 0x%08x.\n", @@ -856,50 +640,6 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, return 0; } - -/** - * vmw_bo_alloc_ioctl - ioctl function implementing the buffer object - * allocation functionality. - * - * @dev: Identifies the drm device. - * @data: Pointer to the ioctl argument. - * @file_priv: Identifies the caller. - * Return: Zero on success, negative error code on error. - * - * This function checks the ioctl arguments for validity and allocates a - * struct vmw_user_buffer_object bo. - */ -int vmw_bo_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - union drm_vmw_alloc_dmabuf_arg *arg = - (union drm_vmw_alloc_dmabuf_arg *)data; - struct drm_vmw_alloc_dmabuf_req *req = &arg->req; - struct drm_vmw_dmabuf_rep *rep = &arg->rep; - struct vmw_buffer_object *vbo; - uint32_t handle; - int ret; - - ret = vmw_user_bo_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, - req->size, false, &handle, &vbo, - NULL); - if (unlikely(ret != 0)) - goto out_no_bo; - - rep->handle = handle; - rep->map_handle = drm_vma_node_offset_addr(&vbo->base.base.vma_node); - rep->cur_gmr_id = handle; - rep->cur_gmr_offset = 0; - - vmw_bo_unreference(&vbo); - -out_no_bo: - - return ret; -} - - /** * vmw_bo_unref_ioctl - Generic handle close ioctl. * @@ -917,65 +657,48 @@ int vmw_bo_unref_ioctl(struct drm_device *dev, void *data, struct drm_vmw_unref_dmabuf_arg *arg = (struct drm_vmw_unref_dmabuf_arg *)data; - return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - arg->handle, - TTM_REF_USAGE); + drm_gem_handle_delete(file_priv, arg->handle); + return 0; } /** * vmw_user_bo_lookup - Look up a vmw user buffer object from a handle. * - * @tfile: The TTM object file the handle is registered with. + * @filp: The file the handle is registered with. * @handle: The user buffer object handle * @out: Pointer to a where a pointer to the embedded * struct vmw_buffer_object should be placed. - * @p_base: Pointer to where a pointer to the TTM base object should be - * placed, or NULL if no such pointer is required. * Return: Zero on success, Negative error code on error. * - * Both the output base object pointer and the vmw buffer object pointer - * will be refcounted. + * The vmw buffer object pointer will be refcounted. */ -int vmw_user_bo_lookup(struct ttm_object_file *tfile, - uint32_t handle, struct vmw_buffer_object **out, - struct ttm_base_object **p_base) +int vmw_user_bo_lookup(struct drm_file *filp, + uint32_t handle, + struct vmw_buffer_object **out) { - struct vmw_user_buffer_object *vmw_user_bo; - struct ttm_base_object *base; + struct drm_gem_object *gobj; - base = ttm_base_object_lookup(tfile, handle); - if (unlikely(base == NULL)) { + gobj = drm_gem_object_lookup(filp, handle); + if (!gobj) { DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", (unsigned long)handle); return -ESRCH; } - if (unlikely(ttm_base_object_type(base) != ttm_buffer_type)) { - ttm_base_object_unref(&base); - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return -EINVAL; - } - - vmw_user_bo = container_of(base, struct vmw_user_buffer_object, - prime.base); - ttm_bo_get(&vmw_user_bo->vbo.base); - if (p_base) - *p_base = base; - else - ttm_base_object_unref(&base); - *out = &vmw_user_bo->vbo; + *out = gem_to_vmw_bo(gobj); + ttm_bo_get(&(*out)->base); + drm_gem_object_put(gobj); return 0; } /** * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference - * @tfile: The TTM object file the handle is registered with. + * @filp: The TTM object file the handle is registered with. * @handle: The user buffer object handle. * - * This function looks up a struct vmw_user_bo and returns a pointer to the + * This function looks up a struct vmw_bo and returns a pointer to the * struct vmw_buffer_object it derives from without refcounting the pointer. * The returned pointer is only valid until vmw_user_bo_noref_release() is * called, and the object pointed to by the returned pointer may be doomed. @@ -988,52 +711,23 @@ int vmw_user_bo_lookup(struct ttm_object_file *tfile, * error pointer on failure. */ struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct ttm_object_file *tfile, u32 handle) +vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle) { - struct vmw_user_buffer_object *vmw_user_bo; - struct ttm_base_object *base; + struct vmw_buffer_object *vmw_bo; + struct ttm_buffer_object *bo; + struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle); - base = ttm_base_object_noref_lookup(tfile, handle); - if (!base) { + if (!gobj) { DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", (unsigned long)handle); return ERR_PTR(-ESRCH); } + vmw_bo = gem_to_vmw_bo(gobj); + bo = ttm_bo_get_unless_zero(&vmw_bo->base); + vmw_bo = vmw_buffer_object(bo); + drm_gem_object_put(gobj); - if (unlikely(ttm_base_object_type(base) != ttm_buffer_type)) { - ttm_base_object_noref_release(); - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return ERR_PTR(-EINVAL); - } - - vmw_user_bo = container_of(base, struct vmw_user_buffer_object, - prime.base); - return &vmw_user_bo->vbo; -} - -/** - * vmw_user_bo_reference - Open a handle to a vmw user buffer object. - * - * @tfile: The TTM object file to register the handle with. - * @vbo: The embedded vmw buffer object. - * @handle: Pointer to where the new handle should be placed. - * Return: Zero on success, Negative error code on error. - */ -int vmw_user_bo_reference(struct ttm_object_file *tfile, - struct vmw_buffer_object *vbo, - uint32_t *handle) -{ - struct vmw_user_buffer_object *user_bo; - - if (vbo->base.destroy != vmw_user_bo_destroy) - return -EINVAL; - - user_bo = container_of(vbo, struct vmw_user_buffer_object, vbo); - - *handle = user_bo->prime.base.handle; - return ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_USAGE, NULL, false); + return vmw_bo; } @@ -1087,68 +781,15 @@ int vmw_dumb_create(struct drm_file *file_priv, int ret; args->pitch = args->width * ((args->bpp + 7) / 8); - args->size = args->pitch * args->height; + args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); - ret = vmw_user_bo_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, - args->size, false, &args->handle, - &vbo, NULL); - if (unlikely(ret != 0)) - goto out_no_bo; + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + args->size, &args->handle, + &vbo); - vmw_bo_unreference(&vbo); -out_no_bo: return ret; } - -/** - * vmw_dumb_map_offset - Return the address space offset of a dumb buffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @handle: Handle identifying the dumb buffer. - * @offset: The address space offset returned. - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm dumb_map_offset functionality. - */ -int vmw_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *dev, uint32_t handle, - uint64_t *offset) -{ - struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct vmw_buffer_object *out_buf; - int ret; - - ret = vmw_user_bo_lookup(tfile, handle, &out_buf, NULL); - if (ret != 0) - return -EINVAL; - - *offset = drm_vma_node_offset_addr(&out_buf->base.base.vma_node); - vmw_bo_unreference(&out_buf); - return 0; -} - - -/** - * vmw_dumb_destroy - Destroy a dumb boffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @handle: Handle identifying the dumb buffer. - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm dumb_destroy functionality. - */ -int vmw_dumb_destroy(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle) -{ - return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - handle, TTM_REF_USAGE); -} - - /** * vmw_bo_swap_notify - swapout notify callback. * @@ -1157,8 +798,7 @@ int vmw_dumb_destroy(struct drm_file *file_priv, void vmw_bo_swap_notify(struct ttm_buffer_object *bo) { /* Is @bo embedded in a struct vmw_buffer_object? */ - if (bo->destroy != vmw_bo_bo_free && - bo->destroy != vmw_user_bo_destroy) + if (vmw_bo_is_vmw_bo(bo)) return; /* Kill any cached kernel maps before swapout */ @@ -1182,8 +822,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct vmw_buffer_object *vbo; /* Make sure @bo is embedded in a struct vmw_buffer_object? */ - if (bo->destroy != vmw_bo_bo_free && - bo->destroy != vmw_user_bo_destroy) + if (vmw_bo_is_vmw_bo(bo)) return; vbo = container_of(bo, struct vmw_buffer_object, base); @@ -1204,3 +843,22 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB) vmw_resource_unbind_list(vbo); } + +/** + * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object + * @bo: buffer object to be checked + * + * Uses destroy function associated with the object to determine if this is + * a &vmw_buffer_object. + * + * Returns: + * true if the object is of &vmw_buffer_object type, false if not. + */ +bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo) +{ + if (bo->destroy == &vmw_bo_bo_free || + bo->destroy == &vmw_gem_destroy) + return true; + + return false; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index 67db472d3493..a3bfbb6c3e14 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -145,6 +145,13 @@ struct vmw_fifo_state *vmw_fifo_create(struct vmw_private *dev_priv) (unsigned int) max, (unsigned int) min, (unsigned int) fifo->capabilities); + + if (unlikely(min >= max)) { + drm_warn(&dev_priv->drm, + "FIFO memory is not usable. Driver failed to initialize."); + return ERR_PTR(-ENXIO); + } + return fifo; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 8381750db81b..415774fde796 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -42,7 +42,7 @@ */ struct vmw_cmdbuf_res { struct vmw_resource *res; - struct drm_hash_item hash; + struct vmwgfx_hash_item hash; struct list_head head; enum vmw_cmdbuf_res_state state; struct vmw_cmdbuf_res_manager *man; @@ -59,7 +59,7 @@ struct vmw_cmdbuf_res { * @resources and @list are protected by the cmdbuf mutex for now. */ struct vmw_cmdbuf_res_manager { - struct drm_open_hash resources; + struct vmwgfx_open_hash resources; struct list_head list; struct vmw_private *dev_priv; }; @@ -81,11 +81,11 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, enum vmw_cmdbuf_res_type res_type, u32 user_key) { - struct drm_hash_item *hash; + struct vmwgfx_hash_item *hash; int ret; unsigned long key = user_key | (res_type << 24); - ret = drm_ht_find_item(&man->resources, key, &hash); + ret = vmwgfx_ht_find_item(&man->resources, key, &hash); if (unlikely(ret != 0)) return ERR_PTR(ret); @@ -105,7 +105,7 @@ static void vmw_cmdbuf_res_free(struct vmw_cmdbuf_res_manager *man, struct vmw_cmdbuf_res *entry) { list_del(&entry->head); - WARN_ON(drm_ht_remove_item(&man->resources, &entry->hash)); + WARN_ON(vmwgfx_ht_remove_item(&man->resources, &entry->hash)); vmw_resource_unreference(&entry->res); kfree(entry); } @@ -167,7 +167,7 @@ void vmw_cmdbuf_res_revert(struct list_head *list) vmw_cmdbuf_res_free(entry->man, entry); break; case VMW_CMDBUF_RES_DEL: - ret = drm_ht_insert_item(&entry->man->resources, &entry->hash); + ret = vmwgfx_ht_insert_item(&entry->man->resources, &entry->hash); BUG_ON(ret); list_move_tail(&entry->head, &entry->man->list); entry->state = VMW_CMDBUF_RES_COMMITTED; @@ -206,7 +206,7 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, return -ENOMEM; cres->hash.key = user_key | (res_type << 24); - ret = drm_ht_insert_item(&man->resources, &cres->hash); + ret = vmwgfx_ht_insert_item(&man->resources, &cres->hash); if (unlikely(ret != 0)) { kfree(cres); goto out_invalid_key; @@ -244,10 +244,10 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, struct vmw_resource **res_p) { struct vmw_cmdbuf_res *entry; - struct drm_hash_item *hash; + struct vmwgfx_hash_item *hash; int ret; - ret = drm_ht_find_item(&man->resources, user_key | (res_type << 24), + ret = vmwgfx_ht_find_item(&man->resources, user_key | (res_type << 24), &hash); if (likely(ret != 0)) return -EINVAL; @@ -260,7 +260,7 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, *res_p = NULL; break; case VMW_CMDBUF_RES_COMMITTED: - (void) drm_ht_remove_item(&man->resources, &entry->hash); + (void) vmwgfx_ht_remove_item(&man->resources, &entry->hash); list_del(&entry->head); entry->state = VMW_CMDBUF_RES_DEL; list_add_tail(&entry->head, list); @@ -295,7 +295,7 @@ vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv) man->dev_priv = dev_priv; INIT_LIST_HEAD(&man->list); - ret = drm_ht_create(&man->resources, VMW_CMDBUF_RES_MAN_HT_ORDER); + ret = vmwgfx_ht_create(&man->resources, VMW_CMDBUF_RES_MAN_HT_ORDER); if (ret == 0) return man; @@ -320,26 +320,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man) list_for_each_entry_safe(entry, next, &man->list, head) vmw_cmdbuf_res_free(man, entry); - drm_ht_remove(&man->resources); + vmwgfx_ht_remove(&man->resources); kfree(man); } -/** - * vmw_cmdbuf_res_man_size - Return the size of a command buffer managed - * resource manager - * - * Returns the approximate allocation size of a command buffer managed - * resource manager. - */ -size_t vmw_cmdbuf_res_man_size(void) -{ - static size_t res_man_size; - - if (unlikely(res_man_size == 0)) - res_man_size = - ttm_round_pot(sizeof(struct vmw_cmdbuf_res_manager)) + - ttm_round_pot(sizeof(struct hlist_head) << - VMW_CMDBUF_RES_MAN_HT_ORDER); - - return res_man_size; -} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 4446758b6880..e0f48cd9529b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -60,8 +60,6 @@ static int vmw_dx_context_unbind(struct vmw_resource *res, struct ttm_validate_buffer *val_buf); static int vmw_dx_context_destroy(struct vmw_resource *res); -static uint64_t vmw_user_context_size; - static const struct vmw_user_resource_conv user_context_conv = { .object_type = VMW_RES_CONTEXT, .base_obj_to_res = vmw_user_context_base_to_res, @@ -686,7 +684,6 @@ static void vmw_user_context_free(struct vmw_resource *res) { struct vmw_user_context *ctx = container_of(res, struct vmw_user_context, res); - struct vmw_private *dev_priv = res->dev_priv; if (ctx->cbs) vmw_binding_state_free(ctx->cbs); @@ -694,8 +691,6 @@ static void vmw_user_context_free(struct vmw_resource *res) (void) vmw_context_bind_dx_query(res, NULL); ttm_base_object_kfree(ctx, base); - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_context_size); } /* @@ -720,7 +715,7 @@ int vmw_context_destroy_ioctl(struct drm_device *dev, void *data, struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_ref_object_base_unref(tfile, arg->cid, TTM_REF_USAGE); + return ttm_ref_object_base_unref(tfile, arg->cid); } static int vmw_context_define(struct drm_device *dev, void *data, @@ -732,10 +727,6 @@ static int vmw_context_define(struct drm_device *dev, void *data, struct vmw_resource *tmp; struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct ttm_operation_ctx ttm_opt_ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; if (!has_sm4_context(dev_priv) && dx) { @@ -743,25 +734,8 @@ static int vmw_context_define(struct drm_device *dev, void *data, return -EINVAL; } - if (unlikely(vmw_user_context_size == 0)) - vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + - ((dev_priv->has_mob) ? vmw_cmdbuf_res_man_size() : 0) + - + VMW_IDA_ACC_SIZE + TTM_OBJ_EXTRA_SIZE; - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_user_context_size, - &ttm_opt_ctx); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for context" - " creation.\n"); - goto out_ret; - } - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (unlikely(!ctx)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_context_size); ret = -ENOMEM; goto out_ret; } @@ -780,7 +754,7 @@ static int vmw_context_define(struct drm_device *dev, void *data, tmp = vmw_resource_reference(&ctx->res); ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT, - &vmw_user_context_base_release, NULL); + &vmw_user_context_base_release); if (unlikely(ret != 0)) { vmw_resource_unreference(&tmp); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index 17a98db00017..16f986b6cbea 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -407,12 +407,8 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) * for the new COTable. Initially pin the buffer object to make sure * we can use tryreserve without failure. */ - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = vmw_bo_init(dev_priv, buf, new_size, &vmw_mob_placement, - true, true, vmw_bo_bo_free); + ret = vmw_bo_create(dev_priv, new_size, &vmw_mob_placement, + true, true, vmw_bo_bo_free, &buf); if (ret) { DRM_ERROR("Failed initializing new cotable MOB.\n"); return ret; @@ -546,8 +542,6 @@ static void vmw_hw_cotable_destroy(struct vmw_resource *res) (void) vmw_cotable_destroy(res); } -static size_t cotable_acc_size; - /** * vmw_cotable_free - Cotable resource destructor * @@ -555,10 +549,7 @@ static size_t cotable_acc_size; */ static void vmw_cotable_free(struct vmw_resource *res) { - struct vmw_private *dev_priv = res->dev_priv; - kfree(res); - ttm_mem_global_free(vmw_mem_glob(dev_priv), cotable_acc_size); } /** @@ -574,21 +565,9 @@ struct vmw_resource *vmw_cotable_alloc(struct vmw_private *dev_priv, u32 type) { struct vmw_cotable *vcotbl; - struct ttm_operation_ctx ttm_opt_ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; u32 num_entries; - if (unlikely(cotable_acc_size == 0)) - cotable_acc_size = ttm_round_pot(sizeof(struct vmw_cotable)); - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - cotable_acc_size, &ttm_opt_ctx); - if (unlikely(ret)) - return ERR_PTR(ret); - vcotbl = kzalloc(sizeof(*vcotbl), GFP_KERNEL); if (unlikely(!vcotbl)) { ret = -ENOMEM; @@ -622,7 +601,6 @@ struct vmw_resource *vmw_cotable_alloc(struct vmw_private *dev_priv, out_no_init: kfree(vcotbl); out_no_alloc: - ttm_mem_global_free(vmw_mem_glob(dev_priv), cotable_acc_size); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index bfd71c86faa5..fe36efdb7ff5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,7 +25,6 @@ * **************************************************************************/ -#include <linux/console.h> #include <linux/dma-mapping.h> #include <linux/module.h> #include <linux/pci.h> @@ -35,6 +34,7 @@ #include <drm/drm_drv.h> #include <drm/drm_ioctl.h> #include <drm/drm_sysfs.h> +#include <drm/drm_gem_ttm_helper.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_range_manager.h> #include <drm/ttm/ttm_placement.h> @@ -51,9 +51,6 @@ #define VMW_MIN_INITIAL_WIDTH 800 #define VMW_MIN_INITIAL_HEIGHT 600 -#define VMWGFX_VALIDATION_MEM_GRAN (16*PAGE_SIZE) - - /* * Fully encoded drm commands. Might move to vmw_drm.h */ @@ -166,7 +163,7 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_IOCTL_DEF_DRV(VMW_GET_PARAM, vmw_getparam_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl, + DRM_IOCTL_DEF_DRV(VMW_ALLOC_DMABUF, vmw_gem_object_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl, DRM_RENDER_ALLOW), @@ -258,8 +255,8 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { }; static const struct pci_device_id vmw_pci_id_list[] = { - { PCI_DEVICE(0x15ad, VMWGFX_PCI_ID_SVGA2) }, - { PCI_DEVICE(0x15ad, VMWGFX_PCI_ID_SVGA3) }, + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, VMWGFX_PCI_ID_SVGA2) }, + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, VMWGFX_PCI_ID_SVGA3) }, { } }; MODULE_DEVICE_TABLE(pci, vmw_pci_id_list); @@ -367,6 +364,7 @@ static void vmw_print_sm_type(struct vmw_private *dev_priv) [VMW_SM_4] = "SM4", [VMW_SM_4_1] = "SM4_1", [VMW_SM_5] = "SM_5", + [VMW_SM_5_1X] = "SM_5_1X", [VMW_SM_MAX] = "Invalid" }; BUILD_BUG_ON(ARRAY_SIZE(names) != (VMW_SM_MAX + 1)); @@ -400,13 +398,9 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) * immediately succeed. This is because we're the only * user of the bo currently. */ - vbo = kzalloc(sizeof(*vbo), GFP_KERNEL); - if (!vbo) - return -ENOMEM; - - ret = vmw_bo_init(dev_priv, vbo, PAGE_SIZE, - &vmw_sys_placement, false, true, - &vmw_bo_bo_free); + ret = vmw_bo_create(dev_priv, PAGE_SIZE, + &vmw_sys_placement, false, true, + &vmw_bo_bo_free, &vbo); if (unlikely(ret != 0)) return ret; @@ -707,23 +701,15 @@ static int vmw_dma_masks(struct vmw_private *dev_priv) static int vmw_vram_manager_init(struct vmw_private *dev_priv) { int ret; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - ret = vmw_thp_init(dev_priv); -#else ret = ttm_range_man_init(&dev_priv->bdev, TTM_PL_VRAM, false, dev_priv->vram_size >> PAGE_SHIFT); -#endif ttm_resource_manager_set_used(ttm_manager_type(&dev_priv->bdev, TTM_PL_VRAM), false); return ret; } static void vmw_vram_manager_fini(struct vmw_private *dev_priv) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - vmw_thp_fini(dev_priv); -#else ttm_range_man_fini(&dev_priv->bdev, TTM_PL_VRAM); -#endif } static int vmw_setup_pci_resources(struct vmw_private *dev, @@ -987,8 +973,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) goto out_err0; } - dev_priv->tdev = ttm_object_device_init(&ttm_mem_glob, 12, - &vmw_prime_dmabuf_ops); + dev_priv->tdev = ttm_object_device_init(12, &vmw_prime_dmabuf_ops); if (unlikely(dev_priv->tdev == NULL)) { drm_err(&dev_priv->drm, @@ -1071,6 +1056,12 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) "3D will be disabled.\n"); dev_priv->has_mob = false; } + if (vmw_sys_man_init(dev_priv) != 0) { + drm_info(&dev_priv->drm, + "No MOB page table memory available. " + "3D will be disabled.\n"); + dev_priv->has_mob = false; + } } if (dev_priv->has_mob && (dev_priv->capabilities & SVGA_CAP_DX)) { @@ -1078,8 +1069,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->sm_type = VMW_SM_4; } - vmw_validation_mem_init_ttm(dev_priv, VMWGFX_VALIDATION_MEM_GRAN); - /* SVGA_CAP2_DX2 (DefineGBSurface_v3) is needed for SM4_1 support */ if (has_sm4_context(dev_priv) && (dev_priv->capabilities2 & SVGA_CAP2_DX2)) { @@ -1087,8 +1076,11 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->sm_type = VMW_SM_4_1; if (has_sm4_1_context(dev_priv) && (dev_priv->capabilities2 & SVGA_CAP2_DX3)) { - if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_SM5)) + if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_SM5)) { dev_priv->sm_type = VMW_SM_5; + if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_GL43)) + dev_priv->sm_type = VMW_SM_5_1X; + } } } @@ -1121,8 +1113,10 @@ out_no_fifo: vmw_overlay_close(dev_priv); vmw_kms_close(dev_priv); out_no_kms: - if (dev_priv->has_mob) + if (dev_priv->has_mob) { vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB); + vmw_sys_man_fini(dev_priv); + } if (dev_priv->has_gmr) vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR); vmw_devcaps_destroy(dev_priv); @@ -1156,7 +1150,7 @@ static void vmw_driver_unload(struct drm_device *dev) unregister_pm_notifier(&dev_priv->pm_nb); if (dev_priv->ctx.res_ht_initialized) - drm_ht_remove(&dev_priv->ctx.res_ht); + vmwgfx_ht_remove(&dev_priv->ctx.res_ht); vfree(dev_priv->ctx.cmd_bounce); if (dev_priv->enable_fb) { vmw_fb_off(dev_priv); @@ -1172,8 +1166,10 @@ static void vmw_driver_unload(struct drm_device *dev) vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR); vmw_release_device_early(dev_priv); - if (dev_priv->has_mob) + if (dev_priv->has_mob) { vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB); + vmw_sys_man_fini(dev_priv); + } vmw_devcaps_destroy(dev_priv); vmw_vram_manager_fini(dev_priv); ttm_device_fini(&dev_priv->bdev); @@ -1388,7 +1384,6 @@ static void vmw_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - ttm_mem_global_release(&ttm_mem_glob); drm_dev_unregister(dev); vmw_driver_unload(dev); } @@ -1576,7 +1571,7 @@ static const struct file_operations vmwgfx_driver_fops = { static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC, + DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM, .ioctls = vmw_ioctls, .num_ioctls = ARRAY_SIZE(vmw_ioctls), .master_set = vmw_master_set, @@ -1585,8 +1580,7 @@ static const struct drm_driver driver = { .postclose = vmw_postclose, .dumb_create = vmw_dumb_create, - .dumb_map_offset = vmw_dumb_map_offset, - .dumb_destroy = vmw_dumb_destroy, + .dumb_map_offset = drm_gem_ttm_dumb_map_offset, .prime_fd_to_handle = vmw_prime_fd_to_handle, .prime_handle_to_fd = vmw_prime_handle_to_fd, @@ -1617,41 +1611,43 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (ret) - return ret; + goto out_error; ret = pcim_enable_device(pdev); if (ret) - return ret; + goto out_error; vmw = devm_drm_dev_alloc(&pdev->dev, &driver, struct vmw_private, drm); - if (IS_ERR(vmw)) - return PTR_ERR(vmw); + if (IS_ERR(vmw)) { + ret = PTR_ERR(vmw); + goto out_error; + } pci_set_drvdata(pdev, &vmw->drm); - ret = ttm_mem_global_init(&ttm_mem_glob, &pdev->dev); - if (ret) - return ret; - ret = vmw_driver_load(vmw, ent->device); if (ret) - return ret; + goto out_error; ret = drm_dev_register(&vmw->drm, 0); - if (ret) { - vmw_driver_unload(&vmw->drm); - return ret; - } + if (ret) + goto out_unload; + + vmw_debugfs_gem_init(vmw); return 0; +out_unload: + vmw_driver_unload(&vmw->drm); +out_error: + return ret; } static int __init vmwgfx_init(void) { int ret; - if (vgacon_text_force()) + if (drm_firmware_drivers_only()) return -EINVAL; ret = pci_register_driver(&vmw_pci_driver); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 858aff99a3fe..d6b66636a19b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -34,7 +34,6 @@ #include <drm/drm_auth.h> #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_hashtab.h> #include <drm/drm_rect.h> #include <drm/ttm/ttm_bo_driver.h> @@ -43,6 +42,7 @@ #include "ttm_object.h" #include "vmwgfx_fence.h" +#include "vmwgfx_hashtab.h" #include "vmwgfx_reg.h" #include "vmwgfx_validation.h" @@ -54,16 +54,13 @@ #define VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20210722" +#define VMWGFX_DRIVER_DATE "20211206" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 19 +#define VMWGFX_DRIVER_MINOR 20 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) -#define VMWGFX_MAX_RELOCATIONS 2048 -#define VMWGFX_MAX_VALIDATIONS 2048 #define VMWGFX_MAX_DISPLAYS 16 #define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768 -#define VMWGFX_ENABLE_SCREEN_TARGET_OTABLE 1 #define VMWGFX_PCI_ID_SVGA2 0x0405 #define VMWGFX_PCI_ID_SVGA3 0x0406 @@ -82,8 +79,9 @@ VMWGFX_NUM_GB_SURFACE +\ VMWGFX_NUM_GB_SCREEN_TARGET) -#define VMW_PL_GMR (TTM_PL_PRIV + 0) -#define VMW_PL_MOB (TTM_PL_PRIV + 1) +#define VMW_PL_GMR (TTM_PL_PRIV + 0) +#define VMW_PL_MOB (TTM_PL_PRIV + 1) +#define VMW_PL_SYSTEM (TTM_PL_PRIV + 2) #define VMW_RES_CONTEXT ttm_driver_type0 #define VMW_RES_SURFACE ttm_driver_type1 @@ -133,7 +131,7 @@ struct vmw_buffer_object { */ struct vmw_validate_buffer { struct ttm_validate_buffer base; - struct drm_hash_item hash; + struct vmwgfx_hash_item hash; bool validate_as_mob; }; @@ -332,7 +330,6 @@ struct vmw_sg_table { struct page **pages; const dma_addr_t *addrs; struct sg_table *sgt; - unsigned long num_regions; unsigned long num_pages; }; @@ -360,6 +357,19 @@ struct vmw_piter { dma_addr_t (*dma_address)(struct vmw_piter *); }; + +struct vmw_ttm_tt { + struct ttm_tt dma_ttm; + struct vmw_private *dev_priv; + int gmr_id; + struct vmw_mob *mob; + int mem_type; + struct sg_table sgt; + struct vmw_sg_table vsgt; + bool mapped; + bool bound; +}; + /* * enum vmw_display_unit_type - Describes the display unit */ @@ -406,10 +416,11 @@ struct vmw_ctx_validation_info; * @ctx: The validation context */ struct vmw_sw_context{ - struct drm_open_hash res_ht; + struct vmwgfx_open_hash res_ht; bool res_ht_initialized; bool kernel; struct vmw_fpriv *fp; + struct drm_file *filp; uint32_t *cmd_bounce; uint32_t cmd_bounce_size; struct vmw_buffer_object *cur_query_bo; @@ -473,6 +484,7 @@ enum { * @VMW_SM_4: Context support upto SM4. * @VMW_SM_4_1: Context support upto SM4_1. * @VMW_SM_5: Context support up to SM5. + * @VMW_SM_5_1X: Adds support for sm5_1 and gl43 extensions. * @VMW_SM_MAX: Should be the last. */ enum vmw_sm_type { @@ -480,6 +492,7 @@ enum vmw_sm_type { VMW_SM_4, VMW_SM_4_1, VMW_SM_5, + VMW_SM_5_1X, VMW_SM_MAX }; @@ -627,9 +640,6 @@ struct vmw_private { struct vmw_cmdbuf_man *cman; DECLARE_BITMAP(irqthread_pending, VMW_IRQTHREAD_MAX); - /* Validation memory reservation */ - struct vmw_validation_mem vvm; - uint32 *devcaps; /* @@ -645,6 +655,11 @@ struct vmw_private { #endif }; +static inline struct vmw_buffer_object *gem_to_vmw_bo(struct drm_gem_object *gobj) +{ + return container_of((gobj), struct vmw_buffer_object, base.base); +} + static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) { return container_of(res, struct vmw_surface, res); @@ -738,6 +753,24 @@ static inline bool has_sm5_context(const struct vmw_private *dev_priv) return (dev_priv->sm_type >= VMW_SM_5); } +/** + * has_gl43_context - Does the device support GL43 context. + * @dev_priv: Device private. + * + * Return: Bool value if device support SM5 context or not. + */ +static inline bool has_gl43_context(const struct vmw_private *dev_priv) +{ + return (dev_priv->sm_type >= VMW_SM_5_1X); +} + + +static inline u32 vmw_max_num_uavs(struct vmw_private *dev_priv) +{ + return (has_gl43_context(dev_priv) ? + SVGA3D_DX11_1_MAX_UAVIEWS : SVGA3D_MAX_UAVIEWS); +} + extern void vmw_svga_enable(struct vmw_private *dev_priv); extern void vmw_svga_disable(struct vmw_private *dev_priv); @@ -767,7 +800,7 @@ extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); extern int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, + struct drm_file *filp, uint32_t handle, struct vmw_surface **out_surf, struct vmw_buffer_object **out_buf); @@ -833,6 +866,7 @@ static inline void vmw_user_resource_noref_release(void) /** * Buffer object helper functions - vmwgfx_bo.c */ +extern bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo); extern int vmw_bo_pin_in_placement(struct vmw_private *vmw_priv, struct vmw_buffer_object *bo, struct ttm_placement *placement, @@ -857,32 +891,23 @@ extern int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size, struct ttm_placement *placement, struct ttm_buffer_object **p_bo); +extern int vmw_bo_create(struct vmw_private *dev_priv, + size_t size, struct ttm_placement *placement, + bool interruptible, bool pin, + void (*bo_free)(struct ttm_buffer_object *bo), + struct vmw_buffer_object **p_bo); extern int vmw_bo_init(struct vmw_private *dev_priv, struct vmw_buffer_object *vmw_bo, size_t size, struct ttm_placement *placement, bool interruptible, bool pin, void (*bo_free)(struct ttm_buffer_object *bo)); -extern int vmw_user_bo_verify_access(struct ttm_buffer_object *bo, - struct ttm_object_file *tfile); -extern int vmw_user_bo_alloc(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t size, - bool shareable, - uint32_t *handle, - struct vmw_buffer_object **p_dma_buf, - struct ttm_base_object **p_base); -extern int vmw_user_bo_reference(struct ttm_object_file *tfile, - struct vmw_buffer_object *dma_buf, - uint32_t *handle); -extern int vmw_bo_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); extern int vmw_bo_unref_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern int vmw_user_bo_lookup(struct ttm_object_file *tfile, - uint32_t id, struct vmw_buffer_object **out, - struct ttm_base_object **base); +extern int vmw_user_bo_lookup(struct drm_file *filp, + uint32_t handle, + struct vmw_buffer_object **out); extern void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); extern void *vmw_bo_map_and_cache(struct vmw_buffer_object *vbo); @@ -891,16 +916,7 @@ extern void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo); extern struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct ttm_object_file *tfile, u32 handle); - -/** - * vmw_user_bo_noref_release - release a buffer object pointer looked up - * without reference - */ -static inline void vmw_user_bo_noref_release(void) -{ - ttm_base_object_noref_release(); -} +vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle); /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority @@ -952,6 +968,19 @@ static inline void vmw_bo_prio_del(struct vmw_buffer_object *vbo, int prio) } /** + * GEM related functionality - vmwgfx_gem.c + */ +extern int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, + struct drm_file *filp, + uint32_t size, + uint32_t *handle, + struct vmw_buffer_object **p_vbo); +extern int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +extern void vmw_gem_destroy(struct ttm_buffer_object *bo); +extern void vmw_debugfs_gem_init(struct vmw_private *vdev); + +/** * Misc Ioctl functionality - vmwgfx_ioctl.c */ @@ -1027,9 +1056,6 @@ vmw_is_cursor_bypass3_enabled(const struct vmw_private *dev_priv) extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma); -extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, - size_t gran); - /** * TTM buffer object driver - vmwgfx_ttm_buffer.c */ @@ -1039,7 +1065,6 @@ extern struct ttm_placement vmw_vram_placement; extern struct ttm_placement vmw_vram_sys_placement; extern struct ttm_placement vmw_vram_gmr_placement; extern struct ttm_placement vmw_sys_placement; -extern struct ttm_placement vmw_evictable_placement; extern struct ttm_placement vmw_srf_placement; extern struct ttm_placement vmw_mob_placement; extern struct ttm_placement vmw_nonfixed_placement; @@ -1218,13 +1243,6 @@ void vmw_kms_lost_device(struct drm_device *dev); int vmw_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); - -int vmw_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *dev, uint32_t handle, - uint64_t *offset); -int vmw_dumb_destroy(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle); extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); @@ -1252,6 +1270,12 @@ int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type); void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type); /** + * System memory manager + */ +int vmw_sys_man_init(struct vmw_private *dev_priv); +void vmw_sys_man_fini(struct vmw_private *dev_priv); + +/** * Prime - vmwgfx_prime.c */ @@ -1322,18 +1346,6 @@ extern int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_gb_surface_reference_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -int vmw_surface_gb_priv_define(struct drm_device *dev, - uint32_t user_accounting_size, - SVGA3dSurfaceAllFlags svga3d_flags, - SVGA3dSurfaceFormat format, - bool for_scanout, - uint32_t num_mip_levels, - uint32_t multisample_count, - uint32_t array_size, - struct drm_vmw_size size, - SVGA3dMSPattern multisample_pattern, - SVGA3dMSQualityLevel quality_level, - struct vmw_surface **srf_out); extern int vmw_gb_surface_define_ext_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -1342,7 +1354,6 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, struct drm_file *file_priv); int vmw_gb_surface_define(struct vmw_private *dev_priv, - uint32_t user_accounting_size, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out); @@ -1403,7 +1414,6 @@ void vmw_dx_streamoutput_cotable_list_scrub(struct vmw_private *dev_priv, extern struct vmw_cmdbuf_res_manager * vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv); extern void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man); -extern size_t vmw_cmdbuf_res_man_size(void); extern struct vmw_resource * vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, enum vmw_cmdbuf_res_type res_type, @@ -1551,11 +1561,6 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); -/* Transparent hugepage support - vmwgfx_thp.c */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern int vmw_thp_init(struct vmw_private *dev_priv); -void vmw_thp_fini(struct vmw_private *dev_priv); -#endif /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting @@ -1600,11 +1605,6 @@ vmw_bo_reference(struct vmw_buffer_object *buf) return buf; } -static inline struct ttm_mem_global *vmw_mem_glob(struct vmw_private *dev_priv) -{ - return &ttm_mem_glob; -} - static inline void vmw_fifo_resource_inc(struct vmw_private *dev_priv) { atomic_inc(&dev_priv->num_fifo_resources); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 5f2ffa9de5c8..44ca23b0ea4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1171,14 +1171,13 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->fp->tfile, handle); - if (IS_ERR(vmw_bo)) { + vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); + if (IS_ERR_OR_NULL(vmw_bo)) { VMW_DEBUG_USER("Could not find or use MOB buffer.\n"); return PTR_ERR(vmw_bo); } - ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); - vmw_user_bo_noref_release(); + ttm_bo_put(&vmw_bo->base); if (unlikely(ret != 0)) return ret; @@ -1226,14 +1225,13 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->fp->tfile, handle); - if (IS_ERR(vmw_bo)) { + vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); + if (IS_ERR_OR_NULL(vmw_bo)) { VMW_DEBUG_USER("Could not find or use GMR region.\n"); return PTR_ERR(vmw_bo); } - ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); - vmw_user_bo_noref_release(); + ttm_bo_put(&vmw_bo->base); if (unlikely(ret != 0)) return ret; @@ -2166,6 +2164,44 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv, } /** + * vmw_cmd_dx_set_constant_buffer_offset - Validate + * SVGA_3D_CMD_DX_SET_VS/PS/GS/HS/DS/CS_CONSTANT_BUFFER_OFFSET command. + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int +vmw_cmd_dx_set_constant_buffer_offset(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetConstantBufferOffset); + + struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context); + u32 shader_slot; + + if (!has_sm5_context(dev_priv)) + return -EINVAL; + + if (!ctx_node) + return -EINVAL; + + cmd = container_of(header, typeof(*cmd), header); + if (cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) { + VMW_DEBUG_USER("Illegal const buffer slot %u.\n", + (unsigned int) cmd->body.slot); + return -EINVAL; + } + + shader_slot = cmd->header.id - SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET; + vmw_binding_cb_offset_update(ctx_node->staged, shader_slot, + cmd->body.slot, cmd->body.offsetInBytes); + + return 0; +} + +/** * vmw_cmd_dx_set_shader_res - Validate SVGA_3D_CMD_DX_SET_SHADER_RESOURCES * command * @@ -2918,7 +2954,7 @@ static int vmw_cmd_set_uav(struct vmw_private *dev_priv, if (!has_sm5_context(dev_priv)) return -EINVAL; - if (num_uav > SVGA3D_MAX_UAVIEWS) { + if (num_uav > vmw_max_num_uavs(dev_priv)) { VMW_DEBUG_USER("Invalid UAV binding.\n"); return -EINVAL; } @@ -2950,7 +2986,7 @@ static int vmw_cmd_set_cs_uav(struct vmw_private *dev_priv, if (!has_sm5_context(dev_priv)) return -EINVAL; - if (num_uav > SVGA3D_MAX_UAVIEWS) { + if (num_uav > vmw_max_num_uavs(dev_priv)) { VMW_DEBUG_USER("Invalid UAV binding.\n"); return -EINVAL; } @@ -3528,6 +3564,24 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = { VMW_CMD_DEF(SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER, &vmw_cmd_dx_transfer_from_buffer, true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_HS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_DS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_SET_CS_CONSTANT_BUFFER_OFFSET, + &vmw_cmd_dx_set_constant_buffer_offset, + true, false, true), VMW_CMD_DEF(SVGA_3D_CMD_INTRA_SURFACE_COPY, &vmw_cmd_intra_surface_copy, true, false, true), @@ -3561,6 +3615,8 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = { &vmw_cmd_dx_define_streamoutput, true, false, true), VMW_CMD_DEF(SVGA_3D_CMD_DX_BIND_STREAMOUTPUT, &vmw_cmd_dx_bind_streamoutput, true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE_V2, + &vmw_cmd_dx_so_define, true, false, true), }; bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd) @@ -3869,8 +3925,7 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, fence_rep.fd = -1; } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle, - TTM_REF_USAGE); + ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle); VMW_DEBUG_USER("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); @@ -4054,8 +4109,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, struct sync_file *sync_file = NULL; DECLARE_VAL_CONTEXT(val_ctx, &sw_context->res_ht, 1); - vmw_validation_set_val_mem(&val_ctx, &dev_priv->vvm); - if (flags & DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { @@ -4101,6 +4154,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, sw_context->kernel = true; } + sw_context->filp = file_priv; sw_context->fp = vmw_fpriv(file_priv); INIT_LIST_HEAD(&sw_context->ctx_list); sw_context->cur_query_bo = dev_priv->pinned_bo; @@ -4117,7 +4171,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, vmw_binding_state_reset(sw_context->staged_bindings); if (!sw_context->res_ht_initialized) { - ret = drm_ht_create(&sw_context->res_ht, VMW_RES_HT_ORDER); + ret = vmwgfx_ht_create(&sw_context->res_ht, VMW_RES_HT_ORDER); if (unlikely(ret != 0)) goto out_unlock; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index d18c6a56e3dc..8ee34576c7d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -394,22 +394,15 @@ static int vmw_fb_create_bo(struct vmw_private *vmw_priv, struct vmw_buffer_object *vmw_bo; int ret; - vmw_bo = kmalloc(sizeof(*vmw_bo), GFP_KERNEL); - if (!vmw_bo) { - ret = -ENOMEM; - goto err_unlock; - } - - ret = vmw_bo_init(vmw_priv, vmw_bo, size, + ret = vmw_bo_create(vmw_priv, size, &vmw_sys_placement, false, false, - &vmw_bo_bo_free); + &vmw_bo_bo_free, &vmw_bo); if (unlikely(ret != 0)) - goto err_unlock; /* init frees the buffer on failure */ + return ret; *out = vmw_bo; -err_unlock: return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 9fe12329a4d5..c60d395f9e2e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -37,9 +37,6 @@ struct vmw_fence_manager { spinlock_t lock; struct list_head fence_list; struct work_struct work; - u32 user_fence_size; - u32 fence_size; - u32 event_fence_action_size; bool fifo_down; struct list_head cleanup_list; uint32_t pending_actions[VMW_ACTION_MAX]; @@ -304,11 +301,6 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv) INIT_LIST_HEAD(&fman->cleanup_list); INIT_WORK(&fman->work, &vmw_fence_work_func); fman->fifo_down = true; - fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence)) + - TTM_OBJ_EXTRA_SIZE; - fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj)); - fman->event_fence_action_size = - ttm_round_pot(sizeof(struct vmw_event_fence_action)); mutex_init(&fman->goal_irq_mutex); fman->ctx = dma_fence_context_alloc(1); @@ -560,14 +552,8 @@ static void vmw_user_fence_destroy(struct vmw_fence_obj *fence) { struct vmw_user_fence *ufence = container_of(fence, struct vmw_user_fence, fence); - struct vmw_fence_manager *fman = fman_from_fence(fence); ttm_base_object_kfree(ufence, base); - /* - * Free kernel space accounting. - */ - ttm_mem_global_free(vmw_mem_glob(fman->dev_priv), - fman->user_fence_size); } static void vmw_user_fence_base_release(struct ttm_base_object **p_base) @@ -590,23 +576,8 @@ int vmw_user_fence_create(struct drm_file *file_priv, struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_user_fence *ufence; struct vmw_fence_obj *tmp; - struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv); - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; int ret; - /* - * Kernel memory space accounting, since this object may - * be created by a user-space request. - */ - - ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size, - &ctx); - if (unlikely(ret != 0)) - return ret; - ufence = kzalloc(sizeof(*ufence), GFP_KERNEL); if (unlikely(!ufence)) { ret = -ENOMEM; @@ -625,9 +596,10 @@ int vmw_user_fence_create(struct drm_file *file_priv, * vmw_user_fence_base_release. */ tmp = vmw_fence_obj_reference(&ufence->fence); + ret = ttm_base_object_init(tfile, &ufence->base, false, VMW_RES_FENCE, - &vmw_user_fence_base_release, NULL); + &vmw_user_fence_base_release); if (unlikely(ret != 0)) { @@ -646,7 +618,6 @@ out_err: tmp = &ufence->fence; vmw_fence_obj_unreference(&tmp); out_no_object: - ttm_mem_global_free(mem_glob, fman->user_fence_size); return ret; } @@ -831,8 +802,7 @@ out: */ if (ret == 0 && (arg->wait_options & DRM_VMW_WAIT_OPTION_UNREF)) - return ttm_ref_object_base_unref(tfile, arg->handle, - TTM_REF_USAGE); + return ttm_ref_object_base_unref(tfile, arg->handle); return ret; } @@ -874,8 +844,7 @@ int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_fence_arg *) data; return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - arg->handle, - TTM_REF_USAGE); + arg->handle); } /** @@ -1121,7 +1090,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, if (user_fence_rep != NULL) { ret = ttm_ref_object_add(vmw_fp->tfile, base, - TTM_REF_USAGE, NULL, false); + NULL, false); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " "object.\n"); @@ -1164,7 +1133,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, return 0; out_no_create: if (user_fence_rep != NULL) - ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); + ttm_ref_object_base_unref(tfile, handle); out_no_ref_obj: vmw_fence_obj_unreference(&fence); return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c new file mode 100644 index 000000000000..ce609e7d758f --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2021 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "vmwgfx_drv.h" + +#include "drm/drm_prime.h" +#include "drm/drm_gem_ttm_helper.h" + +/** + * vmw_buffer_object - Convert a struct ttm_buffer_object to a struct + * vmw_buffer_object. + * + * @bo: Pointer to the TTM buffer object. + * Return: Pointer to the struct vmw_buffer_object embedding the + * TTM buffer object. + */ +static struct vmw_buffer_object * +vmw_buffer_object(struct ttm_buffer_object *bo) +{ + return container_of(bo, struct vmw_buffer_object, base); +} + +static void vmw_gem_object_free(struct drm_gem_object *gobj) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gobj); + if (bo) { + ttm_bo_put(bo); + } +} + +static int vmw_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv) +{ + return 0; +} + +static void vmw_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) +{ +} + +static int vmw_gem_pin_private(struct drm_gem_object *obj, bool do_pin) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + struct vmw_buffer_object *vbo = vmw_buffer_object(bo); + int ret; + + ret = ttm_bo_reserve(bo, false, false, NULL); + if (unlikely(ret != 0)) + goto err; + + vmw_bo_pin_reserved(vbo, do_pin); + + ttm_bo_unreserve(bo); + +err: + return ret; +} + + +static int vmw_gem_object_pin(struct drm_gem_object *obj) +{ + return vmw_gem_pin_private(obj, true); +} + +static void vmw_gem_object_unpin(struct drm_gem_object *obj) +{ + vmw_gem_pin_private(obj, false); +} + +static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + struct vmw_ttm_tt *vmw_tt = + container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm); + + if (vmw_tt->vsgt.sgt) + return vmw_tt->vsgt.sgt; + + return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages); +} + + +static const struct drm_gem_object_funcs vmw_gem_object_funcs = { + .free = vmw_gem_object_free, + .open = vmw_gem_object_open, + .close = vmw_gem_object_close, + .print_info = drm_gem_ttm_print_info, + .pin = vmw_gem_object_pin, + .unpin = vmw_gem_object_unpin, + .get_sg_table = vmw_gem_object_get_sg_table, + .vmap = drm_gem_ttm_vmap, + .vunmap = drm_gem_ttm_vunmap, + .mmap = drm_gem_ttm_mmap, +}; + +/** + * vmw_gem_destroy - vmw buffer object destructor + * + * @bo: Pointer to the embedded struct ttm_buffer_object + */ +void vmw_gem_destroy(struct ttm_buffer_object *bo) +{ + struct vmw_buffer_object *vbo = vmw_buffer_object(bo); + + WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); + vmw_bo_unmap(vbo); + drm_gem_object_release(&vbo->base.base); + kfree(vbo); +} + +int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, + struct drm_file *filp, + uint32_t size, + uint32_t *handle, + struct vmw_buffer_object **p_vbo) +{ + int ret; + + ret = vmw_bo_create(dev_priv, size, + (dev_priv->has_mob) ? + &vmw_sys_placement : + &vmw_vram_sys_placement, + true, false, &vmw_gem_destroy, p_vbo); + + (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; + if (ret != 0) + goto out_no_bo; + + ret = drm_gem_handle_create(filp, &(*p_vbo)->base.base, handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&(*p_vbo)->base.base); +out_no_bo: + return ret; +} + + +int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + union drm_vmw_alloc_dmabuf_arg *arg = + (union drm_vmw_alloc_dmabuf_arg *)data; + struct drm_vmw_alloc_dmabuf_req *req = &arg->req; + struct drm_vmw_dmabuf_rep *rep = &arg->rep; + struct vmw_buffer_object *vbo; + uint32_t handle; + int ret; + + ret = vmw_gem_object_create_with_handle(dev_priv, filp, + req->size, &handle, &vbo); + if (ret) + goto out_no_bo; + + rep->handle = handle; + rep->map_handle = drm_vma_node_offset_addr(&vbo->base.base.vma_node); + rep->cur_gmr_id = handle; + rep->cur_gmr_offset = 0; +out_no_bo: + return ret; +} + +#if defined(CONFIG_DEBUG_FS) + +static void vmw_bo_print_info(int id, struct vmw_buffer_object *bo, struct seq_file *m) +{ + const char *placement; + const char *type; + + switch (bo->base.resource->mem_type) { + case TTM_PL_SYSTEM: + placement = " CPU"; + break; + case VMW_PL_GMR: + placement = " GMR"; + break; + case VMW_PL_MOB: + placement = " MOB"; + break; + case VMW_PL_SYSTEM: + placement = "VCPU"; + break; + case TTM_PL_VRAM: + placement = "VRAM"; + break; + default: + placement = "None"; + break; + } + + switch (bo->base.type) { + case ttm_bo_type_device: + type = "device"; + break; + case ttm_bo_type_kernel: + type = "kernel"; + break; + case ttm_bo_type_sg: + type = "sg "; + break; + default: + type = "none "; + break; + } + + seq_printf(m, "\t\t0x%08x: %12zu bytes %s, type = %s", + id, bo->base.base.size, placement, type); + seq_printf(m, ", priority = %u, pin_count = %u, GEM refs = %d, TTM refs = %d", + bo->base.priority, + bo->base.pin_count, + kref_read(&bo->base.base.refcount), + kref_read(&bo->base.kref)); + seq_puts(m, "\n"); +} + +static int vmw_debugfs_gem_info_show(struct seq_file *m, void *unused) +{ + struct vmw_private *vdev = (struct vmw_private *)m->private; + struct drm_device *dev = &vdev->drm; + struct drm_file *file; + int r; + + r = mutex_lock_interruptible(&dev->filelist_mutex); + if (r) + return r; + + list_for_each_entry(file, &dev->filelist, lhead) { + struct task_struct *task; + struct drm_gem_object *gobj; + int id; + + /* + * Although we have a valid reference on file->pid, that does + * not guarantee that the task_struct who called get_pid() is + * still alive (e.g. get_pid(current) => fork() => exit()). + * Therefore, we need to protect this ->comm access using RCU. + */ + rcu_read_lock(); + task = pid_task(file->pid, PIDTYPE_PID); + seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), + task ? task->comm : "<unknown>"); + rcu_read_unlock(); + + spin_lock(&file->table_lock); + idr_for_each_entry(&file->object_idr, gobj, id) { + struct vmw_buffer_object *bo = gem_to_vmw_bo(gobj); + + vmw_bo_print_info(id, bo, m); + } + spin_unlock(&file->table_lock); + } + + mutex_unlock(&dev->filelist_mutex); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(vmw_debugfs_gem_info); + +#endif + +void vmw_debugfs_gem_init(struct vmw_private *vdev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = vdev->drm.primary; + struct dentry *root = minor->debugfs_root; + + debugfs_create_file("vmwgfx_gem_info", 0444, root, vdev, + &vmw_debugfs_gem_info_fops); +#endif +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index b2c4af331c9d..ebb4505a31a3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -42,6 +42,7 @@ struct vmwgfx_gmrid_man { uint32_t max_gmr_ids; uint32_t max_gmr_pages; uint32_t used_gmr_pages; + uint8_t type; }; static struct vmwgfx_gmrid_man *to_gmrid_manager(struct ttm_resource_manager *man) @@ -132,6 +133,18 @@ static void vmw_gmrid_man_put_node(struct ttm_resource_manager *man, kfree(res); } +static void vmw_gmrid_man_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct vmwgfx_gmrid_man *gman = to_gmrid_manager(man); + + BUG_ON(gman->type != VMW_PL_GMR && gman->type != VMW_PL_MOB); + + drm_printf(printer, "%s's used: %u pages, max: %u pages, %u id's\n", + (gman->type == VMW_PL_MOB) ? "Mob" : "GMR", + gman->used_gmr_pages, gman->max_gmr_pages, gman->max_gmr_ids); +} + static const struct ttm_resource_manager_func vmw_gmrid_manager_func; int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type) @@ -146,12 +159,12 @@ int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type) man = &gman->manager; man->func = &vmw_gmrid_manager_func; - /* TODO: This is most likely not correct */ man->use_tt = true; ttm_resource_manager_init(man, 0); spin_lock_init(&gman->lock); gman->used_gmr_pages = 0; ida_init(&gman->gmr_ida); + gman->type = type; switch (type) { case VMW_PL_GMR: @@ -190,4 +203,5 @@ void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type) static const struct ttm_resource_manager_func vmw_gmrid_manager_func = { .alloc = vmw_gmrid_man_get_node, .free = vmw_gmrid_man_put_node, + .debug = vmw_gmrid_man_debug }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c new file mode 100644 index 000000000000..06aebc12774e --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c @@ -0,0 +1,199 @@ +/* + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Simple open hash tab implementation. + * + * Authors: + * Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + +#include <linux/export.h> +#include <linux/hash.h> +#include <linux/mm.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <drm/drm_print.h> + +#include "vmwgfx_hashtab.h" + +int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order) +{ + unsigned int size = 1 << order; + + ht->order = order; + ht->table = NULL; + if (size <= PAGE_SIZE / sizeof(*ht->table)) + ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL); + else + ht->table = vzalloc(array_size(size, sizeof(*ht->table))); + if (!ht->table) { + DRM_ERROR("Out of memory for hash table\n"); + return -ENOMEM; + } + return 0; +} + +void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key) +{ + struct vmwgfx_hash_item *entry; + struct hlist_head *h_list; + unsigned int hashed_key; + int count = 0; + + hashed_key = hash_long(key, ht->order); + DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); + h_list = &ht->table[hashed_key]; + hlist_for_each_entry(entry, h_list, head) + DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); +} + +static struct hlist_node *vmwgfx_ht_find_key(struct vmwgfx_open_hash *ht, unsigned long key) +{ + struct vmwgfx_hash_item *entry; + struct hlist_head *h_list; + unsigned int hashed_key; + + hashed_key = hash_long(key, ht->order); + h_list = &ht->table[hashed_key]; + hlist_for_each_entry(entry, h_list, head) { + if (entry->key == key) + return &entry->head; + if (entry->key > key) + break; + } + return NULL; +} + +static struct hlist_node *vmwgfx_ht_find_key_rcu(struct vmwgfx_open_hash *ht, unsigned long key) +{ + struct vmwgfx_hash_item *entry; + struct hlist_head *h_list; + unsigned int hashed_key; + + hashed_key = hash_long(key, ht->order); + h_list = &ht->table[hashed_key]; + hlist_for_each_entry_rcu(entry, h_list, head) { + if (entry->key == key) + return &entry->head; + if (entry->key > key) + break; + } + return NULL; +} + +int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) +{ + struct vmwgfx_hash_item *entry; + struct hlist_head *h_list; + struct hlist_node *parent; + unsigned int hashed_key; + unsigned long key = item->key; + + hashed_key = hash_long(key, ht->order); + h_list = &ht->table[hashed_key]; + parent = NULL; + hlist_for_each_entry(entry, h_list, head) { + if (entry->key == key) + return -EINVAL; + if (entry->key > key) + break; + parent = &entry->head; + } + if (parent) + hlist_add_behind_rcu(&item->head, parent); + else + hlist_add_head_rcu(&item->head, h_list); + return 0; +} + +/* + * Just insert an item and return any "bits" bit key that hasn't been + * used before. + */ +int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, + unsigned long seed, int bits, int shift, + unsigned long add) +{ + int ret; + unsigned long mask = (1UL << bits) - 1; + unsigned long first, unshifted_key; + + unshifted_key = hash_long(seed, bits); + first = unshifted_key; + do { + item->key = (unshifted_key << shift) + add; + ret = vmwgfx_ht_insert_item(ht, item); + if (ret) + unshifted_key = (unshifted_key + 1) & mask; + } while (ret && (unshifted_key != first)); + + if (ret) { + DRM_ERROR("Available key bit space exhausted\n"); + return -EINVAL; + } + return 0; +} + +int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, + struct vmwgfx_hash_item **item) +{ + struct hlist_node *list; + + list = vmwgfx_ht_find_key_rcu(ht, key); + if (!list) + return -EINVAL; + + *item = hlist_entry(list, struct vmwgfx_hash_item, head); + return 0; +} + +int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key) +{ + struct hlist_node *list; + + list = vmwgfx_ht_find_key(ht, key); + if (list) { + hlist_del_init_rcu(list); + return 0; + } + return -EINVAL; +} + +int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) +{ + hlist_del_init_rcu(&item->head); + return 0; +} + +void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht) +{ + if (ht->table) { + kvfree(ht->table); + ht->table = NULL; + } +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h new file mode 100644 index 000000000000..a9ce12922e21 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h @@ -0,0 +1,83 @@ +/* + * Copyright 2006 Tungsten Graphics, Inc., Bismack, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Simple open hash tab implementation. + * + * Authors: + * Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + +/* + * TODO: Replace this hashtable with Linux' generic implementation + * from <linux/hashtable.h>. + */ + +#ifndef VMWGFX_HASHTAB_H +#define VMWGFX_HASHTAB_H + +#include <linux/list.h> + +#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) + +struct vmwgfx_hash_item { + struct hlist_node head; + unsigned long key; +}; + +struct vmwgfx_open_hash { + struct hlist_head *table; + u8 order; +}; + +int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order); +int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); +int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, + unsigned long seed, int bits, int shift, + unsigned long add); +int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, + struct vmwgfx_hash_item **item); + +void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key); +int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key); +int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); +void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht); + +/* + * RCU-safe interface + * + * The user of this API needs to make sure that two or more instances of the + * hash table manipulation functions are never run simultaneously. + * The lookup function vmwgfx_ht_find_item_rcu may, however, run simultaneously + * with any of the manipulation functions as long as it's called from within + * an RCU read-locked section. + */ +#define vmwgfx_ht_insert_item_rcu vmwgfx_ht_insert_item +#define vmwgfx_ht_just_insert_please_rcu vmwgfx_ht_just_insert_please +#define vmwgfx_ht_remove_key_rcu vmwgfx_ht_remove_key +#define vmwgfx_ht_remove_item_rcu vmwgfx_ht_remove_item +#define vmwgfx_ht_find_item_rcu vmwgfx_ht_find_item + +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 28af34ab6ed6..471da2b4c177 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -105,6 +105,9 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, case DRM_VMW_PARAM_SM5: param->value = has_sm5_context(dev_priv); break; + case DRM_VMW_PARAM_GL43: + param->value = has_gl43_context(dev_priv); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 74fa41909213..4e693e8de2c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -843,8 +843,6 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) drm_framebuffer_cleanup(framebuffer); vmw_surface_unreference(&vfbs->surface); - if (vfbs->base.user_obj) - ttm_base_object_unref(&vfbs->base.user_obj); kfree(vfbs); } @@ -989,6 +987,16 @@ out_err1: * Buffer-object framebuffer code */ +static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + struct vmw_framebuffer_bo *vfbd = + vmw_framebuffer_to_vfbd(fb); + + return drm_gem_handle_create(file_priv, &vfbd->buffer->base.base, handle); +} + static void vmw_framebuffer_bo_destroy(struct drm_framebuffer *framebuffer) { struct vmw_framebuffer_bo *vfbd = @@ -996,8 +1004,6 @@ static void vmw_framebuffer_bo_destroy(struct drm_framebuffer *framebuffer) drm_framebuffer_cleanup(framebuffer); vmw_bo_unreference(&vfbd->buffer); - if (vfbd->base.user_obj) - ttm_base_object_unref(&vfbd->base.user_obj); kfree(vfbd); } @@ -1063,6 +1069,7 @@ static int vmw_framebuffer_bo_dirty_ext(struct drm_framebuffer *framebuffer, } static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { + .create_handle = vmw_framebuffer_bo_create_handle, .destroy = vmw_framebuffer_bo_destroy, .dirty = vmw_framebuffer_bo_dirty_ext, }; @@ -1188,7 +1195,7 @@ static int vmw_create_bo_proxy(struct drm_device *dev, metadata.base_size.depth = 1; metadata.scanout = true; - ret = vmw_gb_surface_define(vmw_priv(dev), 0, &metadata, srf_out); + ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out); if (ret) { DRM_ERROR("Failed to allocate proxy content buffer\n"); return ret; @@ -1251,6 +1258,7 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, goto out_err1; } + vfbd->base.base.obj[0] = &bo->base.base; drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, mode_cmd); vfbd->base.bo = true; vfbd->buffer = vmw_bo_reference(bo); @@ -1368,34 +1376,13 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_private *dev_priv = vmw_priv(dev); - struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_framebuffer *vfb = NULL; struct vmw_surface *surface = NULL; struct vmw_buffer_object *bo = NULL; - struct ttm_base_object *user_obj; int ret; - /* - * Take a reference on the user object of the resource - * backing the kms fb. This ensures that user-space handle - * lookups on that resource will always work as long as - * it's registered with a kms framebuffer. This is important, - * since vmw_execbuf_process identifies resources in the - * command stream using user-space handles. - */ - - user_obj = ttm_base_object_lookup(tfile, mode_cmd->handles[0]); - if (unlikely(user_obj == NULL)) { - DRM_ERROR("Could not locate requested kms frame buffer.\n"); - return ERR_PTR(-ENOENT); - } - - /** - * End conditioned code. - */ - /* returns either a bo or surface */ - ret = vmw_user_lookup_handle(dev_priv, tfile, + ret = vmw_user_lookup_handle(dev_priv, file_priv, mode_cmd->handles[0], &surface, &bo); if (ret) @@ -1428,10 +1415,8 @@ err_out: if (ret) { DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); - ttm_base_object_unref(&user_obj); return ERR_PTR(ret); - } else - vfb->user_obj = user_obj; + } return &vfb->base; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bbc809f7bd8a..4d36e8507380 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -219,7 +219,6 @@ struct vmw_framebuffer { int (*pin)(struct vmw_framebuffer *fb); int (*unpin)(struct vmw_framebuffer *fb); bool bo; - struct ttm_base_object *user_obj; uint32_t user_handle; }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index f9394207dd3c..0a8cc28d6606 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2012-2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2012-2021 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -29,12 +29,6 @@ #include "vmwgfx_drv.h" -/* - * If we set up the screen target otable, screen objects stop working. - */ - -#define VMW_OTABLE_SETUP_SUB ((VMWGFX_ENABLE_SCREEN_TARGET_OTABLE ? 0 : 1)) - #ifdef CONFIG_64BIT #define VMW_PPN_SIZE 8 #define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PT64_0 @@ -75,7 +69,7 @@ static const struct vmw_otable pre_dx_tables[] = { {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true}, {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true}, {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry), - NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE} + NULL, true} }; static const struct vmw_otable dx_tables[] = { @@ -84,7 +78,7 @@ static const struct vmw_otable dx_tables[] = { {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true}, {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true}, {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry), - NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE}, + NULL, true}, {VMWGFX_NUM_DXCONTEXT * sizeof(SVGAOTableDXContextEntry), NULL, true}, }; @@ -146,9 +140,6 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv, if (otable->size <= PAGE_SIZE) { mob->pt_level = VMW_MOBFMT_PTDEPTH_0; mob->pt_root_page = vmw_piter_dma_addr(&iter); - } else if (vsgt->num_regions == 1) { - mob->pt_level = SVGA3D_MOBFMT_RANGE; - mob->pt_root_page = vmw_piter_dma_addr(&iter); } else { ret = vmw_mob_pt_populate(dev_priv, mob); if (unlikely(ret != 0)) @@ -413,10 +404,9 @@ struct vmw_mob *vmw_mob_create(unsigned long data_pages) * @mob: Pointer to the mob the pagetable of which we want to * populate. * - * This function allocates memory to be used for the pagetable, and - * adjusts TTM memory accounting accordingly. Returns ENOMEM if - * memory resources aren't sufficient and may cause TTM buffer objects - * to be swapped out by using the TTM memory accounting function. + * This function allocates memory to be used for the pagetable. + * Returns ENOMEM if memory resources aren't sufficient and may + * cause TTM buffer objects to be swapped out. */ static int vmw_mob_pt_populate(struct vmw_private *dev_priv, struct vmw_mob *mob) @@ -624,9 +614,6 @@ int vmw_mob_bind(struct vmw_private *dev_priv, if (likely(num_data_pages == 1)) { mob->pt_level = VMW_MOBFMT_PTDEPTH_0; mob->pt_root_page = vmw_piter_dma_addr(&data_iter); - } else if (vsgt->num_regions == 1) { - mob->pt_level = SVGA3D_MOBFMT_RANGE; - mob->pt_root_page = vmw_piter_dma_addr(&data_iter); } else if (unlikely(mob->pt_bo == NULL)) { ret = vmw_mob_pt_populate(dev_priv, mob); if (unlikely(ret != 0)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index 54c5d16eb3b7..e9f5c89b4ca6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -451,7 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - ret = vmw_user_bo_lookup(tfile, arg->handle, &buf, NULL); + ret = vmw_user_bo_lookup(file_priv, arg->handle, &buf); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 922317d1acc8..7bc99b1279f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -57,7 +57,6 @@ enum vmw_bo_dirty_method { * @ref_count: Reference count for this structure * @bitmap_size: The size of the bitmap in bits. Typically equal to the * nuber of pages in the bo. - * @size: The accounting size for this struct. * @bitmap: A bitmap where each bit represents a page. A set bit means a * dirty page. */ @@ -68,7 +67,6 @@ struct vmw_bo_dirty { unsigned int change_count; unsigned int ref_count; unsigned long bitmap_size; - size_t size; unsigned long bitmap[]; }; @@ -233,12 +231,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; pgoff_t num_pages = vbo->base.resource->num_pages; - size_t size, acc_size; + size_t size; int ret; - static struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; if (dirty) { dirty->ref_count++; @@ -246,20 +240,12 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) } size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long); - acc_size = ttm_round_pot(size); - ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); - if (ret) { - VMW_DEBUG_USER("Out of graphics memory for buffer object " - "dirty tracker.\n"); - return ret; - } dirty = kvzalloc(size, GFP_KERNEL); if (!dirty) { ret = -ENOMEM; goto out_no_dirty; } - dirty->size = acc_size; dirty->bitmap_size = num_pages; dirty->start = dirty->bitmap_size; dirty->end = 0; @@ -285,7 +271,6 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) return 0; out_no_dirty: - ttm_mem_global_free(&ttm_mem_glob, acc_size); return ret; } @@ -304,10 +289,7 @@ void vmw_bo_dirty_release(struct vmw_buffer_object *vbo) struct vmw_bo_dirty *dirty = vbo->dirty; if (dirty && --dirty->ref_count == 0) { - size_t acc_size = dirty->size; - kvfree(dirty); - ttm_mem_global_free(&ttm_mem_glob, acc_size); vbo->dirty = NULL; } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index d9552a1efd13..2d72a5ee7c0c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -85,6 +85,5 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, int *prime_fd) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 8d1e869cc196..708899ba2102 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -320,11 +320,12 @@ vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, * The pointer this pointed at by out_surf and out_buf needs to be null. */ int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, + struct drm_file *filp, uint32_t handle, struct vmw_surface **out_surf, struct vmw_buffer_object **out_buf) { + struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile; struct vmw_resource *res; int ret; @@ -339,7 +340,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv, } *out_surf = NULL; - ret = vmw_user_bo_lookup(tfile, handle, out_buf, NULL); + ret = vmw_user_bo_lookup(filp, handle, out_buf); return ret; } @@ -362,14 +363,10 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res, return 0; } - backup = kzalloc(sizeof(*backup), GFP_KERNEL); - if (unlikely(!backup)) - return -ENOMEM; - - ret = vmw_bo_init(res->dev_priv, backup, res->backup_size, - res->func->backup_placement, - interruptible, false, - &vmw_bo_bo_free); + ret = vmw_bo_create(res->dev_priv, res->backup_size, + res->func->backup_placement, + interruptible, false, + &vmw_bo_bo_free, &backup); if (unlikely(ret != 0)) goto out_no_bo; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index bd157fb21b45..3004c7a719e9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -442,19 +442,15 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, vps->bo_size = 0; } - vps->bo = kzalloc(sizeof(*vps->bo), GFP_KERNEL); - if (!vps->bo) - return -ENOMEM; - vmw_svga_enable(dev_priv); /* After we have alloced the backing store might not be able to * resume the overlays, this is preferred to failing to alloc. */ vmw_overlay_pause_all(dev_priv); - ret = vmw_bo_init(dev_priv, vps->bo, size, - &vmw_vram_placement, - false, true, &vmw_bo_bo_free); + ret = vmw_bo_create(dev_priv, size, + &vmw_vram_placement, + false, true, &vmw_bo_bo_free, &vps->bo); vmw_overlay_resume_all(dev_priv); if (ret) { vps->bo = NULL; /* vmw_bo_init frees on error */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index b8dd62529104..108a496b5d18 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -53,10 +53,6 @@ struct vmw_dx_shader { struct list_head cotable_head; }; -static uint64_t vmw_user_shader_size; -static uint64_t vmw_shader_size; -static size_t vmw_shader_dx_size; - static void vmw_user_shader_free(struct vmw_resource *res); static struct vmw_resource * vmw_user_shader_base_to_res(struct ttm_base_object *base); @@ -79,7 +75,6 @@ static void vmw_dx_shader_commit_notify(struct vmw_resource *res, enum vmw_cmdbuf_res_state state); static bool vmw_shader_id_ok(u32 user_key, SVGA3dShaderType shader_type); static u32 vmw_shader_key(u32 user_key, SVGA3dShaderType shader_type); -static uint64_t vmw_user_shader_size; static const struct vmw_user_resource_conv user_shader_conv = { .object_type = VMW_RES_SHADER, @@ -563,16 +558,14 @@ void vmw_dx_shader_cotable_list_scrub(struct vmw_private *dev_priv, * * @res: The shader resource * - * Frees the DX shader resource and updates memory accounting. + * Frees the DX shader resource. */ static void vmw_dx_shader_res_free(struct vmw_resource *res) { - struct vmw_private *dev_priv = res->dev_priv; struct vmw_dx_shader *shader = vmw_res_to_dx_shader(res); vmw_resource_unreference(&shader->cotable); kfree(shader); - ttm_mem_global_free(vmw_mem_glob(dev_priv), vmw_shader_dx_size); } /** @@ -594,30 +587,13 @@ int vmw_dx_shader_add(struct vmw_cmdbuf_res_manager *man, struct vmw_dx_shader *shader; struct vmw_resource *res; struct vmw_private *dev_priv = ctx->dev_priv; - struct ttm_operation_ctx ttm_opt_ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; - if (!vmw_shader_dx_size) - vmw_shader_dx_size = ttm_round_pot(sizeof(*shader)); - if (!vmw_shader_id_ok(user_key, shader_type)) return -EINVAL; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), vmw_shader_dx_size, - &ttm_opt_ctx); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader " - "creation.\n"); - return ret; - } - shader = kmalloc(sizeof(*shader), GFP_KERNEL); if (!shader) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), vmw_shader_dx_size); return -ENOMEM; } @@ -669,21 +645,15 @@ static void vmw_user_shader_free(struct vmw_resource *res) { struct vmw_user_shader *ushader = container_of(res, struct vmw_user_shader, shader.res); - struct vmw_private *dev_priv = res->dev_priv; ttm_base_object_kfree(ushader, base); - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_shader_size); } static void vmw_shader_free(struct vmw_resource *res) { struct vmw_shader *shader = vmw_res_to_shader(res); - struct vmw_private *dev_priv = res->dev_priv; kfree(shader); - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_shader_size); } /* @@ -706,8 +676,7 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, struct drm_vmw_shader_arg *arg = (struct drm_vmw_shader_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_ref_object_base_unref(tfile, arg->handle, - TTM_REF_USAGE); + return ttm_ref_object_base_unref(tfile, arg->handle); } static int vmw_user_shader_alloc(struct vmw_private *dev_priv, @@ -722,31 +691,10 @@ static int vmw_user_shader_alloc(struct vmw_private *dev_priv, { struct vmw_user_shader *ushader; struct vmw_resource *res, *tmp; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; - if (unlikely(vmw_user_shader_size == 0)) - vmw_user_shader_size = - ttm_round_pot(sizeof(struct vmw_user_shader)) + - VMW_IDA_ACC_SIZE + TTM_OBJ_EXTRA_SIZE; - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_user_shader_size, - &ctx); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader " - "creation.\n"); - goto out; - } - ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); if (unlikely(!ushader)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_shader_size); ret = -ENOMEM; goto out; } @@ -769,7 +717,7 @@ static int vmw_user_shader_alloc(struct vmw_private *dev_priv, tmp = vmw_resource_reference(res); ret = ttm_base_object_init(tfile, &ushader->base, false, VMW_RES_SHADER, - &vmw_user_shader_base_release, NULL); + &vmw_user_shader_base_release); if (unlikely(ret != 0)) { vmw_resource_unreference(&tmp); @@ -793,31 +741,10 @@ static struct vmw_resource *vmw_shader_alloc(struct vmw_private *dev_priv, { struct vmw_shader *shader; struct vmw_resource *res; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; - if (unlikely(vmw_shader_size == 0)) - vmw_shader_size = - ttm_round_pot(sizeof(struct vmw_shader)) + - VMW_IDA_ACC_SIZE; - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_shader_size, - &ctx); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader " - "creation.\n"); - goto out_err; - } - shader = kzalloc(sizeof(*shader), GFP_KERNEL); if (unlikely(!shader)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_shader_size); ret = -ENOMEM; goto out_err; } @@ -849,8 +776,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, int ret; if (buffer_handle != SVGA3D_INVALID_ID) { - ret = vmw_user_bo_lookup(tfile, buffer_handle, - &buffer, NULL); + ret = vmw_user_bo_lookup(file_priv, buffer_handle, &buffer); if (unlikely(ret != 0)) { VMW_DEBUG_USER("Couldn't find buffer for shader creation.\n"); return ret; @@ -966,13 +892,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, if (!vmw_shader_id_ok(user_key, shader_type)) return -EINVAL; - /* Allocate and pin a DMA buffer */ - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (unlikely(!buf)) - return -ENOMEM; - - ret = vmw_bo_init(dev_priv, buf, size, &vmw_sys_placement, - true, true, vmw_bo_bo_free); + ret = vmw_bo_create(dev_priv, size, &vmw_sys_placement, + true, true, vmw_bo_bo_free, &buf); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c index 33b69a70cfe3..483ad544ea54 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c @@ -32,12 +32,10 @@ * struct vmw_user_simple_resource - User-space simple resource struct * * @base: The TTM base object implementing user-space visibility. - * @account_size: How much memory was accounted for this object. * @simple: The embedded struct vmw_simple_resource. */ struct vmw_user_simple_resource { struct ttm_base_object base; - size_t account_size; struct vmw_simple_resource simple; /* * Nothing to be placed after @simple, since size of @simple is @@ -91,18 +89,15 @@ static int vmw_simple_resource_init(struct vmw_private *dev_priv, * * @res: The struct vmw_resource member of the simple resource object. * - * Frees memory and memory accounting for the object. + * Frees memory for the object. */ static void vmw_simple_resource_free(struct vmw_resource *res) { struct vmw_user_simple_resource *usimple = container_of(res, struct vmw_user_simple_resource, simple.res); - struct vmw_private *dev_priv = res->dev_priv; - size_t size = usimple->account_size; ttm_base_object_kfree(usimple, base); - ttm_mem_global_free(vmw_mem_glob(dev_priv), size); } /** @@ -149,39 +144,19 @@ vmw_simple_resource_create_ioctl(struct drm_device *dev, void *data, struct vmw_resource *res; struct vmw_resource *tmp; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; size_t alloc_size; - size_t account_size; int ret; alloc_size = offsetof(struct vmw_user_simple_resource, simple) + func->size; - account_size = ttm_round_pot(alloc_size) + VMW_IDA_ACC_SIZE + - TTM_OBJ_EXTRA_SIZE; - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), account_size, - &ctx); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for %s" - " creation.\n", func->res_func.type_name); - - goto out_ret; - } usimple = kzalloc(alloc_size, GFP_KERNEL); if (!usimple) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - account_size); ret = -ENOMEM; goto out_ret; } usimple->simple.func = func; - usimple->account_size = account_size; res = &usimple->simple.res; usimple->base.shareable = false; usimple->base.tfile = NULL; @@ -197,7 +172,7 @@ vmw_simple_resource_create_ioctl(struct drm_device *dev, void *data, tmp = vmw_resource_reference(res); ret = ttm_base_object_init(tfile, &usimple->base, false, func->ttm_res_type, - &vmw_simple_resource_base_release, NULL); + &vmw_simple_resource_base_release); if (ret) { vmw_resource_unreference(&tmp); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c index 9efb4463ce99..4ea32b01efc0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c @@ -279,18 +279,15 @@ static bool vmw_view_id_ok(u32 user_key, enum vmw_view_type view_type) * * @res: Pointer to a struct vmw_resource * - * Frees memory and memory accounting held by a struct vmw_view. + * Frees memory held by the struct vmw_view. */ static void vmw_view_res_free(struct vmw_resource *res) { struct vmw_view *view = vmw_view(res); - size_t size = offsetof(struct vmw_view, cmd) + view->cmd_size; - struct vmw_private *dev_priv = res->dev_priv; vmw_resource_unreference(&view->cotable); vmw_resource_unreference(&view->srf); kfree_rcu(view, rcu); - ttm_mem_global_free(vmw_mem_glob(dev_priv), size); } /** @@ -327,10 +324,6 @@ int vmw_view_add(struct vmw_cmdbuf_res_manager *man, struct vmw_private *dev_priv = ctx->dev_priv; struct vmw_resource *res; struct vmw_view *view; - struct ttm_operation_ctx ttm_opt_ctx = { - .interruptible = true, - .no_wait_gpu = false - }; size_t size; int ret; @@ -347,16 +340,8 @@ int vmw_view_add(struct vmw_cmdbuf_res_manager *man, size = offsetof(struct vmw_view, cmd) + cmd_size; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, &ttm_opt_ctx); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for view creation\n"); - return ret; - } - view = kmalloc(size, GFP_KERNEL); if (!view) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), size); return -ENOMEM; } @@ -582,4 +567,8 @@ static void vmw_so_build_asserts(void) offsetof(SVGA3dCmdDXDefineRenderTargetView, sid)); BUILD_BUG_ON(offsetof(struct vmw_view_define, sid) != offsetof(SVGA3dCmdDXDefineDepthStencilView, sid)); + BUILD_BUG_ON(offsetof(struct vmw_view_define, sid) != + offsetof(SVGA3dCmdDXDefineUAView, sid)); + BUILD_BUG_ON(offsetof(struct vmw_view_define, sid) != + offsetof(SVGA3dCmdDXDefineDepthStencilView_v2, sid)); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_so.h b/drivers/gpu/drm/vmwgfx/vmwgfx_so.h index f48b84bfeeac..01c701e7466e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_so.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_so.h @@ -93,7 +93,10 @@ static inline enum vmw_view_type vmw_view_cmd_to_type(u32 id) id == SVGA_3D_CMD_DX_DESTROY_UA_VIEW) return vmw_view_ua; - if (tmp > (u32)vmw_view_max) + if (id == SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW_V2) + return vmw_view_ds; + + if (tmp > (u32)vmw_view_ds) return vmw_view_max; return (enum vmw_view_type) tmp; @@ -123,6 +126,7 @@ static inline enum vmw_so_type vmw_so_cmd_to_type(u32 id) case SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE: return vmw_so_ds; case SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE: + case SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE_V2: case SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE: return vmw_so_rs; case SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index d85310b2608d..ae9a6044d448 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1123,7 +1123,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, } if (!vps->surf) { - ret = vmw_gb_surface_define(dev_priv, 0, &metadata, + ret = vmw_gb_surface_define(dev_priv, &metadata, &vps->surf); if (ret != 0) { DRM_ERROR("Couldn't allocate STDU surface.\n"); @@ -1872,8 +1872,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv) int i, ret; - /* Do nothing if Screen Target support is turned off */ - if (!VMWGFX_ENABLE_SCREEN_TARGET_OTABLE || !dev_priv->has_mob) + /* Do nothing if there's no support for MOBs */ + if (!dev_priv->has_mob) return -ENOSYS; if (!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c b/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c index c8efa4a6c995..2de97419d5c9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c @@ -60,8 +60,6 @@ static int vmw_dx_streamoutput_unbind(struct vmw_resource *res, bool readback, static void vmw_dx_streamoutput_commit_notify(struct vmw_resource *res, enum vmw_cmdbuf_res_state state); -static size_t vmw_streamoutput_size; - static const struct vmw_res_func vmw_dx_streamoutput_func = { .res_type = vmw_res_streamoutput, .needs_backup = true, @@ -254,12 +252,10 @@ vmw_dx_streamoutput_lookup(struct vmw_cmdbuf_res_manager *man, static void vmw_dx_streamoutput_res_free(struct vmw_resource *res) { - struct vmw_private *dev_priv = res->dev_priv; struct vmw_dx_streamoutput *so = vmw_res_to_dx_streamoutput(res); vmw_resource_unreference(&so->cotable); kfree(so); - ttm_mem_global_free(vmw_mem_glob(dev_priv), vmw_streamoutput_size); } static void vmw_dx_streamoutput_hw_destroy(struct vmw_resource *res) @@ -284,27 +280,10 @@ int vmw_dx_streamoutput_add(struct vmw_cmdbuf_res_manager *man, struct vmw_dx_streamoutput *so; struct vmw_resource *res; struct vmw_private *dev_priv = ctx->dev_priv; - struct ttm_operation_ctx ttm_opt_ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; - if (!vmw_streamoutput_size) - vmw_streamoutput_size = ttm_round_pot(sizeof(*so)); - - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_streamoutput_size, &ttm_opt_ctx); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for streamout.\n"); - return ret; - } - so = kmalloc(sizeof(*so), GFP_KERNEL); if (!so) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_streamoutput_size); return -ENOMEM; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5d53a5f9d123..00e8e27e4884 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -45,16 +45,12 @@ * @prime: The TTM prime object. * @base: The TTM base object handling user-space visibility. * @srf: The surface metadata. - * @size: TTM accounting size for the surface. * @master: Master of the creating client. Used for security check. - * @backup_base: The TTM base object of the backup buffer. */ struct vmw_user_surface { struct ttm_prime_object prime; struct vmw_surface srf; - uint32_t size; struct drm_master *master; - struct ttm_base_object *backup_base; }; /** @@ -74,13 +70,11 @@ struct vmw_surface_offset { /** * struct vmw_surface_dirty - Surface dirty-tracker * @cache: Cached layout information of the surface. - * @size: Accounting size for the struct vmw_surface_dirty. * @num_subres: Number of subresources. * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource. */ struct vmw_surface_dirty { struct vmw_surface_cache cache; - size_t size; u32 num_subres; SVGA3dBox boxes[]; }; @@ -129,9 +123,6 @@ static const struct vmw_user_resource_conv user_surface_conv = { const struct vmw_user_resource_conv *user_surface_converter = &user_surface_conv; - -static uint64_t vmw_user_surface_size; - static const struct vmw_res_func vmw_legacy_surface_func = { .res_type = vmw_res_surface, .needs_backup = false, @@ -359,7 +350,7 @@ static void vmw_surface_dma_encode(struct vmw_surface *srf, * vmw_surface. * * Destroys a the device surface associated with a struct vmw_surface if - * any, and adjusts accounting and resource count accordingly. + * any, and adjusts resource count accordingly. */ static void vmw_hw_surface_destroy(struct vmw_resource *res) { @@ -666,8 +657,6 @@ static void vmw_user_surface_free(struct vmw_resource *res) struct vmw_surface *srf = vmw_res_to_srf(res); struct vmw_user_surface *user_srf = container_of(srf, struct vmw_user_surface, srf); - struct vmw_private *dev_priv = srf->res.dev_priv; - uint32_t size = user_srf->size; WARN_ON_ONCE(res->dirty); if (user_srf->master) @@ -676,7 +665,6 @@ static void vmw_user_surface_free(struct vmw_resource *res) kfree(srf->metadata.sizes); kfree(srf->snooper.image); ttm_prime_object_kfree(user_srf, prime); - ttm_mem_global_free(vmw_mem_glob(dev_priv), size); } /** @@ -696,8 +684,6 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; - if (user_srf->backup_base) - ttm_base_object_unref(&user_srf->backup_base); vmw_resource_unreference(&res); } @@ -715,7 +701,7 @@ int vmw_surface_destroy_ioctl(struct drm_device *dev, void *data, struct drm_vmw_surface_arg *arg = (struct drm_vmw_surface_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_ref_object_base_unref(tfile, arg->sid, TTM_REF_USAGE); + return ttm_ref_object_base_unref(tfile, arg->sid); } /** @@ -740,23 +726,14 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, struct drm_vmw_surface_create_req *req = &arg->req; struct drm_vmw_surface_arg *rep = &arg->rep; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; int ret; int i, j; uint32_t cur_bo_offset; struct drm_vmw_size *cur_size; struct vmw_surface_offset *cur_offset; uint32_t num_sizes; - uint32_t size; const SVGA3dSurfaceDesc *desc; - if (unlikely(vmw_user_surface_size == 0)) - vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) + - VMW_IDA_ACC_SIZE + TTM_OBJ_EXTRA_SIZE; - num_sizes = 0; for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS) @@ -768,10 +745,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, num_sizes == 0) return -EINVAL; - size = vmw_user_surface_size + - ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) + - ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset)); - desc = vmw_surface_get_desc(req->format); if (unlikely(desc->blockDesc == SVGA3DBLOCKDESC_NONE)) { VMW_DEBUG_USER("Invalid format %d for surface creation.\n", @@ -779,18 +752,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - size, &ctx); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for surface.\n"); - goto out_unlock; - } - user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL); if (unlikely(!user_srf)) { ret = -ENOMEM; - goto out_no_user_srf; + goto out_unlock; } srf = &user_srf->srf; @@ -805,7 +770,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, memcpy(metadata->mip_levels, req->mip_levels, sizeof(metadata->mip_levels)); metadata->num_sizes = num_sizes; - user_srf->size = size; metadata->sizes = memdup_user((struct drm_vmw_size __user *)(unsigned long) req->size_addr, @@ -883,22 +847,22 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (dev_priv->has_mob && req->shareable) { uint32_t backup_handle; - ret = vmw_user_bo_alloc(dev_priv, tfile, - res->backup_size, - true, - &backup_handle, - &res->backup, - &user_srf->backup_base); + ret = vmw_gem_object_create_with_handle(dev_priv, + file_priv, + res->backup_size, + &backup_handle, + &res->backup); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; } + vmw_bo_reference(res->backup); } tmp = vmw_resource_reference(&srf->res); ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, req->shareable, VMW_RES_SURFACE, - &vmw_user_surface_base_release, NULL); + &vmw_user_surface_base_release); if (unlikely(ret != 0)) { vmw_resource_unreference(&tmp); @@ -916,8 +880,6 @@ out_no_offsets: kfree(metadata->sizes); out_no_sizes: ttm_prime_object_kfree(user_srf, prime); -out_no_user_srf: - ttm_mem_global_free(vmw_mem_glob(dev_priv), size); out_unlock: return ret; } @@ -955,7 +917,6 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, VMW_DEBUG_USER("Referenced object is not a surface.\n"); goto out_bad_resource; } - if (handle_type != DRM_VMW_HANDLE_PRIME) { bool require_exist = false; @@ -980,8 +941,7 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, if (unlikely(drm_is_render_client(file_priv))) require_exist = true; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, - require_exist); + ret = ttm_ref_object_add(tfile, base, NULL, require_exist); if (unlikely(ret != 0)) { DRM_ERROR("Could not add a reference to a surface.\n"); goto out_bad_resource; @@ -995,7 +955,7 @@ out_bad_resource: ttm_base_object_unref(&base); out_no_lookup: if (handle_type == DRM_VMW_HANDLE_PRIME) - (void) ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); + (void) ttm_ref_object_base_unref(tfile, handle); return ret; } @@ -1045,7 +1005,7 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) { VMW_DEBUG_USER("copy_to_user failed %p %u\n", user_sizes, srf->metadata.num_sizes); - ttm_ref_object_base_unref(tfile, base->handle, TTM_REF_USAGE); + ttm_ref_object_base_unref(tfile, base->handle); ret = -EFAULT; } @@ -1459,7 +1419,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, struct vmw_resource *res; struct vmw_resource *tmp; int ret = 0; - uint32_t size; uint32_t backup_handle = 0; SVGA3dSurfaceAllFlags svga3d_flags_64 = SVGA3D_FLAGS_64(req->svga3d_flags_upper_32_bits, @@ -1506,12 +1465,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, return -EINVAL; } - if (unlikely(vmw_user_surface_size == 0)) - vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) + - VMW_IDA_ACC_SIZE + TTM_OBJ_EXTRA_SIZE; - - size = vmw_user_surface_size; - metadata.flags = svga3d_flags_64; metadata.format = req->base.format; metadata.mip_levels[0] = req->base.mip_levels; @@ -1526,7 +1479,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, drm_vmw_surface_flag_scanout; /* Define a surface based on the parameters. */ - ret = vmw_gb_surface_define(dev_priv, size, &metadata, &srf); + ret = vmw_gb_surface_define(dev_priv, &metadata, &srf); if (ret != 0) { VMW_DEBUG_USER("Failed to define surface.\n"); return ret; @@ -1539,9 +1492,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev, res = &user_srf->srf.res; if (req->base.buffer_handle != SVGA3D_INVALID_ID) { - ret = vmw_user_bo_lookup(tfile, req->base.buffer_handle, - &res->backup, - &user_srf->backup_base); + ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle, + &res->backup); if (ret == 0) { if (res->backup->base.base.size < res->backup_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); @@ -1554,14 +1506,15 @@ vmw_gb_surface_define_internal(struct drm_device *dev, } } else if (req->base.drm_surface_flags & (drm_vmw_surface_flag_create_buffer | - drm_vmw_surface_flag_coherent)) - ret = vmw_user_bo_alloc(dev_priv, tfile, - res->backup_size, - req->base.drm_surface_flags & - drm_vmw_surface_flag_shareable, - &backup_handle, - &res->backup, - &user_srf->backup_base); + drm_vmw_surface_flag_coherent)) { + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + res->backup_size, + &backup_handle, + &res->backup); + if (ret == 0) + vmw_bo_reference(res->backup); + + } if (unlikely(ret != 0)) { vmw_resource_unreference(&res); @@ -1593,7 +1546,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, req->base.drm_surface_flags & drm_vmw_surface_flag_shareable, VMW_RES_SURFACE, - &vmw_user_surface_base_release, NULL); + &vmw_user_surface_base_release); if (unlikely(ret != 0)) { vmw_resource_unreference(&tmp); @@ -1613,7 +1566,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->buffer_size = 0; rep->buffer_handle = SVGA3D_INVALID_ID; } - vmw_resource_unreference(&res); out_unlock: @@ -1636,12 +1588,11 @@ vmw_gb_surface_reference_internal(struct drm_device *dev, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); - struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_surface *srf; struct vmw_user_surface *user_srf; struct vmw_surface_metadata *metadata; struct ttm_base_object *base; - uint32_t backup_handle; + u32 backup_handle; int ret; ret = vmw_surface_handle_reference(dev_priv, file_priv, req->sid, @@ -1658,14 +1609,12 @@ vmw_gb_surface_reference_internal(struct drm_device *dev, metadata = &srf->metadata; mutex_lock(&dev_priv->cmdbuf_mutex); /* Protect res->backup */ - ret = vmw_user_bo_reference(tfile, srf->res.backup, &backup_handle); + ret = drm_gem_handle_create(file_priv, &srf->res.backup->base.base, + &backup_handle); mutex_unlock(&dev_priv->cmdbuf_mutex); - - if (unlikely(ret != 0)) { - DRM_ERROR("Could not add a reference to a GB surface " - "backup buffer.\n"); - (void) ttm_ref_object_base_unref(tfile, base->handle, - TTM_REF_USAGE); + if (ret != 0) { + drm_err(dev, "Wasn't able to create a backing handle for surface sid = %u.\n", + req->sid); goto out_bad_resource; } @@ -1955,11 +1904,7 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) u32 num_mip; u32 num_subres; u32 num_samples; - size_t dirty_size, acc_size; - static struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; + size_t dirty_size; int ret; if (metadata->array_size) @@ -1973,14 +1918,6 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) num_subres = num_layers * num_mip; dirty_size = struct_size(dirty, boxes, num_subres); - acc_size = ttm_round_pot(dirty_size); - ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv), - acc_size, &ctx); - if (ret) { - VMW_DEBUG_USER("Out of graphics memory for surface " - "dirty tracker.\n"); - return ret; - } dirty = kvzalloc(dirty_size, GFP_KERNEL); if (!dirty) { @@ -1990,13 +1927,12 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) num_samples = max_t(u32, 1, metadata->multisample_count); ret = vmw_surface_setup_cache(&metadata->base_size, metadata->format, - num_mip, num_layers, num_samples, - &dirty->cache); + num_mip, num_layers, num_samples, + &dirty->cache); if (ret) goto out_no_cache; dirty->num_subres = num_subres; - dirty->size = acc_size; res->dirty = (struct vmw_resource_dirty *) dirty; return 0; @@ -2004,7 +1940,6 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) out_no_cache: kvfree(dirty); out_no_dirty: - ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); return ret; } @@ -2015,10 +1950,8 @@ static void vmw_surface_dirty_free(struct vmw_resource *res) { struct vmw_surface_dirty *dirty = (struct vmw_surface_dirty *) res->dirty; - size_t acc_size = dirty->size; kvfree(dirty); - ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); res->dirty = NULL; } @@ -2051,8 +1984,6 @@ static int vmw_surface_clean(struct vmw_resource *res) * vmw_gb_surface_define - Define a private GB surface * * @dev_priv: Pointer to a device private. - * @user_accounting_size: Used to track user-space memory usage, set - * to 0 for kernel mode only memory * @metadata: Metadata representing the surface to create. * @user_srf_out: allocated user_srf. Set to NULL on failure. * @@ -2062,17 +1993,12 @@ static int vmw_surface_clean(struct vmw_resource *res) * it available to user mode drivers. */ int vmw_gb_surface_define(struct vmw_private *dev_priv, - uint32_t user_accounting_size, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out) { struct vmw_surface_metadata *metadata; struct vmw_user_surface *user_srf; struct vmw_surface *srf; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; u32 sample_count = 1; u32 num_layers = 1; int ret; @@ -2113,22 +2039,13 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, if (req->sizes != NULL) return -EINVAL; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - user_accounting_size, &ctx); - if (ret != 0) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for surface.\n"); - goto out_unlock; - } - user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL); if (unlikely(!user_srf)) { ret = -ENOMEM; - goto out_no_user_srf; + goto out_unlock; } *srf_out = &user_srf->srf; - user_srf->size = user_accounting_size; user_srf->prime.base.shareable = false; user_srf->prime.base.tfile = NULL; @@ -2179,9 +2096,6 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, return ret; -out_no_user_srf: - ttm_mem_global_free(vmw_mem_glob(dev_priv), user_accounting_size); - out_unlock: return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c new file mode 100644 index 000000000000..b0005b03a617 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2021 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "vmwgfx_drv.h" + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_resource.h> +#include <linux/slab.h> + + +static int vmw_sys_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + *res = kzalloc(sizeof(**res), GFP_KERNEL); + if (!*res) + return -ENOMEM; + + ttm_resource_init(bo, place, *res); + return 0; +} + +static void vmw_sys_man_free(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + kfree(res); +} + +static const struct ttm_resource_manager_func vmw_sys_manager_func = { + .alloc = vmw_sys_man_alloc, + .free = vmw_sys_man_free, +}; + +int vmw_sys_man_init(struct vmw_private *dev_priv) +{ + struct ttm_device *bdev = &dev_priv->bdev; + struct ttm_resource_manager *man = + kzalloc(sizeof(*man), GFP_KERNEL); + + if (!man) + return -ENOMEM; + + man->use_tt = true; + man->func = &vmw_sys_manager_func; + + ttm_resource_manager_init(man, 0); + ttm_set_driver_manager(bdev, VMW_PL_SYSTEM, man); + ttm_resource_manager_set_used(man, true); + return 0; +} + +void vmw_sys_man_fini(struct vmw_private *dev_priv) +{ + struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev, + VMW_PL_SYSTEM); + + ttm_resource_manager_evict_all(&dev_priv->bdev, man); + + ttm_resource_manager_set_used(man, false); + ttm_resource_manager_cleanup(man); + + ttm_set_driver_manager(&dev_priv->bdev, VMW_PL_SYSTEM, NULL); + kfree(man); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c deleted file mode 100644 index 2a3d3468e4e0..000000000000 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Huge page-table-entry support for IO memory. - * - * Copyright (C) 2007-2019 Vmware, Inc. All rights reservedd. - */ -#include "vmwgfx_drv.h" -#include <drm/ttm/ttm_bo_driver.h> -#include <drm/ttm/ttm_placement.h> -#include <drm/ttm/ttm_range_manager.h> - -/** - * struct vmw_thp_manager - Range manager implementing huge page alignment - * - * @manager: TTM resource manager. - * @mm: The underlying range manager. Protected by @lock. - * @lock: Manager lock. - */ -struct vmw_thp_manager { - struct ttm_resource_manager manager; - struct drm_mm mm; - spinlock_t lock; -}; - -static struct vmw_thp_manager *to_thp_manager(struct ttm_resource_manager *man) -{ - return container_of(man, struct vmw_thp_manager, manager); -} - -static const struct ttm_resource_manager_func vmw_thp_func; - -static int vmw_thp_insert_aligned(struct ttm_buffer_object *bo, - struct drm_mm *mm, struct drm_mm_node *node, - unsigned long align_pages, - const struct ttm_place *place, - struct ttm_resource *mem, - unsigned long lpfn, - enum drm_mm_insert_mode mode) -{ - if (align_pages >= bo->page_alignment && - (!bo->page_alignment || align_pages % bo->page_alignment == 0)) { - return drm_mm_insert_node_in_range(mm, node, - mem->num_pages, - align_pages, 0, - place->fpfn, lpfn, mode); - } - - return -ENOSPC; -} - -static int vmw_thp_get_node(struct ttm_resource_manager *man, - struct ttm_buffer_object *bo, - const struct ttm_place *place, - struct ttm_resource **res) -{ - struct vmw_thp_manager *rman = to_thp_manager(man); - struct drm_mm *mm = &rman->mm; - struct ttm_range_mgr_node *node; - unsigned long align_pages; - unsigned long lpfn; - enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST; - int ret; - - node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); - if (!node) - return -ENOMEM; - - ttm_resource_init(bo, place, &node->base); - - lpfn = place->lpfn; - if (!lpfn) - lpfn = man->size; - - mode = DRM_MM_INSERT_BEST; - if (place->flags & TTM_PL_FLAG_TOPDOWN) - mode = DRM_MM_INSERT_HIGH; - - spin_lock(&rman->lock); - if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) { - align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT); - if (node->base.num_pages >= align_pages) { - ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0], - align_pages, place, - &node->base, lpfn, mode); - if (!ret) - goto found_unlock; - } - } - - align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT); - if (node->base.num_pages >= align_pages) { - ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0], - align_pages, place, &node->base, - lpfn, mode); - if (!ret) - goto found_unlock; - } - - ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0], - node->base.num_pages, - bo->page_alignment, 0, - place->fpfn, lpfn, mode); -found_unlock: - spin_unlock(&rman->lock); - - if (unlikely(ret)) { - kfree(node); - } else { - node->base.start = node->mm_nodes[0].start; - *res = &node->base; - } - - return ret; -} - -static void vmw_thp_put_node(struct ttm_resource_manager *man, - struct ttm_resource *res) -{ - struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); - struct vmw_thp_manager *rman = to_thp_manager(man); - - spin_lock(&rman->lock); - drm_mm_remove_node(&node->mm_nodes[0]); - spin_unlock(&rman->lock); - - kfree(node); -} - -int vmw_thp_init(struct vmw_private *dev_priv) -{ - struct vmw_thp_manager *rman; - - rman = kzalloc(sizeof(*rman), GFP_KERNEL); - if (!rman) - return -ENOMEM; - - ttm_resource_manager_init(&rman->manager, - dev_priv->vram_size >> PAGE_SHIFT); - - rman->manager.func = &vmw_thp_func; - drm_mm_init(&rman->mm, 0, rman->manager.size); - spin_lock_init(&rman->lock); - - ttm_set_driver_manager(&dev_priv->bdev, TTM_PL_VRAM, &rman->manager); - ttm_resource_manager_set_used(&rman->manager, true); - return 0; -} - -void vmw_thp_fini(struct vmw_private *dev_priv) -{ - struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev, TTM_PL_VRAM); - struct vmw_thp_manager *rman = to_thp_manager(man); - struct drm_mm *mm = &rman->mm; - int ret; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&dev_priv->bdev, man); - if (ret) - return; - spin_lock(&rman->lock); - drm_mm_clean(mm); - drm_mm_takedown(mm); - spin_unlock(&rman->lock); - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&dev_priv->bdev, TTM_PL_VRAM, NULL); - kfree(rman); -} - -static void vmw_thp_debug(struct ttm_resource_manager *man, - struct drm_printer *printer) -{ - struct vmw_thp_manager *rman = to_thp_manager(man); - - spin_lock(&rman->lock); - drm_mm_print(&rman->mm, printer); - spin_unlock(&rman->lock); -} - -static const struct ttm_resource_manager_func vmw_thp_func = { - .alloc = vmw_thp_get_node, - .free = vmw_thp_put_node, - .debug = vmw_thp_debug -}; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index e899a936a42a..b84ecc6d6611 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -92,6 +92,13 @@ static const struct ttm_place gmr_vram_placement_flags[] = { } }; +static const struct ttm_place vmw_sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = VMW_PL_SYSTEM, + .flags = 0 +}; + struct ttm_placement vmw_vram_gmr_placement = { .num_placement = 2, .placement = vram_gmr_placement_flags, @@ -113,28 +120,11 @@ struct ttm_placement vmw_sys_placement = { .busy_placement = &sys_placement_flags }; -static const struct ttm_place evictable_placement_flags[] = { - { - .fpfn = 0, - .lpfn = 0, - .mem_type = TTM_PL_SYSTEM, - .flags = 0 - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = TTM_PL_VRAM, - .flags = 0 - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = VMW_PL_GMR, - .flags = 0 - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = VMW_PL_MOB, - .flags = 0 - } +struct ttm_placement vmw_pt_sys_placement = { + .num_placement = 1, + .placement = &vmw_sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &vmw_sys_placement_flags }; static const struct ttm_place nonfixed_placement_flags[] = { @@ -156,13 +146,6 @@ static const struct ttm_place nonfixed_placement_flags[] = { } }; -struct ttm_placement vmw_evictable_placement = { - .num_placement = 4, - .placement = evictable_placement_flags, - .num_busy_placement = 1, - .busy_placement = &sys_placement_flags -}; - struct ttm_placement vmw_srf_placement = { .num_placement = 1, .num_busy_placement = 2, @@ -184,19 +167,6 @@ struct ttm_placement vmw_nonfixed_placement = { .busy_placement = &sys_placement_flags }; -struct vmw_ttm_tt { - struct ttm_tt dma_ttm; - struct vmw_private *dev_priv; - int gmr_id; - struct vmw_mob *mob; - int mem_type; - struct sg_table sgt; - struct vmw_sg_table vsgt; - uint64_t sg_alloc_size; - bool mapped; - bool bound; -}; - const size_t vmw_tt_size = sizeof(struct vmw_ttm_tt); /** @@ -317,17 +287,8 @@ static int vmw_ttm_map_for_dma(struct vmw_ttm_tt *vmw_tt) static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) { struct vmw_private *dev_priv = vmw_tt->dev_priv; - struct ttm_mem_global *glob = vmw_mem_glob(dev_priv); struct vmw_sg_table *vsgt = &vmw_tt->vsgt; - struct ttm_operation_ctx ctx = { - .interruptible = true, - .no_wait_gpu = false - }; - struct vmw_piter iter; - dma_addr_t old; int ret = 0; - static size_t sgl_size; - static size_t sgt_size; if (vmw_tt->mapped) return 0; @@ -336,20 +297,12 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) vsgt->pages = vmw_tt->dma_ttm.pages; vsgt->num_pages = vmw_tt->dma_ttm.num_pages; vsgt->addrs = vmw_tt->dma_ttm.dma_address; - vsgt->sgt = &vmw_tt->sgt; + vsgt->sgt = NULL; switch (dev_priv->map_mode) { case vmw_dma_map_bind: case vmw_dma_map_populate: - if (unlikely(!sgl_size)) { - sgl_size = ttm_round_pot(sizeof(struct scatterlist)); - sgt_size = ttm_round_pot(sizeof(struct sg_table)); - } - vmw_tt->sg_alloc_size = sgt_size + sgl_size * vsgt->num_pages; - ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, &ctx); - if (unlikely(ret != 0)) - return ret; - + vsgt->sgt = &vmw_tt->sgt; ret = sg_alloc_table_from_pages_segment( &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, (unsigned long)vsgt->num_pages << PAGE_SHIFT, @@ -357,15 +310,6 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) if (ret) goto out_sg_alloc_fail; - if (vsgt->num_pages > vmw_tt->sgt.orig_nents) { - uint64_t over_alloc = - sgl_size * (vsgt->num_pages - - vmw_tt->sgt.orig_nents); - - ttm_mem_global_free(glob, over_alloc); - vmw_tt->sg_alloc_size -= over_alloc; - } - ret = vmw_ttm_map_for_dma(vmw_tt); if (unlikely(ret != 0)) goto out_map_fail; @@ -375,16 +319,6 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) break; } - old = ~((dma_addr_t) 0); - vmw_tt->vsgt.num_regions = 0; - for (vmw_piter_start(&iter, vsgt, 0); vmw_piter_next(&iter);) { - dma_addr_t cur = vmw_piter_dma_addr(&iter); - - if (cur != old + PAGE_SIZE) - vmw_tt->vsgt.num_regions++; - old = cur; - } - vmw_tt->mapped = true; return 0; @@ -392,7 +326,6 @@ out_map_fail: sg_free_table(vmw_tt->vsgt.sgt); vmw_tt->vsgt.sgt = NULL; out_sg_alloc_fail: - ttm_mem_global_free(glob, vmw_tt->sg_alloc_size); return ret; } @@ -418,8 +351,6 @@ static void vmw_ttm_unmap_dma(struct vmw_ttm_tt *vmw_tt) vmw_ttm_unmap_from_dma(vmw_tt); sg_free_table(vmw_tt->vsgt.sgt); vmw_tt->vsgt.sgt = NULL; - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_tt->sg_alloc_size); break; default: break; @@ -484,6 +415,9 @@ static int vmw_ttm_bind(struct ttm_device *bdev, &vmw_be->vsgt, ttm->num_pages, vmw_be->gmr_id); break; + case VMW_PL_SYSTEM: + /* Nothing to be done for a system bind */ + break; default: BUG(); } @@ -507,6 +441,8 @@ static void vmw_ttm_unbind(struct ttm_device *bdev, case VMW_PL_MOB: vmw_mob_unbind(vmw_be->dev_priv, vmw_be->mob); break; + case VMW_PL_SYSTEM: + break; default: BUG(); } @@ -534,7 +470,6 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) static int vmw_ttm_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { - unsigned int i; int ret; /* TODO: maybe completely drop this ? */ @@ -542,22 +477,7 @@ static int vmw_ttm_populate(struct ttm_device *bdev, return 0; ret = ttm_pool_alloc(&bdev->pool, ttm, ctx); - if (ret) - return ret; - - for (i = 0; i < ttm->num_pages; ++i) { - ret = ttm_mem_global_alloc_page(&ttm_mem_glob, ttm->pages[i], - PAGE_SIZE, ctx); - if (ret) - goto error; - } - return 0; -error: - while (i--) - ttm_mem_global_free_page(&ttm_mem_glob, ttm->pages[i], - PAGE_SIZE); - ttm_pool_free(&bdev->pool, ttm); return ret; } @@ -566,7 +486,6 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev, { struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt, dma_ttm); - unsigned int i; vmw_ttm_unbind(bdev, ttm); @@ -577,10 +496,6 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev, vmw_ttm_unmap_dma(vmw_tt); - for (i = 0; i < ttm->num_pages; ++i) - ttm_mem_global_free_page(&ttm_mem_glob, ttm->pages[i], - PAGE_SIZE); - ttm_pool_free(&bdev->pool, ttm); } @@ -624,6 +539,7 @@ static int vmw_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource * switch (mem->mem_type) { case TTM_PL_SYSTEM: + case VMW_PL_SYSTEM: case VMW_PL_GMR: case VMW_PL_MOB: return 0; @@ -670,6 +586,11 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo) (void) ttm_bo_wait(bo, false, false); } +static bool vmw_memtype_is_system(uint32_t mem_type) +{ + return mem_type == TTM_PL_SYSTEM || mem_type == VMW_PL_SYSTEM; +} + static int vmw_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, @@ -680,7 +601,7 @@ static int vmw_move(struct ttm_buffer_object *bo, struct ttm_resource_manager *new_man = ttm_manager_type(bo->bdev, new_mem->mem_type); int ret; - if (new_man->use_tt && new_mem->mem_type != TTM_PL_SYSTEM) { + if (new_man->use_tt && !vmw_memtype_is_system(new_mem->mem_type)) { ret = vmw_ttm_bind(bo->bdev, bo->ttm, new_mem); if (ret) return ret; @@ -689,7 +610,7 @@ static int vmw_move(struct ttm_buffer_object *bo, vmw_move_notify(bo, bo->resource, new_mem); if (old_man->use_tt && new_man->use_tt) { - if (bo->resource->mem_type == TTM_PL_SYSTEM) { + if (vmw_memtype_is_system(bo->resource->mem_type)) { ttm_bo_move_null(bo, new_mem); return 0; } @@ -736,7 +657,7 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv, int ret; ret = vmw_bo_create_kernel(dev_priv, bo_size, - &vmw_sys_placement, + &vmw_pt_sys_placement, &bo); if (unlikely(ret != 0)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index 0a4c340252ec..265f7c48d856 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -27,30 +27,44 @@ #include "vmwgfx_drv.h" -static struct ttm_buffer_object *vmw_bo_vm_lookup(struct ttm_device *bdev, - unsigned long offset, - unsigned long pages) +static int vmw_bo_vm_lookup(struct ttm_device *bdev, + struct drm_file *filp, + unsigned long offset, + unsigned long pages, + struct ttm_buffer_object **p_bo) { struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, bdev); struct drm_device *drm = &dev_priv->drm; struct drm_vma_offset_node *node; - struct ttm_buffer_object *bo = NULL; + int ret; + + *p_bo = NULL; drm_vma_offset_lock_lookup(bdev->vma_manager); node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, pages); if (likely(node)) { - bo = container_of(node, struct ttm_buffer_object, + *p_bo = container_of(node, struct ttm_buffer_object, base.vma_node); - bo = ttm_bo_get_unless_zero(bo); + *p_bo = ttm_bo_get_unless_zero(*p_bo); } drm_vma_offset_unlock_lookup(bdev->vma_manager); - if (!bo) + if (!*p_bo) { drm_err(drm, "Could not find buffer object to map\n"); + return -EINVAL; + } + + if (!drm_vma_node_is_allowed(node, filp)) { + ret = -EACCES; + goto out_no_access; + } - return bo; + return 0; +out_no_access: + ttm_bo_put(*p_bo); + return ret; } int vmw_mmap(struct file *filp, struct vm_area_struct *vma) @@ -64,7 +78,6 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) }; struct drm_file *file_priv = filp->private_data; struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); - struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct ttm_device *bdev = &dev_priv->bdev; struct ttm_buffer_object *bo; int ret; @@ -72,13 +85,9 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START)) return -EINVAL; - bo = vmw_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma)); - if (unlikely(!bo)) - return -EINVAL; - - ret = vmw_user_bo_verify_access(bo, tfile); + ret = vmw_bo_vm_lookup(bdev, file_priv, vma->vm_pgoff, vma_pages(vma), &bo); if (unlikely(ret != 0)) - goto out_unref; + return ret; ret = ttm_bo_mmap_obj(vma, bo); if (unlikely(ret != 0)) @@ -99,38 +108,3 @@ out_unref: return ret; } -/* struct vmw_validation_mem callback */ -static int vmw_vmt_reserve(struct vmw_validation_mem *m, size_t size) -{ - static struct ttm_operation_ctx ctx = {.interruptible = false, - .no_wait_gpu = false}; - struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm); - - return ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, &ctx); -} - -/* struct vmw_validation_mem callback */ -static void vmw_vmt_unreserve(struct vmw_validation_mem *m, size_t size) -{ - struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm); - - return ttm_mem_global_free(vmw_mem_glob(dev_priv), size); -} - -/** - * vmw_validation_mem_init_ttm - Interface the validation memory tracker - * to ttm. - * @dev_priv: Pointer to struct vmw_private. The reason we choose a vmw private - * rather than a struct vmw_validation_mem is to make sure assumption in the - * callbacks that struct vmw_private derives from struct vmw_validation_mem - * holds true. - * @gran: The recommended allocation granularity - */ -void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, size_t gran) -{ - struct vmw_validation_mem *vvm = &dev_priv->vvm; - - vvm->reserve_mem = vmw_vmt_reserve; - vvm->unreserve_mem = vmw_vmt_unreserve; - vvm->gran = gran; -} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_va.c b/drivers/gpu/drm/vmwgfx/vmwgfx_va.c index ebc1d83c34b4..6ad744ae07f5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_va.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_va.c @@ -117,7 +117,7 @@ int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data; return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - arg->stream_id, TTM_REF_USAGE); + arg->stream_id); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index b09094b50c5d..f46891012be3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -29,6 +29,9 @@ #include "vmwgfx_validation.h" #include "vmwgfx_drv.h" + +#define VMWGFX_VALIDATION_MEM_GRAN (16*PAGE_SIZE) + /** * struct vmw_validation_bo_node - Buffer object validation metadata. * @base: Metadata used for TTM reservation- and validation. @@ -43,7 +46,7 @@ */ struct vmw_validation_bo_node { struct ttm_validate_buffer base; - struct drm_hash_item hash; + struct vmwgfx_hash_item hash; unsigned int coherent_count; u32 as_mob : 1; u32 cpu_blit : 1; @@ -72,7 +75,7 @@ struct vmw_validation_bo_node { */ struct vmw_validation_res_node { struct list_head head; - struct drm_hash_item hash; + struct vmwgfx_hash_item hash; struct vmw_resource *res; struct vmw_buffer_object *new_backup; unsigned long new_backup_offset; @@ -113,13 +116,8 @@ void *vmw_validation_mem_alloc(struct vmw_validation_context *ctx, struct page *page; if (ctx->vm && ctx->vm_size_left < PAGE_SIZE) { - int ret = ctx->vm->reserve_mem(ctx->vm, ctx->vm->gran); - - if (ret) - return NULL; - - ctx->vm_size_left += ctx->vm->gran; - ctx->total_mem += ctx->vm->gran; + ctx->vm_size_left += VMWGFX_VALIDATION_MEM_GRAN; + ctx->total_mem += VMWGFX_VALIDATION_MEM_GRAN; } page = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -159,7 +157,6 @@ static void vmw_validation_mem_free(struct vmw_validation_context *ctx) ctx->mem_size_left = 0; if (ctx->vm && ctx->total_mem) { - ctx->vm->unreserve_mem(ctx->vm, ctx->total_mem); ctx->total_mem = 0; ctx->vm_size_left = 0; } @@ -184,9 +181,9 @@ vmw_validation_find_bo_dup(struct vmw_validation_context *ctx, return NULL; if (ctx->ht) { - struct drm_hash_item *hash; + struct vmwgfx_hash_item *hash; - if (!drm_ht_find_item(ctx->ht, (unsigned long) vbo, &hash)) + if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) vbo, &hash)) bo_node = container_of(hash, typeof(*bo_node), hash); } else { struct vmw_validation_bo_node *entry; @@ -221,9 +218,9 @@ vmw_validation_find_res_dup(struct vmw_validation_context *ctx, return NULL; if (ctx->ht) { - struct drm_hash_item *hash; + struct vmwgfx_hash_item *hash; - if (!drm_ht_find_item(ctx->ht, (unsigned long) res, &hash)) + if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) res, &hash)) res_node = container_of(hash, typeof(*res_node), hash); } else { struct vmw_validation_res_node *entry; @@ -280,7 +277,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, if (ctx->ht) { bo_node->hash.key = (unsigned long) vbo; - ret = drm_ht_insert_item(ctx->ht, &bo_node->hash); + ret = vmwgfx_ht_insert_item(ctx->ht, &bo_node->hash); if (ret) { DRM_ERROR("Failed to initialize a buffer " "validation entry.\n"); @@ -335,7 +332,7 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, if (ctx->ht) { node->hash.key = (unsigned long) res; - ret = drm_ht_insert_item(ctx->ht, &node->hash); + ret = vmwgfx_ht_insert_item(ctx->ht, &node->hash); if (ret) { DRM_ERROR("Failed to initialize a resource validation " "entry.\n"); @@ -688,13 +685,13 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx) return; list_for_each_entry(entry, &ctx->bo_list, base.head) - (void) drm_ht_remove_item(ctx->ht, &entry->hash); + (void) vmwgfx_ht_remove_item(ctx->ht, &entry->hash); list_for_each_entry(val, &ctx->resource_list, head) - (void) drm_ht_remove_item(ctx->ht, &val->hash); + (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); list_for_each_entry(val, &ctx->resource_ctx_list, head) - (void) drm_ht_remove_item(ctx->ht, &val->hash); + (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); ctx->ht = NULL; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h index 739906d1b3eb..f21df053882b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h @@ -31,29 +31,15 @@ #include <linux/list.h> #include <linux/ww_mutex.h> -#include <drm/drm_hashtab.h> #include <drm/ttm/ttm_execbuf_util.h> +#include "vmwgfx_hashtab.h" + #define VMW_RES_DIRTY_NONE 0 #define VMW_RES_DIRTY_SET BIT(0) #define VMW_RES_DIRTY_CLEAR BIT(1) /** - * struct vmw_validation_mem - Custom interface to provide memory reservations - * for the validation code. - * @reserve_mem: Callback to reserve memory - * @unreserve_mem: Callback to unreserve memory - * @gran: Reservation granularity. Contains a hint how much memory should - * be reserved in each call to @reserve_mem(). A slow implementation may want - * reservation to be done in large batches. - */ -struct vmw_validation_mem { - int (*reserve_mem)(struct vmw_validation_mem *m, size_t size); - void (*unreserve_mem)(struct vmw_validation_mem *m, size_t size); - size_t gran; -}; - -/** * struct vmw_validation_context - Per command submission validation context * @ht: Hash table used to find resource- or buffer object duplicates * @resource_list: List head for resource validation metadata @@ -73,7 +59,7 @@ struct vmw_validation_mem { * @total_mem: Amount of reserved memory. */ struct vmw_validation_context { - struct drm_open_hash *ht; + struct vmwgfx_open_hash *ht; struct list_head resource_list; struct list_head resource_ctx_list; struct list_head bo_list; @@ -129,21 +115,6 @@ vmw_validation_has_bos(struct vmw_validation_context *ctx) } /** - * vmw_validation_set_val_mem - Register a validation mem object for - * validation memory reservation - * @ctx: The validation context - * @vm: Pointer to a struct vmw_validation_mem - * - * Must be set before the first attempt to allocate validation memory. - */ -static inline void -vmw_validation_set_val_mem(struct vmw_validation_context *ctx, - struct vmw_validation_mem *vm) -{ - ctx->vm = vm; -} - -/** * vmw_validation_set_ht - Register a hash table for duplicate finding * @ctx: The validation context * @ht: Pointer to a hash table to use for duplicate finding @@ -151,7 +122,7 @@ vmw_validation_set_val_mem(struct vmw_validation_context *ctx, * available at validation context declaration time */ static inline void vmw_validation_set_ht(struct vmw_validation_context *ctx, - struct drm_open_hash *ht) + struct vmwgfx_open_hash *ht) { ctx->ht = ht; } @@ -190,22 +161,6 @@ vmw_validation_bo_fence(struct vmw_validation_context *ctx, } /** - * vmw_validation_context_init - Initialize a validation context - * @ctx: Pointer to the validation context to initialize - * - * This function initializes a validation context with @merge_dups set - * to false - */ -static inline void -vmw_validation_context_init(struct vmw_validation_context *ctx) -{ - memset(ctx, 0, sizeof(*ctx)); - INIT_LIST_HEAD(&ctx->resource_list); - INIT_LIST_HEAD(&ctx->resource_ctx_list); - INIT_LIST_HEAD(&ctx->bo_list); -} - -/** * vmw_validation_align - Align a validation memory allocation * @val: The size to be aligned * diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 434064c820e8..e63088c2121d 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -761,6 +761,7 @@ static struct xenbus_driver xen_driver = { .probe = xen_drv_probe, .remove = xen_drv_remove, .otherend_changed = displback_changed, + .not_essential = true, }; static int __init xen_drv_init(void) diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index c3d08269faa9..d8d38d86d5c6 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -7,7 +7,6 @@ config DRM_ZYNQMP_DPSUB depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE select DRM_GEM_CMA_HELPER - select DRM_KMS_CMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY help |