aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
commit477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6 (patch)
tree1897dd1de49e1ea24897163533e2d8ead5dad0ad /drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
parent835d31d319d9c8c4eb6cac074643360ba0ecab10 (diff)
parent8f0284f190e6a0aa09015090568c03f18288231a (diff)
downloadlinux-477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6.tar.gz
Merge tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - i915 has seen a lot of refactoring and uAPI cleanups due to a change in the upstream direction going forward This has all been audited with known userspace, but there may be some pitfalls that were missed. - i915 now uses common TTM to enable discrete memory on DG1/2 GPUs - i915 enables Jasper and Elkhart Lake by default and has preliminary XeHP/DG2 support - amdgpu adds support for Cyan Skillfish - lots of implicit fencing rules documented and fixed up in drivers - msm now uses the core scheduler - the irq midlayer has been removed for non-legacy drivers - the sysfb code now works on more than x86. Otherwise the usual smattering of stuff everywhere, panels, bridges, refactorings. Detailed summary: core: - extract i915 eDP backlight into core - DP aux bus support - drm_device.irq_enabled removed - port drivers to native irq interfaces - export gem shadow plane handling for vgem - print proper driver name in framebuffer registration - driver fixes for implicit fencing rules - ARM fixed rate compression modifier added - updated fb damage handling - rmfb ioctl logging/docs - drop drm_gem_object_put_locked - define DRM_FORMAT_MAX_PLANES - add gem fb vmap/vunmap helpers - add lockdep_assert(once) helpers - mark drm irq midlayer as legacy - use offset adjusted bo mapping conversion vgaarb: - cleanups fbdev: - extend efifb handling to all arches - div by 0 fixes for multiple drivers udmabuf: - add hugepage mapping support dma-buf: - non-dynamic exporter fixups - document implicit fencing rules amdgpu: - Initial Cyan Skillfish support - switch virtual DCE over to vkms based atomic - VCN/JPEG power down fixes - NAVI PCIE link handling fixes - AMD HDMI freesync fixes - Yellow Carp + Beige Goby fixes - Clockgating/S0ix/SMU/EEPROM fixes - embed hw fence in job - rework dma-resv handling - ensure eviction to system ram amdkfd: - uapi: SVM address range query added - sysfs leak fix - GPUVM TLB optimizations - vmfault/migration counters i915: - Enable JSL and EHL by default - preliminary XeHP/DG2 support - remove all CNL support (never shipped) - move to TTM for discrete memory support - allow mixed object mmap handling - GEM uAPI spring cleaning - add I915_MMAP_OBJECT_FIXED - reinstate ADL-P mmap ioctls - drop a bunch of unused by userspace features - disable and remove GPU relocations - revert some i915 misfeatures - major refactoring of GuC for Gen11+ - execbuffer object locking separate step - reject caching/set-domain on discrete - Enable pipe DMC loading on XE-LPD and ADL-P - add PSF GV point support - Refactor and fix DDI buffer translations - Clean up FBC CFB allocation code - Finish INTEL_GEN() and friends macro conversions nouveau: - add eDP backlight support - implicit fence fix msm: - a680/7c3 support - drm/scheduler conversion panfrost: - rework GPU reset virtio: - fix fencing for planes ast: - add detect support bochs: - move to tiny GPU driver vc4: - use hotplug irqs - HDMI codec support vmwgfx: - use internal vmware device headers ingenic: - demidlayering irq rcar-du: - shutdown fixes - convert to bridge connector helpers zynqmp-dsub: - misc fixes mgag200: - convert PLL handling to atomic mediatek: - MT8133 AAL support - gem mmap object support - MT8167 support etnaviv: - NXP Layerscape LS1028A SoC support - GEM mmap cleanups tegra: - new user API exynos: - missing unlock fix - build warning fix - use refcount_t" * tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm: (1318 commits) drm/amd/display: Move AllowDRAMSelfRefreshOrDRAMClockChangeInVblank to bounding box drm/amd/display: Remove duplicate dml init drm/amd/display: Update bounding box states (v2) drm/amd/display: Update number of DCN3 clock states drm/amdgpu: disable GFX CGCG in aldebaran drm/amdgpu: Clear RAS interrupt status on aldebaran drm/amdgpu: Add support for RAS XGMI err query drm/amdkfd: Account for SH/SE count when setting up cu masks. drm/amdgpu: rename amdgpu_bo_get_preferred_pin_domain drm/amdgpu: drop redundant cancel_delayed_work_sync call drm/amdgpu: add missing cleanups for more ASICs on UVD/VCE suspend drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on suspend drm/amdkfd: map SVM range with correct access permission drm/amdkfd: check access permisson to restore retry fault drm/amdgpu: Update RAS XGMI Error Query drm/amdgpu: Add driver infrastructure for MCA RAS drm/amd/display: Add Logging for HDMI color depth information drm/amd/amdgpu: consolidate PSP TA init shared buf functions drm/amd/amdgpu: add name field back to ras_common_if drm/amdgpu: Fix build with missing pm_suspend_target_state module export ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c84
1 files changed, 63 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 88813dad731f..c021519af810 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -98,36 +98,78 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
uint32_t *se_mask)
{
struct kfd_cu_info cu_info;
- uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0};
- int i, se, sh, cu = 0;
-
+ uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
+ int i, se, sh, cu;
amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;
+ /* Exceeding these bounds corrupts the stack and indicates a coding error.
+ * Returning with no CU's enabled will hang the queue, which should be
+ * attention grabbing.
+ */
+ if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
+ pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+ return;
+ }
+ if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
+ pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+ cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+ return;
+ }
+ /* Count active CUs per SH.
+ *
+ * Some CUs in an SH may be disabled. HW expects disabled CUs to be
+ * represented in the high bits of each SH's enable mask (the upper and lower
+ * 16 bits of se_mask) and will take care of the actual distribution of
+ * disabled CUs within each SH automatically.
+ * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
+ *
+ * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+ */
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
- cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
-
- /* Symmetrically map cu_mask to all SEs:
- * cu_mask[0] bit0 -> se_mask[0] bit0;
- * cu_mask[0] bit1 -> se_mask[1] bit0;
- * ... (if # SE is 4)
- * cu_mask[0] bit4 -> se_mask[0] bit1;
+ cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
+
+ /* Symmetrically map cu_mask to all SEs & SHs:
+ * se_mask programs up to 2 SH in the upper and lower 16 bits.
+ *
+ * Examples
+ * Assuming 1 SH/SE, 4 SEs:
+ * cu_mask[0] bit0 -> se_mask[0] bit0
+ * cu_mask[0] bit1 -> se_mask[1] bit0
+ * ...
+ * cu_mask[0] bit4 -> se_mask[0] bit1
+ * ...
+ *
+ * Assuming 2 SH/SE, 4 SEs
+ * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0)
+ * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0)
+ * ...
+ * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0)
+ * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0)
+ * ...
+ * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
* ...
+ *
+ * First ensure all CUs are disabled, then enable user specified CUs.
*/
- se = 0;
- for (i = 0; i < cu_mask_count; i++) {
- if (cu_mask[i / 32] & (1 << (i % 32)))
- se_mask[se] |= 1 << cu;
-
- do {
- se++;
- if (se == cu_info.num_shader_engines) {
- se = 0;
- cu++;
+ for (i = 0; i < cu_info.num_shader_engines; i++)
+ se_mask[i] = 0;
+
+ i = 0;
+ for (cu = 0; cu < 16; cu++) {
+ for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
+ for (se = 0; se < cu_info.num_shader_engines; se++) {
+ if (cu_per_sh[se][sh] > cu) {
+ if (cu_mask[i / 32] & (1 << (i % 32)))
+ se_mask[se] |= 1 << (cu + sh * 16);
+ i++;
+ if (i == cu_mask_count)
+ return;
+ }
}
- } while (cu >= cu_per_se[se] && cu < 32);
+ }
}
}