diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_engine_cs.c | 215 |
1 files changed, 195 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 14c6ddbbfde8..283870c65991 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,8 +21,9 @@ #include "intel_engine_user.h" #include "intel_execlists_submission.h" #include "intel_gt.h" -#include "intel_gt_requests.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" @@ -71,6 +72,62 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 6, .base = BLT_RING_BASE } }, }, + [BCS1] = { + .class = COPY_ENGINE_CLASS, + .instance = 1, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } + }, + }, + [BCS2] = { + .class = COPY_ENGINE_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } + }, + }, + [BCS3] = { + .class = COPY_ENGINE_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } + }, + }, + [BCS4] = { + .class = COPY_ENGINE_CLASS, + .instance = 4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } + }, + }, + [BCS5] = { + .class = COPY_ENGINE_CLASS, + .instance = 5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } + }, + }, + [BCS6] = { + .class = COPY_ENGINE_CLASS, + .instance = 6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } + }, + }, + [BCS7] = { + .class = COPY_ENGINE_CLASS, + .instance = 7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } + }, + }, + [BCS8] = { + .class = COPY_ENGINE_CLASS, + .instance = 8, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } + }, + }, [VCS0] = { .class = VIDEO_DECODE_CLASS, .instance = 0, @@ -334,6 +391,14 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id) static const u32 engine_reset_domains[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, + [BCS1] = XEHPC_GRDOM_BLT1, + [BCS2] = XEHPC_GRDOM_BLT2, + [BCS3] = XEHPC_GRDOM_BLT3, + [BCS4] = XEHPC_GRDOM_BLT4, + [BCS5] = XEHPC_GRDOM_BLT5, + [BCS6] = XEHPC_GRDOM_BLT6, + [BCS7] = XEHPC_GRDOM_BLT7, + [BCS8] = XEHPC_GRDOM_BLT8, [VCS0] = GEN11_GRDOM_MEDIA, [VCS1] = GEN11_GRDOM_MEDIA2, [VCS2] = GEN11_GRDOM_MEDIA3, @@ -610,8 +675,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) return; - ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), - ss_per_ccs); + ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, + ss_per_ccs); /* * If all DSS in a quadrant are fused off, the corresponding CCS * engine is not available for use. @@ -622,6 +687,34 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) } } +static void engine_mask_apply_copy_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = >->info; + unsigned long meml3_mask; + unsigned long quad; + + meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3); + meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask); + + /* + * Link Copy engines may be fused off according to meml3_mask. Each + * bit is a quad that houses 2 Link Copy and two Sub Copy engines. + */ + for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) { + unsigned int instance = quad * 2 + 1; + intel_engine_mask_t mask = GENMASK(_BCS(instance + 1), + _BCS(instance)); + + if (mask & info->engine_mask) { + drm_dbg(&i915->drm, "bcs%u fused off\n", instance); + drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1); + + info->engine_mask &= ~mask; + } + } +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -704,6 +797,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); engine_mask_apply_compute_fuses(gt); + engine_mask_apply_copy_fuses(gt); return info->engine_mask; } @@ -1282,10 +1376,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); /* - * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is + * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (GRAPHICS_VER(engine->i915) == 12) + if (IS_GRAPHICS_VER(engine->i915, 11, 12)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); @@ -1308,6 +1402,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) return -ENODEV; ENGINE_TRACE(engine, "\n"); + /* + * TODO: Find out why occasionally stopping the CS times out. Seen + * especially with gem_eio tests. + * + * Occasionally trying to stop the cs times out, but does not adversely + * affect functionality. The timeout is set as a config parameter that + * defaults to 100ms. In most cases the follow up operation is to wait + * for pending MI_FORCE_WAKES. The assumption is that this timeout is + * sufficient for any pending MI_FORCEWAKEs to complete. Once root + * caused, the caller must check and handle the return from this + * function. + */ if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", @@ -1334,12 +1440,76 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } -static u32 -read_subslice_reg(const struct intel_engine_cs *engine, - int slice, int subslice, i915_reg_t reg) +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) +{ + static const i915_reg_t _reg[I915_NUM_ENGINES] = { + [RCS0] = MSG_IDLE_CS, + [BCS0] = MSG_IDLE_BCS, + [VCS0] = MSG_IDLE_VCS0, + [VCS1] = MSG_IDLE_VCS1, + [VCS2] = MSG_IDLE_VCS2, + [VCS3] = MSG_IDLE_VCS3, + [VCS4] = MSG_IDLE_VCS4, + [VCS5] = MSG_IDLE_VCS5, + [VCS6] = MSG_IDLE_VCS6, + [VCS7] = MSG_IDLE_VCS7, + [VECS0] = MSG_IDLE_VECS0, + [VECS1] = MSG_IDLE_VECS1, + [VECS2] = MSG_IDLE_VECS2, + [VECS3] = MSG_IDLE_VECS3, + [CCS0] = MSG_IDLE_CS, + [CCS1] = MSG_IDLE_CS, + [CCS2] = MSG_IDLE_CS, + [CCS3] = MSG_IDLE_CS, + }; + u32 val; + + if (!_reg[engine->id].reg) { + drm_err(&engine->i915->drm, + "MSG IDLE undefined for engine id %u\n", engine->id); + return 0; + } + + val = intel_uncore_read(engine->uncore, _reg[engine->id]); + + /* bits[29:25] & bits[13:9] >> shift */ + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; +} + +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) { - return intel_uncore_read_with_mcr_steering(engine->uncore, reg, - slice, subslice); + int ret; + + /* Ensure GPM receives fw up/down after CS is stopped */ + udelay(1); + + /* Wait for forcewake request to complete in GPM */ + ret = __intel_wait_for_register_fw(gt->uncore, + GEN9_PWRGT_DOMAIN_STATUS, + fw_mask, fw_mask, 5000, 0, NULL); + + /* Ensure CS receives fw ack from GPM */ + udelay(1); + + if (ret) + GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); +} + +/* + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The + * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we + * are concerned only with the gt reset here, we use a logical OR of pending + * forcewakeups from all reset domains and then wait for them to complete by + * querying PWRGT_DOMAIN_STATUS. + */ +void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine) +{ + u32 fw_pending = __cs_pending_mi_force_wakes(engine); + + if (fw_pending) + __gpm_wait_for_fw_complete(engine->gt, fw_pending); } /* NB: please notice the memset */ @@ -1375,28 +1545,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } } else { for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) instdone->geom_svg[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - XEHPG_INSTDONE_GEOM_SVG); + intel_gt_mcr_read(engine->gt, + XEHPG_INSTDONE_GEOM_SVG, + slice, subslice); } } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = |