aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt/intel_engine_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c215
1 files changed, 195 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 14c6ddbbfde8..283870c65991 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -21,8 +21,9 @@
#include "intel_engine_user.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
-#include "intel_gt_requests.h"
+#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_reset.h"
@@ -71,6 +72,62 @@ static const struct engine_info intel_engines[] = {
{ .graphics_ver = 6, .base = BLT_RING_BASE }
},
},
+ [BCS1] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 1,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
+ },
+ },
+ [BCS2] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 2,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
+ },
+ },
+ [BCS3] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 3,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
+ },
+ },
+ [BCS4] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 4,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
+ },
+ },
+ [BCS5] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 5,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
+ },
+ },
+ [BCS6] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 6,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
+ },
+ },
+ [BCS7] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 7,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
+ },
+ },
+ [BCS8] = {
+ .class = COPY_ENGINE_CLASS,
+ .instance = 8,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
+ },
+ },
[VCS0] = {
.class = VIDEO_DECODE_CLASS,
.instance = 0,
@@ -334,6 +391,14 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
static const u32 engine_reset_domains[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
+ [BCS1] = XEHPC_GRDOM_BLT1,
+ [BCS2] = XEHPC_GRDOM_BLT2,
+ [BCS3] = XEHPC_GRDOM_BLT3,
+ [BCS4] = XEHPC_GRDOM_BLT4,
+ [BCS5] = XEHPC_GRDOM_BLT5,
+ [BCS6] = XEHPC_GRDOM_BLT6,
+ [BCS7] = XEHPC_GRDOM_BLT7,
+ [BCS8] = XEHPC_GRDOM_BLT8,
[VCS0] = GEN11_GRDOM_MEDIA,
[VCS1] = GEN11_GRDOM_MEDIA2,
[VCS2] = GEN11_GRDOM_MEDIA3,
@@ -610,8 +675,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
return;
- ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
- ss_per_ccs);
+ ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
+ ss_per_ccs);
/*
* If all DSS in a quadrant are fused off, the corresponding CCS
* engine is not available for use.
@@ -622,6 +687,34 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
}
}
+static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_gt_info *info = &gt->info;
+ unsigned long meml3_mask;
+ unsigned long quad;
+
+ meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
+ meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);
+
+ /*
+ * Link Copy engines may be fused off according to meml3_mask. Each
+ * bit is a quad that houses 2 Link Copy and two Sub Copy engines.
+ */
+ for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) {
+ unsigned int instance = quad * 2 + 1;
+ intel_engine_mask_t mask = GENMASK(_BCS(instance + 1),
+ _BCS(instance));
+
+ if (mask & info->engine_mask) {
+ drm_dbg(&i915->drm, "bcs%u fused off\n", instance);
+ drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1);
+
+ info->engine_mask &= ~mask;
+ }
+ }
+}
+
/*
* Determine which engines are fused off in our particular hardware.
* Note that we have a catch-22 situation where we need to be able to access
@@ -704,6 +797,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
engine_mask_apply_compute_fuses(gt);
+ engine_mask_apply_copy_fuses(gt);
return info->engine_mask;
}
@@ -1282,10 +1376,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
/*
- * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+ * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (GRAPHICS_VER(engine->i915) == 12)
+ if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
@@ -1308,6 +1402,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
return -ENODEV;
ENGINE_TRACE(engine, "\n");
+ /*
+ * TODO: Find out why occasionally stopping the CS times out. Seen
+ * especially with gem_eio tests.
+ *
+ * Occasionally trying to stop the cs times out, but does not adversely
+ * affect functionality. The timeout is set as a config parameter that
+ * defaults to 100ms. In most cases the follow up operation is to wait
+ * for pending MI_FORCE_WAKES. The assumption is that this timeout is
+ * sufficient for any pending MI_FORCEWAKEs to complete. Once root
+ * caused, the caller must check and handle the return from this
+ * function.
+ */
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
"timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
@@ -1334,12 +1440,76 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
-static u32
-read_subslice_reg(const struct intel_engine_cs *engine,
- int slice, int subslice, i915_reg_t reg)
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+ static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+ [RCS0] = MSG_IDLE_CS,
+ [BCS0] = MSG_IDLE_BCS,
+ [VCS0] = MSG_IDLE_VCS0,
+ [VCS1] = MSG_IDLE_VCS1,
+ [VCS2] = MSG_IDLE_VCS2,
+ [VCS3] = MSG_IDLE_VCS3,
+ [VCS4] = MSG_IDLE_VCS4,
+ [VCS5] = MSG_IDLE_VCS5,
+ [VCS6] = MSG_IDLE_VCS6,
+ [VCS7] = MSG_IDLE_VCS7,
+ [VECS0] = MSG_IDLE_VECS0,
+ [VECS1] = MSG_IDLE_VECS1,
+ [VECS2] = MSG_IDLE_VECS2,
+ [VECS3] = MSG_IDLE_VECS3,
+ [CCS0] = MSG_IDLE_CS,
+ [CCS1] = MSG_IDLE_CS,
+ [CCS2] = MSG_IDLE_CS,
+ [CCS3] = MSG_IDLE_CS,
+ };
+ u32 val;
+
+ if (!_reg[engine->id].reg) {
+ drm_err(&engine->i915->drm,
+ "MSG IDLE undefined for engine id %u\n", engine->id);
+ return 0;
+ }
+
+ val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+ /* bits[29:25] & bits[13:9] >> shift */
+ return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
{
- return intel_uncore_read_with_mcr_steering(engine->uncore, reg,
- slice, subslice);
+ int ret;
+
+ /* Ensure GPM receives fw up/down after CS is stopped */
+ udelay(1);
+
+ /* Wait for forcewake request to complete in GPM */
+ ret = __intel_wait_for_register_fw(gt->uncore,
+ GEN9_PWRGT_DOMAIN_STATUS,
+ fw_mask, fw_mask, 5000, 0, NULL);
+
+ /* Ensure CS receives fw ack from GPM */
+ udelay(1);
+
+ if (ret)
+ GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
+ * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
+ * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
+ * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
+ * are concerned only with the gt reset here, we use a logical OR of pending
+ * forcewakeups from all reset domains and then wait for them to complete by
+ * querying PWRGT_DOMAIN_STATUS.
+ */
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
+{
+ u32 fw_pending = __cs_pending_mi_force_wakes(engine);
+
+ if (fw_pending)
+ __gpm_wait_for_fw_complete(engine->gt, fw_pending);
}
/* NB: please notice the memset */
@@ -1375,28 +1545,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
instdone->sampler[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_SAMPLER_INSTDONE);
+ intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
instdone->row[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_ROW_INSTDONE);
+ intel_gt_mcr_read(engine->gt,
+ GEN7_ROW_INSTDONE,
+ slice, subslice);
}
} else {
for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
instdone->sampler[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_SAMPLER_INSTDONE);
+ intel_gt_mcr_read(engine->gt,
+ GEN7_SAMPLER_INSTDONE,
+ slice, subslice);
instdone->row[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_ROW_INSTDONE);
+ intel_gt_mcr_read(engine->gt,
+ GEN7_ROW_INSTDONE,
+ slice, subslice);
}
}
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice)
instdone->geom_svg[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- XEHPG_INSTDONE_GEOM_SVG);
+ intel_gt_mcr_read(engine->gt,
+ XEHPG_INSTDONE_GEOM_SVG,
+ slice, subslice);
}
} else if (GRAPHICS_VER(i915) >= 7) {
instdone->instdone =