From 192bb40f030a41ca95c5cff8c9340b725bc7ba8b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 17 Nov 2022 09:33:58 -0800 Subject: drm/i915/gt: Manage uncore->lock while waiting on MCR register The GT MCR code currently relies on uncore->lock to avoid race conditions on the steering control register during MCR operations. The *_fw() versions of MCR operations expect the caller to already hold uncore->lock, while the non-fw variants manage the lock internally. However the sole callsite of intel_gt_mcr_wait_for_reg_fw() does not currently obtain the forcewake lock, allowing a potential race condition (and triggering an assertion on lockdep builds). Furthermore, since 'wait for register value' requests may not return immediately, it is undesirable to hold a fundamental lock like uncore->lock for the entire wait and block all other MMIO for the duration; rather the lock is only needed around the MCR read operations and can be released during the delays. Convert intel_gt_mcr_wait_for_reg_fw() to a non-fw variant that will manage uncore->lock internally. This does have the side effect of causing an unnecessary lookup in the forcewake table on each read operation, but since the caller is still holding the relevant forcewake domain, this will ultimately just incremenent the reference count and won't actually cause any additional MMIO traffic. In the future we plan to switch to a dedicated MCR lock to protect the steering critical section rather than using the overloaded and high-traffic uncore->lock; on MTL and beyond the new lock can be implemented on top of the hardware-provided synchonization mechanism for steering. Fixes: 3068bec83eea ("drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()") Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221117173358.1980230-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++++++++++-------- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 12 ++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 04908f1d6564..ca3210dd7b22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1034,9 +1034,9 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) { if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS); + return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); else return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, TLB_INVAL_TIMEOUT_US, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 830edffe88cc..d9a8ff9e5e57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, * * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms) +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) { - u32 reg_value = 0; -#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) int ret; + lockdep_assert_not_held(>->uncore->lock); + +#define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value) + /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 3fb0502bff22..ae93b20e1c17 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -37,12 +37,12 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, - i915_mcr_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms); +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); /* * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice -- cgit From 71feb6f901ecba962177a0a029dc545c91a4b396 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Nov 2022 11:52:49 +0000 Subject: drm/i915: Fix workarounds on Gen2-3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 3653727560d0 ("drm/i915: Simplify internal helper function signature") I broke the old platforms by not noticing engine workaround init does not initialize the list on old platforms. Fix it by always initializing which already does the right thing by mostly not doing anything if there aren't any workarounds on the list. Signed-off-by: Tvrtko Ursulin Fixes: 3653727560d0 ("drm/i915: Simplify internal helper function signature") Reported-by: Ville Syrjälä Cc: Mika Kuoppala Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221118115249.2683946-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c5f1edfeece2..e39ce0bf91b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3010,7 +3010,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) + if (GRAPHICS_VER(engine->i915) < 4) return; engine_fake_wa_init(engine, wal); @@ -3035,9 +3035,6 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (GRAPHICS_VER(engine->i915) < 4) - return; - wa_init_start(wal, engine->gt, "engine", engine->name); engine_init_workarounds(engine, wal); wa_init_finish(wal); -- cgit From 67b5655b2e717b8b681f8acd9cbddd2d687d5d4e Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Sat, 19 Nov 2022 00:03:54 +0530 Subject: drm/i915/mtl: Enable Idle Messaging for GSC CS By defaut idle messaging is disabled for GSC CS so to unblock RC6 entry on media tile idle messaging need to be enabled. v2: - Fix review comments (Vinay) - Set GSC idle hysteresis as per spec (Badal) v3: - Fix review comments (Rodrigo) Bspec: 71496 Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Signed-off-by: Badal Nilawar Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221118183354.1047829-1-badal.nilawar@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b0a4a2dbe3ee..e971b153fda9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -15,6 +15,22 @@ #include "intel_rc6.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_gt_regs.h" + +static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_METEORLAKE(i915) && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); + /* hysteresis 0xA=5us as recommended in spec*/ + intel_uncore_write(engine->gt->uncore, + PWRCTX_MAXCNT_GSCCS, + 0xA); + } +} static void dbg_poison_ce(struct intel_context *ce) { @@ -275,6 +291,8 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); intel_engine_init_heartbeat(engine); + + intel_gsc_idle_msg_enable(engine); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index bd3848bd7b6a..3658e12db74b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -916,6 +916,10 @@ #define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) #define MSG_IDLE_FW_SHIFT 9 +#define RC_PSMI_CTRL_GSCCS _MMIO(0x11a050) +#define IDLE_MSG_DISABLE REG_BIT(0) +#define PWRCTX_MAXCNT_GSCCS _MMIO(0x11a054) + #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) -- cgit From e746f84b8e813816951b63485134927ed6763a1b Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 10 Nov 2022 17:19:12 +0000 Subject: i915/uncore: Acquire fw before loop in intel_uncore_read64_2x32 PMU reads the GT timestamp as a 2x32 mmio read and since upper and lower 32 bit registers are read in a loop, there is a latency involved between getting the GT timestamp and the CPU timestamp. As part of the resolution, refactor intel_uncore_read64_2x32 to acquire forcewake and uncore lock prior to reading upper and lower regs. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221110171913.670286-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/intel_uncore.h | 44 +++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 5449146a0624..e9e38490815d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -382,20 +382,6 @@ __uncore_write(write_notrace, 32, l, false) */ __uncore_read(read64, 64, q, true) -static inline u64 -intel_uncore_read64_2x32(struct intel_uncore *uncore, - i915_reg_t lower_reg, i915_reg_t upper_reg) -{ - u32 upper, lower, old_upper, loop = 0; - upper = intel_uncore_read(uncore, upper_reg); - do { - old_upper = upper; - lower = intel_uncore_read(uncore, lower_reg); - upper = intel_uncore_read(uncore, upper_reg); - } while (upper != old_upper && loop++ < 2); - return (u64)upper << 32 | lower; -} - #define intel_uncore_posting_read(...) ((void)intel_uncore_read_notrace(__VA_ARGS__)) #define intel_uncore_posting_read16(...) ((void)intel_uncore_read16_notrace(__VA_ARGS__)) @@ -455,6 +441,36 @@ static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, intel_uncore_write_fw(uncore, reg, val); } +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t lower_reg, i915_reg_t upper_reg) +{ + u32 upper, lower, old_upper, loop = 0; + enum forcewake_domains fw_domains; + unsigned long flags; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, lower_reg, + FW_REG_READ); + + fw_domains |= intel_uncore_forcewake_for_reg(uncore, upper_reg, + FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + upper = intel_uncore_read_fw(uncore, upper_reg); + do { + old_upper = upper; + lower = intel_uncore_read_fw(uncore, lower_reg); + upper = intel_uncore_read_fw(uncore, upper_reg); + } while (upper != old_upper && loop++ < 2); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return (u64)upper << 32 | lower; +} + static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, i915_reg_t reg, u32 val, u32 mask, u32 expected_val) -- cgit From 529d95a6067b74da9d4d5d9ab3009b35c98c5fce Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 10 Nov 2022 17:19:13 +0000 Subject: drm/i915/selftest: Bump up sample period for busy stats selftest Engine busyness samples around a 10ms period is failing with busyness ranging approx. from 87% to 115% as shown below. The expected range is +/- 5% of the sample period. Fail 10% of the time. rcs0: reported 11716042ns [91%] busyness while spinning [for 12805719ns] When determining busyness of active engine, the GuC based engine busyness implementation relies on a 64 bit timestamp register read. The latency incurred by this register read causes the failure. On DG1, when the test fails, the observed latencies range from 900us - 1.5ms. Optimizing the 2x32 read by acquiring the lock and forcewake prior to all reg reads reduces the rate of failure to around 2%, but does not eliminate it. In order to make the selftest more robust and always account for such latencies, increase the sample period to 100 ms. This eliminates the issue as seen in a 1000 runs. v2: (Ashutosh) - Add error to commit msg - Include gitlab bug - Update commit for inclusion of 2x32 optimized read Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4418 Signed-off-by: Umesh Nerlige Ramappa Acked-by: Tvrtko Ursulin Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221110171913.670286-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 0dcb3ed44a73..87c94314cf67 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -317,7 +317,7 @@ static int live_engine_busy_stats(void *arg) ENGINE_TRACE(engine, "measuring busy time\n"); preempt_disable(); de = intel_engine_get_busy_time(engine, &t[0]); - mdelay(10); + mdelay(100); de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); preempt_enable(); dt = ktime_sub(t[1], t[0]); -- cgit From ba51925da4ef763d6a3aa03b15241a85cdb76865 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 21 Nov 2022 11:24:49 +0200 Subject: drm/i915/gsc: Only initialize GSC in tile 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For multi-tile setups the GSC operational only on the tile 0. Skip GSC auxiliary device creation for all other tiles in GSC device init code. Initialize basic GSC fields and use the same path as HECI1 (HECI_PXP) device disable. Cc: Tomas Winkler Cc: Vitaly Lubart Signed-off-by: José Roberto de Souza Signed-off-by: Alexander Usyskin Acked-by: Tomas Winkler Reviewed-by: Tomas Winkler Link: https://patchwork.freedesktop.org/patch/msgid/20221121092449.328674-1-alexander.usyskin@intel.com --- drivers/gpu/drm/i915/gt/intel_gsc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 976fdf27e790..bcc3605158db 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -174,6 +174,14 @@ static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc, intf->irq = -1; intf->id = intf_id; + /* + * On the multi-tile setups the GSC is functional on the first tile only + */ + if (gsc_to_gt(gsc)->info.id != 0) { + drm_dbg(&i915->drm, "Not initializing gsc for remote tiles\n"); + return; + } + if (intf_id == 0 && !HAS_HECI_PXP(i915)) return; -- cgit From 8b7f7a9b10b704ba7d73199ff0f01354e0bad7a5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Nov 2022 16:16:16 +0200 Subject: drm/i915/guc: make default_lists const data The default_lists array should be in rodata. Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state capture.") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Signed-off-by: Jani Nikula Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221122141616.3469214-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 1d49a7ec0bd8..1c1b85073b4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -170,7 +170,7 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { } /* List of lists */ -static struct __guc_mmio_reg_descr_group default_lists[] = { +static const struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), -- cgit From 02224691cb0f367acb476911bddfa21e2d596ca5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 10 Nov 2022 16:56:51 -0800 Subject: drm/i915/huc: fix leak of debug object in huc load fence on driver unload The fence is always initialized in huc_init_early, but the cleanup in huc_fini is only being run if HuC is enabled. This causes a leaking of the debug object when HuC is disabled/not supported, which can in turn trigger a warning if we try to register a new debug offset at the same address on driver reload. To fix the issue, make sure to always run the cleanup code. Reported-by: Tvrtko Ursulin Reported-by: Brian Norris Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence") Signed-off-by: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Brian Norris Cc: Alan Previn Cc: John Harrison Tested-by: Brian Norris Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221111005651.4160369-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 12 +++++++----- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index be855811d85d..0976e9101346 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -329,13 +329,15 @@ out: void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_loadable(&huc->fw)) - return; - + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ delayed_huc_load_complete(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); - intel_uc_fw_fini(&huc->fw); + + if (intel_uc_fw_is_loadable(&huc->fw)) + intel_uc_fw_fini(&huc->fw); } void intel_huc_suspend(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1d28286e6f06..2a508b137e90 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -722,6 +722,7 @@ int intel_uc_runtime_resume(struct intel_uc *uc) static const struct intel_uc_ops uc_ops_off = { .init_hw = __uc_check_hw, + .fini = __uc_fini, /* to clean-up the init_early initialization */ }; static const struct intel_uc_ops uc_ops_on = { -- cgit From 9b23059b29238204b1769589d665f44bd9b31255 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 22 Nov 2022 15:33:28 -0800 Subject: drm/i915/uc: Fix table order verification to check all FW types It was noticed that the table order verification step was only being run once rather than once per firmware type. Fix that. Note that the long term plan is to convert this code to be a mock selftest. It is already only compiled in when selftests are enabled. And the work involved in the conversion was estimated to be non-trivial. So that conversion is currently low on the priority list. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221122233328.854217-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 0c80ba51a4bd..31613c7e0838 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -238,7 +238,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; - static bool verified; + static bool verified[INTEL_UC_FW_NUM_TYPES]; const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; @@ -291,8 +291,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) } /* make sure the list is ordered as expected */ - if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified) { - verified = true; + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified[uc_fw->type]) { + verified[uc_fw->type] = true; for (i = 1; i < fw_count; i++) { /* Next platform is good: */ @@ -343,7 +343,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) continue; bad: - drm_err(&i915->drm, "Invalid FW blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, fw_blobs[i - 1].blob.legacy ? "L" : "v", fw_blobs[i - 1].blob.major, -- cgit From f235dbd5b768e238d365fd05d92de5a32abc1c1f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:54 +0100 Subject: drm/i915: Fix negative value passed as remaining time Commit b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") extended the API of intel_gt_retire_requests_timeout() with an extra argument 'remaining_timeout', intended for passing back unconsumed portion of requested timeout when 0 (success) is returned. However, when request retirement happens to succeed despite an error returned by a call to dma_fence_wait_timeout(), that error code (a negative value) is passed back instead of remaining time. If we then pass that negative value forward as requested timeout to intel_uc_wait_for_idle(), an explicit BUG will be triggered. If request retirement succeeds but an error code is passed back via remaininig_timeout, we may have no clue on how much of the initial timeout might have been left for spending it on waiting for GuC to become idle. OTOH, since all pending requests have been successfully retired, that error code has been already ignored by intel_gt_retire_requests_timeout(), then we shouldn't fail. Assume no more time has been left on error and pass 0 timeout value to intel_uc_wait_for_idle() to give it a chance to return success if GuC is already idle. v3: Don't fail on any error passed back via remaining_timeout. v2: Fix the issue on the caller side, not the provider. Fixes: b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ca3210dd7b22..600c79bb2ffc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -676,8 +676,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) -- cgit From f301a29f143760ce8d3d6b6a8436d45d3448cde6 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:55 +0100 Subject: drm/i915: Never return 0 if not all requests retired Users of intel_gt_retire_requests_timeout() expect 0 return value on success. However, we have no protection from passing back 0 potentially returned by a call to dma_fence_wait_timeout() when it succedes right after its timeout has expired. Replace 0 with -ETIME before potentially using the timeout value as return code, so -ETIME is returned if there are still some requests not retired after timeout, 0 otherwise. v3: Use conditional expression, more compact but also better reflecting intention standing behind the change. v2: Move the added lines down so flush_submission() is not affected. Fixes: f33a8a51602c ("drm/i915: Merge wait_for_timelines with retire_request") Signed-off-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-3-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) -- cgit From 468a4e630c7da8cf586f85cc498d6097aed1ab4b Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 23 Nov 2022 10:36:47 -0800 Subject: drm/i915/dg2: Introduce Wa_18018764978 Wa_18018764978 applies to specific steppings of DG2 (G10 C0+, G11 and G12 A0+). Clean up style in function at the same time. Bspec: 66622 Signed-off-by: Matt Atwood Reviewed-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221123183648.407058-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 3658e12db74b..bdeb31a40623 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -456,6 +456,9 @@ #define GEN8_L3CNTLREG _MMIO(0x7034) #define GEN8_ERRDETBCTRL (1 << 9) +#define PSS_MODE2 _MMIO(0x703c) +#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) + #define GEN7_SC_INSTDONE _MMIO(0x7100) #define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e39ce0bf91b3..33e5120c1ad5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -770,9 +770,14 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_14014947963:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) + IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + /* Wa_18018764978:dg2 */ + if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || + IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) + wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + /* Wa_15010599737:dg2 */ wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } -- cgit From 900a80c5836587d95db32742f66e1f34f7b40fcb Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 23 Nov 2022 10:36:48 -0800 Subject: drm/i915/dg2: Introduce Wa_18019271663 Wa_18019271663 applies to all DG2 steppings and skus. Bspec: 66622 Signed-off-by: Matt Atwood Reviewed-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221123183648.407058-2-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 7 ++++--- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index bdeb31a40623..6caf2d8deb98 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -428,9 +428,10 @@ #define RC_OP_FLUSH_ENABLE (1 << 0) #define HIZ_RAW_STALL_OPT_DISABLE (1 << 2) #define CACHE_MODE_1 _MMIO(0x7004) /* IVB+ */ -#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1 << 6) -#define GEN8_4x4_STC_OPTIMIZATION_DISABLE (1 << 6) -#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE (1 << 1) +#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) +#define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE REG_BIT(6) +#define GEN8_4x4_STC_OPTIMIZATION_DISABLE REG_BIT(6) +#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE REG_BIT(1) #define GEN7_GT_MODE _MMIO(0x7008) #define GEN9_IZ_HASHING_MASK(slice) (0x3 << ((slice) * 2)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 33e5120c1ad5..d5229039af87 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -780,6 +780,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_15010599737:dg2 */ wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + + /* Wa_18019271663:dg2 */ + wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, -- cgit From 0f85715804e72cdcf489a89930ec64437e11ca8c Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Tue, 22 Nov 2022 12:31:26 +0530 Subject: drm/i915/mtl: Media GT and Render GT share common GGTT On XE_LPM+ platforms the media engines are carved out into a separate GT but have a common GGTMMADR address range which essentially makes the GGTT address space to be shared between media and render GT. As a result any updates in GGTT shall invalidate TLB of GTs sharing it and similarly any operation on GGTT requiring an action on a GT will have to involve all GTs sharing it. setup_private_pat was being done on a per GGTT based as that doesn't touch any GGTT structures moved it to per GT based. BSPEC: 63834 v2: 1. Add details to commit msg 2. includes fix for failure to add item to ggtt->gt_list, as suggested by Lucas 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within it. 4. setup_private_pat moved out of intel_gt_tiles_init v3: 1. Move out for_each_gt from i915_driver.c (Jani Nikula) v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list (Matt Roper) Cc: Matt Roper Signed-off-by: Aravind Iddamsetty Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20221122070126.4813-1-aravind.iddamsetty@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 54 +++++++++++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_gt.c | 13 ++++++-- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ drivers/gpu/drm/i915/gt/intel_gtt.h | 4 +++ drivers/gpu/drm/i915/i915_driver.c | 12 ++++--- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_vma.c | 5 ++- drivers/gpu/drm/i915/selftests/i915_gem.c | 2 ++ 9 files changed, 111 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8145851ad23d..7644738b9cdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -196,10 +197,13 @@ retry: void i915_ggtt_suspend(struct i915_ggtt *ggtt) { + struct intel_gt *gt; + i915_ggtt_suspend_vm(&ggtt->vm); ggtt->invalidate(ggtt); - intel_gt_check_and_clear_faults(ggtt->vm.gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_check_and_clear_faults(gt); } void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { - struct intel_uncore *uncore = ggtt->vm.gt->uncore; struct drm_i915_private *i915 = ggtt->vm.i915; gen8_ggtt_invalidate(ggtt); - if (GRAPHICS_VER(i915) >= 12) - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, - GEN12_GUC_TLB_INV_CR_INVALIDATE); - else - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); + if (GRAPHICS_VER(i915) >= 12) { + struct intel_gt *gt; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_uncore_write_fw(gt->uncore, + GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + } else { + intel_uncore_write_fw(ggtt->vm.gt->uncore, + GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } } u64 gen8_ggtt_pte_encode(dma_addr_t addr, @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - setup_private_pat(ggtt->vm.gt); - return ggtt_probe_common(ggtt, size); } @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) */ int i915_ggtt_probe_hw(struct drm_i915_private *i915) { - int ret; + struct intel_gt *gt; + int ret, i; + + for_each_gt(gt, i915, i) { + ret = intel_gt_assign_ggtt(gt); + if (ret) + return ret; + } ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915)); if (ret) @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) return 0; } +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt; + + ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL); + if (!ggtt) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ggtt->gt_list); + + return ggtt; +} + int i915_ggtt_enable_hw(struct drm_i915_private *i915) { if (GRAPHICS_VER(i915) < 6) @@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) void i915_ggtt_resume(struct i915_ggtt *ggtt) { + struct intel_gt *gt; bool flush; - intel_gt_check_and_clear_faults(ggtt->vm.gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_check_and_clear_faults(gt); flush = i915_ggtt_resume_vm(&ggtt->vm); @@ -1307,9 +1336,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) if (flush) wbinvd_on_all_cpus(); - if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt); - intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 600c79bb2ffc..9c45afdc6e3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -109,9 +109,18 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) int intel_gt_assign_ggtt(struct intel_gt *gt) { - gt->ggtt = drmm_kzalloc(>->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); + /* Media GT shares primary GT's GGTT */ + if (gt->type == GT_MEDIA) { + gt->ggtt = to_gt(gt->i915)->ggtt; + } else { + gt->ggtt = i915_ggtt_create(gt->i915); + if (IS_ERR(gt->ggtt)) + return PTR_ERR(gt->ggtt); + } - return gt->ggtt ? 0 : -ENOMEM; + list_add_tail(>->ggtt_link, >->ggtt->gt_list); + + return 0; } int intel_gt_init_mmio(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index c1d9cd255e06..8d915640914b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -277,6 +277,9 @@ struct intel_gt { struct kobject *sysfs_defaults; struct i915_perf_gt perf; + + /** link: &ggtt.gt_list */ + struct list_head ggtt_link; }; struct intel_gt_definition { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 4d75ba4bb41d..d1900fec6cd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -390,6 +390,9 @@ struct i915_ggtt { struct mutex error_mutex; struct drm_mm_node error_capture; struct drm_mm_node uc_fw; + + /** List of GTs mapping this GGTT */ + struct list_head gt_list; }; struct i915_ppgtt { @@ -584,6 +587,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *i915); void i915_ggtt_driver_release(struct drm_i915_private *i915); void i915_ggtt_driver_late_release(struct drm_i915_private *i915); +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915); static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 69103ae37779..4e1bb3c23c63 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -612,10 +612,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) i915_perf_init(dev_priv); - ret = intel_gt_assign_ggtt(to_gt(dev_priv)); - if (ret) - goto err_perf; - ret = i915_ggtt_probe_hw(dev_priv); if (ret) goto err_perf; @@ -1316,7 +1312,8 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, static int i915_drm_resume(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int ret; + struct intel_gt *gt; + int ret, i; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); @@ -1331,6 +1328,11 @@ static int i915_drm_resume(struct drm_device *dev) drm_err(&dev_priv->drm, "failed to re-enable GGTT\n"); i915_ggtt_resume(to_gt(dev_priv)->ggtt); + + for_each_gt(gt, dev_priv, i) + if (GRAPHICS_VER(gt->i915) >= 8) + setup_private_pat(gt); + /* Must be called after GGTT is resumed. */ intel_dpt_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8468ca9885fd..086c4702e1bf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1143,6 +1143,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) { intel_uc_fetch_firmwares(>->uc); intel_wopcm_init(>->wopcm); + if (GRAPHICS_VER(dev_priv) >= 8) + setup_private_pat(gt); } ret = i915_init_ggtt(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f025ee4fa526..4cfe36b0366b 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -43,16 +43,25 @@ static bool dying_vma(struct i915_vma *vma) return !kref_read(&vma->obj->base.refcount); } -static int ggtt_flush(struct intel_gt *gt) +static int ggtt_flush(struct i915_address_space *vm) { - /* - * Not everything in the GGTT is tracked via vma (otherwise we - * could evict as required with minimal stalling) so we are forced - * to idle the GPU and explicitly retire outstanding requests in - * the hopes that we can then remove contexts and the like only - * bound by their active reference. - */ - return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct intel_gt *gt; + int ret = 0; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { + /* + * Not everything in the GGTT is tracked via vma (otherwise we + * could evict as required with minimal stalling) so we are forced + * to idle the GPU and explicitly retire outstanding requests in + * the hopes that we can then remove contexts and the like only + * bound by their active reference. + */ + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + } + return ret; } static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) @@ -149,6 +158,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_mm_node *node; enum drm_mm_insert_mode mode; struct i915_vma *active; + struct intel_gt *gt; int ret; lockdep_assert_held(&vm->mutex); @@ -174,7 +184,14 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - intel_gt_retire_requests(vm->gt); + if (i915_is_ggtt(vm)) { + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_retire_requests(gt); + } else { + intel_gt_retire_requests(vm->gt); + } search_again: active = NULL; @@ -246,7 +263,7 @@ search_again: if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(vm->gt); + ret = ggtt_flush(vm); if (ret) return ret; @@ -332,7 +349,15 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - intel_gt_retire_requests(vm->gt); + if (i915_is_ggtt(vm)) { + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + struct intel_gt *gt; + + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_gt_retire_requests(gt); + } else { + intel_gt_retire_requests(vm->gt); + } if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ @@ -438,7 +463,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * switch otherwise is ineffective. */ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->gt); + ret = ggtt_flush(vm); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 703fee6b5f75..726705b10637 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1544,6 +1544,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u32 align, unsigned int flags) { struct i915_address_space *vm = vma->vm; + struct intel_gt *gt; + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); int err; do { @@ -1559,7 +1561,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, } /* Unlike i915_vma_pin, we don't take no for an answer! */ - flush_idle_contexts(vm->gt); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + flush_idle_contexts(gt); if (mutex_lock_interruptible(&vm->mutex) == 0) { /* * We pass NULL ww here, as we don't want to unbind diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index e5dd82e7e480..2535b9684bd1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -127,6 +127,8 @@ static void igt_pm_resume(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { i915_ggtt_resume(to_gt(i915)->ggtt); + if (GRAPHICS_VER(i915) >= 8) + setup_private_pat(to_gt(i915)); i915_gem_resume(i915); } } -- cgit From 14347a9c889fbdbae81e500f6c6e313f5d8e5271 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 23 Nov 2022 15:54:17 -0800 Subject: drm/i915/huc: always init the delayed load fence The fence is only tracking if the HuC load is in progress or not and doesn't distinguish between already loaded, not supported or disabled, so we can always initialize it to completed, no matter the actual support. We already do that for most platforms, but we skip it on GTs that lack VCS engines (e.g. MTL root GT), so fix that. Note that the cleanup is already unconditional. While at it, move the init/fini to helper functions. Fixes: 02224691cb0f ("drm/i915/huc: fix leak of debug object in huc load fence on driver unload") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221123235417.1475709-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 47 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 0976e9101346..410905da8e97 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -211,6 +211,30 @@ void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *b huc->delayed_load.nb.notifier_call = NULL; } +static void delayed_huc_load_init(struct intel_huc *huc) +{ + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC load in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; +} + +static void delayed_huc_load_fini(struct intel_huc *huc) +{ + /* + * the fence is initialized in init_early, so we need to clean it up + * even if HuC loading is off. + */ + delayed_huc_load_complete(huc); + i915_sw_fence_fini(&huc->delayed_load.fence); +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; @@ -241,6 +265,15 @@ void intel_huc_init_early(struct intel_huc *huc) intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); + /* + * we always init the fence as already completed, even if HuC is not + * supported. This way we don't have to distinguish between HuC not + * supported/disabled or already loaded, and can focus on if the load + * is currently in progress (fence not complete) or not, which is what + * we care about for stalling userspace submissions. + */ + delayed_huc_load_init(huc); + if (!vcs_supported(gt)) { intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); return; @@ -255,17 +288,6 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } - - /* - * Initialize fence to be complete as this is expected to be complete - * unless there is a delayed HuC reload in progress. - */ - i915_sw_fence_init(&huc->delayed_load.fence, - sw_fence_dummy_notify); - i915_sw_fence_commit(&huc->delayed_load.fence); - - hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -333,8 +355,7 @@ void intel_huc_fini(struct intel_huc *huc) * the fence is initialized in init_early, so we need to clean it up * even if HuC loading is off. */ - delayed_huc_load_complete(huc); - i915_sw_fence_fini(&huc->delayed_load.fence); + delayed_huc_load_fini(huc); if (intel_uc_fw_is_loadable(&huc->fw)) intel_uc_fw_fini(&huc->fw); -- cgit From 03b713d029bd17a1ed426590609af79843db95e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:10 -0800 Subject: drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg() The kerneldoc function name was not updated when this function was converted to a non-fw form. Fixes: 192bb40f030a ("drm/i915/gt: Manage uncore->lock while waiting on MCR register") Reported-by: kernel test robot Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index d9a8ff9e5e57..ea86c1ab5dc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, } /** - * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state * @gt: GT structure * @reg: the register to read * @mask: mask to apply to register value -- cgit From 8d9f7d25d50ba55f6b3463d0b8085f62efc39ec4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:11 -0800 Subject: drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes Passing the GT rather than uncore to the lowest level MCR read and write functions will make it easier to introduce dedicated MCR locking in a following patch. Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index ea86c1ab5dc5..f4484bb18ec9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -221,7 +221,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) /* * rw_with_mcr_steering_fw - Access a register with specific MCR steering - * @uncore: pointer to struct intel_uncore + * @gt: GT to read register from * @reg: register being accessed * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access * @group: group number (documented as "sliceid" on older platforms) @@ -232,10 +232,11 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) * * Caller needs to make sure the relevant forcewake wells are up. */ -static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, +static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { + struct intel_uncore *uncore = gt->uncore; u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; lockdep_assert_held(&uncore->lock); @@ -308,11 +309,12 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, return val; } -static u32 rw_with_mcr_steering(struct intel_uncore *uncore, +static u32 rw_with_mcr_steering(struct intel_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { + struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw_domains; u32 val; @@ -325,7 +327,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, value); + val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); @@ -347,7 +349,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, i915_mcr_reg_t reg, int group, int instance) { - return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0); + return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0); } /** @@ -364,7 +366,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) { - rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value); + rw_with_mcr_steering(gt, reg, FW_REG_WRITE, group, instance, value); } /** @@ -588,7 +590,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) for (type = 0; type < NUM_STEERING_TYPES; type++) { if (reg_needs_read_steering(gt, reg, type)) { get_nonterminated_steering(gt, type, &group, &instance); - return rw_with_mcr_steering_fw(gt->uncore, reg, + return rw_with_mcr_steering_fw(gt, reg, FW_REG_READ, group, instance, 0); } @@ -615,7 +617,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg) for (type = 0; type < NUM_STEERING_TYPES; type++) { if (reg_needs_read_steering(gt, reg, type)) { get_nonterminated_steering(gt, type, &group, &instance); - return rw_with_mcr_steering(gt->uncore, reg, + return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0); } -- cgit From 4186e2185b4ffc9ce652566d4a4f249484841ff4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:12 -0800 Subject: drm/i915/gt: Add dedicated MCR lock We've been overloading uncore->lock to protect access to the MCR steering register. That's not really what uncore->lock is intended for, and it would be better if we didn't need to hold such a high-traffic spinlock for the whole sequence of (apply steering, access MCR register, restore steering). Let's create a dedicated MCR lock to protect the steering control register over this critical section and stop relying on the high-traffic uncore->lock. For now the new lock is a software lock. However some platforms (MTL and beyond) have a hardware-provided locking mechanism that can be used to serialize not only software accesses, but also hardware/firmware accesses as well; support for that hardware level lock will be added in a future patch. v2: - Use irqsave/irqrestore spinlock calls; platforms using execlist submission rather than GuC submission can perform MCR accesses in interrupt context because reset -> errordump happens in a tasklet. Cc: Chris Wilson Cc: Mika Kuoppala Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 7 ++- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 79 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 8 +++ drivers/gpu/drm/i915/gt/intel_mocs.c | 3 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 +++-- 6 files changed, 101 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 9c45afdc6e3d..42b9e2bc7477 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1087,6 +1087,7 @@ static void mmio_invalidate_full(struct intel_gt *gt) enum intel_engine_id id; const i915_reg_t *regs; unsigned int num = 0; + unsigned long flags; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { regs = NULL; @@ -1107,7 +1108,8 @@ static void mmio_invalidate_full(struct intel_gt *gt) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); /* serialise invalidate with GT reset */ awake = 0; for_each_engine(engine, gt, id) { @@ -1141,7 +1143,8 @@ static void mmio_invalidate_full(struct intel_gt *gt) IS_ALDERLAKE_P(i915))) intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); - spin_unlock_irq(&uncore->lock); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); for_each_engine_masked(engine, gt, awake, tmp) { struct reg_and_bit rb; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index f4484bb18ec9..aa070ae57f11 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -143,6 +143,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) unsigned long fuse; int i; + spin_lock_init(>->mcr_lock); + /* * An mslice is unavailable only if both the meml3 for the slice is * disabled *and* all of the DSS in the slice (quadrant) are disabled. @@ -228,6 +230,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) * @instance: instance number (documented as "subsliceid" on older platforms) * @value: register value to be written (ignored for read) * + * Context: The caller must hold the MCR lock * Return: 0 for write access. register value for read access. * * Caller needs to make sure the relevant forcewake wells are up. @@ -239,7 +242,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, struct intel_uncore *uncore = gt->uncore; u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; - lockdep_assert_held(&uncore->lock); + lockdep_assert_held(>->mcr_lock); if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) { /* @@ -316,6 +319,7 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, { struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw_domains; + unsigned long flags; u32 val; fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg), @@ -324,17 +328,59 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, GEN8_MCR_SELECTOR, FW_REG_READ | FW_REG_WRITE); - spin_lock_irq(&uncore->lock); + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); return val; } +/** + * intel_gt_mcr_lock - Acquire MCR steering lock + * @gt: GT structure + * @flags: storage to save IRQ flags to + * + * Performs locking to protect the steering for the duration of an MCR + * operation. Depending on the platform, this may be a software lock + * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes + * access not only for the driver, but also for external hardware and + * firmware agents). + * + * Context: Takes gt->mcr_lock. uncore->lock should *not* be held when this + * function is called, although it may be acquired after this + * function call. + */ +void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) +{ + unsigned long __flags; + + lockdep_assert_not_held(>->uncore->lock); + + spin_lock_irqsave(>->mcr_lock, __flags); + + *flags = __flags; +} + +/** + * intel_gt_mcr_unlock - Release MCR steering lock + * @gt: GT structure + * @flags: IRQ flags to restore + * + * Releases the lock acquired by intel_gt_mcr_lock(). + * + * Context: Releases gt->mcr_lock + */ +void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) +{ + spin_unlock_irqrestore(>->mcr_lock, flags); +} + /** * intel_gt_mcr_read - read a specific instance of an MCR register * @gt: GT structure @@ -342,6 +388,8 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, * @group: the MCR group * @instance: the MCR instance * + * Context: Takes and releases gt->mcr_lock + * * Returns the value read from an MCR register after steering toward a specific * group/instance. */ @@ -362,6 +410,8 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, * * Write an MCR register in unicast mode after steering toward a specific * group/instance. + * + * Context: Calls a function that takes and releases gt->mcr_lock */ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) @@ -376,10 +426,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 val * @value: value to write * * Write an MCR register in multicast mode to update all instances. + * + * Context: Takes and releases gt->mcr_lock */ void intel_gt_mcr_multicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + unsigned long flags; + + intel_gt_mcr_lock(gt, &flags); + /* * Ensure we have multicast behavior, just in case some non-i915 agent * left the hardware in unicast mode. @@ -388,6 +444,8 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value); + + intel_gt_mcr_unlock(gt, flags); } /** @@ -400,9 +458,13 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, * function assumes the caller is already holding any necessary forcewake * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should * be obtained automatically. + * + * Context: The caller must hold gt->mcr_lock. */ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) { + lockdep_assert_held(>->mcr_lock); + /* * Ensure we have multicast behavior, just in case some non-i915 agent * left the hardware in unicast mode. @@ -429,6 +491,8 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u3 * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should * be obtained automatically. * + * Context: Calls functions that take and release gt->mcr_lock + * * Returns the old (unmodified) value read. */ u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, @@ -580,6 +644,8 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, * domains; use intel_gt_mcr_read_any() in cases where forcewake should be * obtained automatically. * + * Context: The caller must hold gt->mcr_lock. + * * Returns the value from a non-terminated instance of @reg. */ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) @@ -587,6 +653,8 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) int type; u8 group, instance; + lockdep_assert_held(>->mcr_lock); + for (type = 0; type < NUM_STEERING_TYPES; type++) { if (reg_needs_read_steering(gt, reg, type)) { get_nonterminated_steering(gt, type, &group, &instance); @@ -607,6 +675,8 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) * Reads a GT MCR register. The read will be steered to a non-terminated * instance (i.e., one that isn't fused off or powered down by power gating). * + * Context: Calls a function that takes and releases gt->mcr_lock. + * * Returns the value from a non-terminated instance of @reg. */ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg) @@ -730,6 +800,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, * Note that this routine assumes the caller holds forcewake asserted, it is * not suitable for very long waits. * + * Context: Calls a function that takes and releases gt->mcr_lock * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. */ int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, @@ -741,7 +812,7 @@ int intel_gt_mcr_wait_for_reg(struct intel_gt *gt, { int ret; - lockdep_assert_not_held(>->uncore->lock); + lockdep_assert_not_held(>->mcr_lock); #define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index ae93b20e1c17..41684495b7da 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -9,6 +9,8 @@ #include "intel_gt_types.h" void intel_gt_mcr_init(struct intel_gt *gt); +void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags); +void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags); u32 intel_gt_mcr_read(struct intel_gt *gt, i915_mcr_reg_t reg, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 8d915640914b..f519aa4a004a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -233,6 +233,14 @@ struct intel_gt { u8 instanceid; } default_steering; + /** + * @mcr_lock: Protects the MCR steering register + * + * Protects the MCR steering register (e.g., GEN8_MCR_SELECTOR). + * Should be taken before uncore->lock in cases where both are desired. + */ + spinlock_t mcr_lock; + /* * Base of per-tile GTTMMADR where we can derive the MMIO and the GGTT. */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 49fdd509527a..69b489e8dfed 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -613,14 +613,17 @@ static u32 l3cc_combine(u16 low, u16 high) static void init_l3cc_table(struct intel_gt *gt, const struct drm_i915_mocs_table *table) { + unsigned long flags; unsigned int i; u32 l3cc; + intel_gt_mcr_lock(gt, &flags); for_each_l3cc(l3cc, table, i) if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc); else intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc); + intel_gt_mcr_unlock(gt, flags); } void intel_mocs_init_engine(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d5229039af87..cafa7b9145b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1759,7 +1759,8 @@ static void wa_list_apply(const struct i915_wa_list *wal) fw = wal_get_fw_for_rmw(uncore, wal); - spin_lock_irqsave(&uncore->lock, flags); + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { @@ -1788,7 +1789,8 @@ static void wa_list_apply(const struct i915_wa_list *wal) } intel_uncore_forcewake_put__locked(uncore, fw); - spin_unlock_irqrestore(&uncore->lock, flags); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); } void intel_gt_apply_workarounds(struct intel_gt *gt) @@ -1809,7 +1811,8 @@ static bool wa_list_verify(struct intel_gt *gt, fw = wal_get_fw_for_rmw(uncore, wal); - spin_lock_irqsave(&uncore->lock, flags); + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) @@ -1819,7 +1822,8 @@ static bool wa_list_verify(struct intel_gt *gt, wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); - spin_unlock_irqrestore(&uncore->lock, flags); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); return ok; } -- cgit From b02ba9ed7cbe7a2ca5b790297cd9c4d30fb810fe Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 29 Nov 2022 15:20:29 -0800 Subject: drm/i915/uc: Rationalise delimiters in filename macros The way delimiters (underscores and dots) were added to the UC filenames was different for different types of delimiter. Rationalise them to all be done the same way - implicitly in the concatenation macro rather than explicitly in the file name prefix. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221129232031.3401386-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 31613c7e0838..848304b2f2e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -118,35 +118,35 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, */ #define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \ "i915/" \ - __stringify(prefix_) name_ ".bin" + __stringify(prefix_) "_" name_ ".bin" #define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ "i915/" \ - __stringify(prefix_) name_ \ + __stringify(prefix_) "_" name_ "_" \ __stringify(major_) ".bin" #define __MAKE_UC_FW_PATH_MMP(prefix_, name_, major_, minor_, patch_) \ "i915/" \ - __stringify(prefix_) name_ \ + __stringify(prefix_) "_" name_ "_" \ __stringify(major_) "." \ __stringify(minor_) "." \ __stringify(patch_) ".bin" /* Minor for internal driver use, not part of file name */ #define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \ - __MAKE_UC_FW_PATH_MAJOR(prefix_, "_guc_", major_) + __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) #define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH_MMP(prefix_, "_guc_", major_, minor_, patch_) + __MAKE_UC_FW_PATH_MMP(prefix_, "guc", major_, minor_, patch_) #define MAKE_HUC_FW_PATH_BLANK(prefix_) \ - __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc") + __MAKE_UC_FW_PATH_BLANK(prefix_, "huc") #define MAKE_HUC_FW_PATH_GSC(prefix_) \ - __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc") + __MAKE_UC_FW_PATH_BLANK(prefix_, "huc_gsc") #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_) + __MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_) /* * All blobs need to be declared via MODULE_FIRMWARE(). -- cgit From 92fcd24853dea0aaf6da945dd06107e573840ae8 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 29 Nov 2022 15:20:30 -0800 Subject: drm/i915/uc: More refactoring of UC version numbers As a precursor to a coming change (for adding a GuC submission API version), abstract the UC version number into its own private structure separate to the firmware filename. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221129232031.3401386-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 +-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 76 +++++++++++++++----------------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 15 ++++--- 3 files changed, 48 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 2a508b137e90..4f4b519e12c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -437,9 +437,9 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) drm_info(&i915->drm, "%s firmware %s version %u.%u.%u\n", intel_uc_fw_type_repr(fw->type), fw->file_selected.path, - fw->file_selected.major_ver, - fw->file_selected.minor_ver, - fw->file_selected.patch_ver); + fw->file_selected.ver.major, + fw->file_selected.ver.minor, + fw->file_selected.ver.patch); } static int __uc_init_hw(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 848304b2f2e3..1c6a520cf4cd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -278,8 +278,8 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->file_selected.path = blob->path; uc_fw->file_wanted.path = blob->path; - uc_fw->file_wanted.major_ver = blob->major; - uc_fw->file_wanted.minor_ver = blob->minor; + uc_fw->file_wanted.ver.major = blob->major; + uc_fw->file_wanted.ver.minor = blob->minor; uc_fw->loaded_via_gsc = blob->loaded_via_gsc; found = true; break; @@ -439,28 +439,28 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ - uc_fw->file_wanted.major_ver += 1; - uc_fw->file_wanted.minor_ver = 0; + uc_fw->file_wanted.ver.major += 1; + uc_fw->file_wanted.ver.minor = 0; uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ - uc_fw->file_wanted.minor_ver += 1; + uc_fw->file_wanted.ver.minor += 1; uc_fw->user_overridden = user; - } else if (uc_fw->file_wanted.major_ver && + } else if (uc_fw->file_wanted.ver.major && i915_inject_probe_error(i915, e)) { /* require prev major version */ - uc_fw->file_wanted.major_ver -= 1; - uc_fw->file_wanted.minor_ver = 0; + uc_fw->file_wanted.ver.major -= 1; + uc_fw->file_wanted.ver.minor = 0; uc_fw->user_overridden = user; - } else if (uc_fw->file_wanted.minor_ver && + } else if (uc_fw->file_wanted.ver.minor && i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! */ - uc_fw->file_wanted.minor_ver -= 1; + uc_fw->file_wanted.ver.minor -= 1; uc_fw->user_overridden = user; } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ - uc_fw->file_wanted.major_ver = 0; - uc_fw->file_wanted.minor_ver = 0; + uc_fw->file_wanted.ver.major = 0; + uc_fw->file_wanted.ver.minor = 0; uc_fw->user_overridden = true; } } @@ -472,9 +472,9 @@ static int check_gsc_manifest(const struct firmware *fw, u32 version_hi = dw[HUC_GSC_VERSION_HI_DW]; u32 version_lo = dw[HUC_GSC_VERSION_LO_DW]; - uc_fw->file_selected.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi); - uc_fw->file_selected.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi); - uc_fw->file_selected.patch_ver = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo); + uc_fw->file_selected.ver.major = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.ver.minor = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.ver.patch = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo); return 0; } @@ -533,11 +533,11 @@ static int check_ccs_header(struct intel_gt *gt, } /* Get version numbers from the CSS header */ - uc_fw->file_selected.major_ver = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + uc_fw->file_selected.ver.major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); - uc_fw->file_selected.minor_ver = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + uc_fw->file_selected.ver.minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - uc_fw->file_selected.patch_ver = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + uc_fw->file_selected.ver.patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) @@ -622,19 +622,19 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->file_wanted.major_ver) { + if (uc_fw->file_wanted.ver.major) { /* Check the file's major version was as it claimed */ - if (uc_fw->file_selected.major_ver != uc_fw->file_wanted.major_ver) { + if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) { drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver, - uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver); + uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor, + uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor); if (!intel_uc_fw_is_overridden(uc_fw)) { err = -ENOEXEC; goto fail; } } else { - if (uc_fw->file_selected.minor_ver < uc_fw->file_wanted.minor_ver) + if (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor) old_ver = true; } } @@ -647,9 +647,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path, - uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver, + uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor, uc_fw->file_selected.path, - uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor); drm_info(&i915->drm, "Consider updating your linux-firmware pkg or downloading from %s\n", INTEL_UC_FIRMWARE_URL); @@ -1064,25 +1064,21 @@ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path); drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); - ver_sel = MAKE_UC_VER(uc_fw->file_selected.major_ver, - uc_fw->file_selected.minor_ver, - uc_fw->file_selected.patch_ver); - ver_want = MAKE_UC_VER(uc_fw->file_wanted.major_ver, - uc_fw->file_wanted.minor_ver, - uc_fw->file_wanted.patch_ver); + ver_sel = MAKE_UC_VER_STRUCT(uc_fw->file_selected.ver); + ver_want = MAKE_UC_VER_STRUCT(uc_fw->file_wanted.ver); if (ver_sel < ver_want) drm_printf(p, "\tversion: wanted %u.%u.%u, found %u.%u.%u\n", - uc_fw->file_wanted.major_ver, - uc_fw->file_wanted.minor_ver, - uc_fw->file_wanted.patch_ver, - uc_fw->file_selected.major_ver, - uc_fw->file_selected.minor_ver, - uc_fw->file_selected.patch_ver); + uc_fw->file_wanted.ver.major, + uc_fw->file_wanted.ver.minor, + uc_fw->file_wanted.ver.patch, + uc_fw->file_selected.ver.major, + uc_fw->file_selected.ver.minor, + uc_fw->file_selected.ver.patch); else drm_printf(p, "\tversion: found %u.%u.%u\n", - uc_fw->file_selected.major_ver, - uc_fw->file_selected.minor_ver, - uc_fw->file_selected.patch_ver); + uc_fw->file_selected.ver.major, + uc_fw->file_selected.ver.minor, + uc_fw->file_selected.ver.patch); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index bc898ba5355d..6501d6f1fbdf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -65,6 +65,12 @@ enum intel_uc_fw_type { }; #define INTEL_UC_FW_NUM_TYPES 2 +struct intel_uc_fw_ver { + u16 major; + u16 minor; + u16 patch; +}; + /* * The firmware build process will generate a version header file with major and * minor version defined. The versions are built into CSS header of firmware. @@ -72,9 +78,7 @@ enum intel_uc_fw_type { */ struct intel_uc_fw_file { const char *path; - u16 major_ver; - u16 minor_ver; - u16 patch_ver; + struct intel_uc_fw_ver ver; }; /* @@ -111,9 +115,8 @@ struct intel_uc_fw { }; #define MAKE_UC_VER(maj, min, pat) ((pat) | ((min) << 8) | ((maj) << 16)) -#define GET_UC_VER(uc) (MAKE_UC_VER((uc)->fw.file_selected.major_ver, \ - (uc)->fw.file_selected.minor_ver, \ - (uc)->fw.file_selected.patch_ver)) +#define MAKE_UC_VER_STRUCT(ver) MAKE_UC_VER((ver).major, (ver).minor, (ver).patch) +#define GET_UC_VER(uc) (MAKE_UC_VER_STRUCT((uc)->fw.file_selected.ver)) /* * When we load the uC binaries, we pin them in a reserved section at the top of -- cgit From 9bbba0667f3779efa9a5c262b2f1b97408a2f563 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 29 Nov 2022 15:20:31 -0800 Subject: drm/i915/guc: Use GuC submission API version number The GuC firmware includes an extra version number to specify the submission API level. So use that rather than the main firmware version number for submission related checks. Also, while it is guaranteed that GuC version number components are only 8-bits in size, other firmwares do not have that restriction. So stop making assumptions about them generically fitting in a u16 individually, or in a u32 as a combined 8.8.8. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221129232031.3401386-4-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 11 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 15 +-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 124 +++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 10 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 3 +- 5 files changed, 137 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 1bb3f9829286..bb4dfe707a7d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -158,6 +158,9 @@ struct intel_guc { bool submission_selected; /** @submission_initialized: tracks whether GuC submission has been initialised */ bool submission_initialized; + /** @submission_version: Submission API version of the currently loaded firmware */ + struct intel_uc_fw_ver submission_version; + /** * @rc_supported: tracks whether we support GuC rc on the current platform */ @@ -268,6 +271,14 @@ struct intel_guc { #endif }; +/* + * GuC version number components are only 8-bit, so converting to a 32bit 8.8.8 + * integer works. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->submission_version) + static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) { return container_of(log, struct intel_guc, log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 412c2624e119..16218757ff27 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1889,7 +1889,7 @@ int intel_guc_submission_init(struct intel_guc *guc) if (guc->submission_initialized) return 0; - if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { ret = guc_lrc_desc_pool_create_v69(guc); if (ret) return ret; @@ -2329,7 +2329,7 @@ static int register_context(struct intel_context *ce, bool loop) GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) ret = register_context_v70(guc, ce, loop); else ret = register_context_v69(guc, ce, loop); @@ -2341,7 +2341,7 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) guc_context_policy_init_v70(ce, loop); } @@ -2955,7 +2955,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, guc_id); @@ -3282,7 +3282,7 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, ce->guc_id.id); @@ -4365,7 +4365,7 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc) intel_wakeref_t wakeref; int ret = 0; - if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) return 0; __guc_scheduling_policy_start_klv(&policy); @@ -4904,6 +4904,9 @@ void intel_guc_submission_print_info(struct intel_guc *guc, if (!sched_engine) return; + drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", + guc->submission_version.major, guc->submission_version.minor, + guc->submission_version.patch); drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", atomic_read(&guc->outstanding_submission_g2h)); drm_printf(p, "GuC tasklet count: %u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 1c6a520cf4cd..6c83a8b66c9e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -479,6 +479,62 @@ static int check_gsc_manifest(const struct firmware *fw, return 0; } +static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 css_value) +{ + /* Get version numbers from the CSS header */ + ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value); + ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value); + ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value); +} + +static void guc_read_css_info(struct intel_uc_fw *uc_fw, struct uc_css_header *css) +{ + struct intel_guc *guc = container_of(uc_fw, struct intel_guc, fw); + + /* + * The GuC firmware includes an extra version number to specify the + * submission API level. This allows submission code to work with + * multiple GuC versions without having to know the absolute firmware + * version number (there are likely to be multiple firmware releases + * which all support the same submission API level). + * + * Note that the spec for the CSS header defines this version number + * as 'vf_version' as it was originally intended for virtualisation. + * However, it is applicable to native submission as well. + * + * Unfortunately, due to an oversight, this version number was only + * exposed in the CSS header from v70.6.0. + */ + if (uc_fw->file_selected.ver.major >= 70) { + if (uc_fw->file_selected.ver.minor >= 6) { + /* v70.6.0 adds CSS header support */ + uc_unpack_css_version(&guc->submission_version, css->vf_version); + } else if (uc_fw->file_selected.ver.minor >= 3) { + /* v70.3.0 introduced v1.1.0 */ + guc->submission_version.major = 1; + guc->submission_version.minor = 1; + guc->submission_version.patch = 0; + } else { + /* v70.0.0 introduced v1.0.0 */ + guc->submission_version.major = 1; + guc->submission_version.minor = 0; + guc->submission_version.patch = 0; + } + } else if (uc_fw->file_selected.ver.major >= 69) { + /* v69.0.0 introduced v0.10.0 */ + guc->submission_version.major = 0; + guc->submission_version.minor = 10; + guc->submission_version.patch = 0; + } else { + /* Prior versions were v0.1.0 */ + guc->submission_version.major = 0; + guc->submission_version.minor = 1; + guc->submission_version.patch = 0; + } + + uc_fw->private_data_size = css->private_data_size; +} + static int check_ccs_header(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) @@ -532,20 +588,50 @@ static int check_ccs_header(struct intel_gt *gt, return -E2BIG; } - /* Get version numbers from the CSS header */ - uc_fw->file_selected.ver.major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->sw_version); - uc_fw->file_selected.ver.minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->sw_version); - uc_fw->file_selected.ver.patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, - css->sw_version); + uc_unpack_css_version(&uc_fw->file_selected.ver, css->sw_version); if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) - uc_fw->private_data_size = css->private_data_size; + guc_read_css_info(uc_fw, css); return 0; } +static bool is_ver_8bit(struct intel_uc_fw_ver *ver) +{ + return ver->major < 0xFF && ver->minor < 0xFF && ver->patch < 0xFF; +} + +static bool guc_check_version_range(struct intel_uc_fw *uc_fw) +{ + struct intel_guc *guc = container_of(uc_fw, struct intel_guc, fw); + + /* + * GuC version number components are defined as being 8-bits. + * The submission code relies on this to optimise version comparison + * tests. So enforce the restriction here. + */ + + if (!is_ver_8bit(&uc_fw->file_selected.ver)) { + drm_warn(&__uc_fw_to_gt(uc_fw)->i915->drm, "%s firmware: invalid file version: 0x%02X:%02X:%02X\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->file_selected.ver.major, + uc_fw->file_selected.ver.minor, + uc_fw->file_selected.ver.patch); + return false; + } + + if (!is_ver_8bit(&guc->submission_version)) { + drm_warn(&__uc_fw_to_gt(uc_fw)->i915->drm, "%s firmware: invalid submit version: 0x%02X:%02X:%02X\n", + intel_uc_fw_type_repr(uc_fw->type), + guc->submission_version.major, + guc->submission_version.minor, + guc->submission_version.patch); + return false; + } + + return true; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -622,6 +708,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC && !guc_check_version_range(uc_fw)) + goto fail; + if (uc_fw->file_wanted.ver.major) { /* Check the file's major version was as it claimed */ if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) { @@ -1055,7 +1144,7 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) */ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { - u32 ver_sel, ver_want; + bool got_wanted; drm_printf(p, "%s firmware: %s\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path); @@ -1064,9 +1153,20 @@ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path); drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); - ver_sel = MAKE_UC_VER_STRUCT(uc_fw->file_selected.ver); - ver_want = MAKE_UC_VER_STRUCT(uc_fw->file_wanted.ver); - if (ver_sel < ver_want) + + if (uc_fw->file_selected.ver.major < uc_fw->file_wanted.ver.major) + got_wanted = false; + else if ((uc_fw->file_selected.ver.major == uc_fw->file_wanted.ver.major) && + (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor)) + got_wanted = false; + else if ((uc_fw->file_selected.ver.major == uc_fw->file_wanted.ver.major) && + (uc_fw->file_selected.ver.minor == uc_fw->file_wanted.ver.minor) && + (uc_fw->file_selected.ver.patch < uc_fw->file_wanted.ver.patch)) + got_wanted = false; + else + got_wanted = true; + + if (!got_wanted) drm_printf(p, "\tversion: wanted %u.%u.%u, found %u.%u.%u\n", uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 6501d6f1fbdf..3ab87c54a398 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -66,9 +66,9 @@ enum intel_uc_fw_type { #define INTEL_UC_FW_NUM_TYPES 2 struct intel_uc_fw_ver { - u16 major; - u16 minor; - u16 patch; + u32 major; + u32 minor; + u32 patch; }; /* @@ -114,10 +114,6 @@ struct intel_uc_fw { bool loaded_via_gsc; }; -#define MAKE_UC_VER(maj, min, pat) ((pat) | ((min) << 8) | ((maj) << 16)) -#define MAKE_UC_VER_STRUCT(ver) MAKE_UC_VER((ver).major, (ver).minor, (ver).patch) -#define GET_UC_VER(uc) (MAKE_UC_VER_STRUCT((uc)->fw.file_selected.ver)) - /* * When we load the uC binaries, we pin them in a reserved section at the top of * the GGTT, which is ~18 MBs. On multi-GT systems where the GTs share the GGTT, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index 7a411178bdbf..646fa8aa6cf1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -74,7 +74,8 @@ struct uc_css_header { #define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) - u32 reserved0[13]; + u32 vf_version; + u32 reserved0[12]; union { u32 private_data_size; /* only applies to GuC */ u32 reserved1; -- cgit From e3995e08a39a41691742b380023a0d480247afb0 Mon Sep 17 00:00:00 2001 From: Wayne Boyer Date: Wed, 30 Nov 2022 09:07:23 -0800 Subject: drm/i915/pvc: Implement recommended caching policy As per the performance tuning guide, set the HOSTCACHEEN bit to implement the recommended caching policy on PVC. Signed-off-by: Wayne Boyer Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221130170723.2460014-1-wayne.boyer@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6caf2d8deb98..fa12edafd4e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -972,6 +972,7 @@ #define GEN7_L3AGDIS (1 << 19) #define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) +#define XEHPC_HOSTCACHEEN REG_BIT(1) #define XEHPC_OVRLSCCC REG_BIT(0) #define GEN7_L3CNTLREG2 _MMIO(0xb020) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index cafa7b9145b5..ff63b3859e6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2906,6 +2906,7 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_PONTEVECCHIO(i915)) { wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN); } if (IS_DG2(i915)) { -- cgit From 70b612084586402d39aabf7d76c605914cc3c2b6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 30 Nov 2022 07:58:52 -0800 Subject: drm/i915/mcr: Hold forcewake and MCR lock over PPAT setup PPAT setup involves a series of multicast writes. This can be optimized slightly be acquiring forcewake and the steering lock just once for the entire sequence. v2: - We should use FW_REG_WRITE instead of FW_REG_READ. (Bala) Suggested-by: Balasubramani Vivekanandan Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221130155852.19601-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e82a9d763e57..919d2462fbf4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -481,14 +481,25 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) static void xehp_setup_private_ppat(struct intel_gt *gt) { - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); - intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); + enum forcewake_domains fw; + unsigned long flags; + + fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg), + FW_REG_WRITE); + intel_uncore_forcewake_get(gt->uncore, fw); + + intel_gt_mcr_lock(gt, &flags); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); + intel_gt_mcr_unlock(gt, flags); + + intel_uncore_forcewake_put(gt->uncore, fw); } static void icl_setup_private_ppat(struct intel_uncore *uncore) -- cgit From 3100240bf846ead1a2dbd6ae62bf48c687b9f5be Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 28 Nov 2022 15:30:13 -0800 Subject: drm/i915/mtl: Add hardware-level lock for steering Starting with MTL, the driver needs to not only protect the steering control register from simultaneous software accesses, but also protect against races with hardware/firmware agents. The hardware provides a dedicated locking mechanism to support this via the MTL_STEER_SEMAPHORE register. Reading the register acts as a 'trylock' operation; the read will return 0x1 if the lock is acquired or 0x0 if something else is already holding the lock; once acquired, writing 0x1 to the register will release the lock. We'll continue to grab the software lock as well, just so lockdep can track our locking; assuming the hardware lock is behaving properly, there should never be any contention on the software lock in this case. v2: - Extend hardware semaphore timeout and add a taint for CI if it ever happens (this would imply misbehaving hardware/firmware). (Mika) - Add "MTL_" prefix to new steering semaphore register. (Mika) Cc: Mika Kuoppala Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221128233014.4000136-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 38 +++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index aa070ae57f11..087e4ac5b68d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -347,10 +347,9 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, * @flags: storage to save IRQ flags to * * Performs locking to protect the steering for the duration of an MCR - * operation. Depending on the platform, this may be a software lock - * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes - * access not only for the driver, but also for external hardware and - * firmware agents). + * operation. On MTL and beyond, a hardware lock will also be taken to + * serialize access not only for the driver, but also for external hardware and + * firmware agents. * * Context: Takes gt->mcr_lock. uncore->lock should *not* be held when this * function is called, although it may be acquired after this @@ -359,12 +358,40 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt, void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) { unsigned long __flags; + int err = 0; lockdep_assert_not_held(>->uncore->lock); + /* + * Starting with MTL, we need to coordinate not only with other + * driver threads, but also with hardware/firmware agents. A dedicated + * locking register is used. + */ + if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70)) + err = wait_for(intel_uncore_read_fw(gt->uncore, + MTL_STEER_SEMAPHORE) == 0x1, 100); + + /* + * Even on platforms with a hardware lock, we'll continue to grab + * a software spinlock too for lockdep purposes. If the hardware lock + * was already acquired, there should never be contention on the + * software lock. + */ spin_lock_irqsave(>->mcr_lock, __flags); *flags = __flags; + + /* + * In theory we should never fail to acquire the HW semaphore; this + * would indicate some hardware/firmware is misbehaving and not + * releasing it properly. + */ + if (err == -ETIMEDOUT) { + drm_err_ratelimited(>->i915->drm, + "GT%u hardware MCR steering semaphore timed out", + gt->info.id); + add_taint_for_CI(gt->i915, TAINT_WARN); /* CI is now unreliable */ + } } /** @@ -379,6 +406,9 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) { spin_unlock_irqrestore(>->mcr_lock, flags); + + if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fa12edafd4e2..993188211707 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -66,6 +66,7 @@ #define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define MTL_STEER_SEMAPHORE _MMIO(0xfd0) #define MTL_MCR_SELECTOR _MMIO(0xfd4) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) -- cgit From c04712efb3755306ff3ab72a91df94108bff1f30 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 2 Dec 2022 14:35:28 -0800 Subject: drm/i915/mtl: Check full IP version when applying hw steering semaphore When determining whether the platform has a hardware-level steering semaphore (i.e., MTL and beyond), we need to use GRAPHICS_VER_FULL() to compare the full version rather than just the major version number returned by GRAPHICS_VER(). Reported-by: kernel test robot Fixes: 3100240bf846 ("drm/i915/mtl: Add hardware-level lock for steering") Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221202223528.714491-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 087e4ac5b68d..41a237509dcf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -367,7 +367,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * driver threads, but also with hardware/firmware agents. A dedicated * locking register is used. */ - if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70)) + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) err = wait_for(intel_uncore_read_fw(gt->uncore, MTL_STEER_SEMAPHORE) == 0x1, 100); @@ -407,7 +407,7 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) { spin_unlock_irqrestore(>->mcr_lock, flags); - if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70)) + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1); } -- cgit From c46c5fb725bedd73cf33511b6a52d82b57eaba2a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Dec 2022 14:22:10 -0800 Subject: drm/i915/gen12: Apply recommended L3 hashing mask The TGL/RKL/DG1/ADL performance tuning guide suggests programming a literal value of 0x2FC0100F for this register. The register's hardware default value is 0x2FC0108F, so this translates to just clearing one bit. Take this opportunity to also clean up the register definition and re-write its existing bits/fields in the preferred notation. Bspec: 31870 Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20221201222210.344152-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 9 +++++---- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 993188211707..d255c84b6eef 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -957,10 +957,11 @@ #define GEN7_DISABLE_SAMPLER_PREFETCH (1 << 30) #define GEN8_GARBCNTL _MMIO(0xb004) -#define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) -#define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) -#define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) -#define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) +#define GEN11_ARBITRATION_PRIO_ORDER_MASK REG_GENMASK(27, 22) +#define GEN12_BUS_HASH_CTL_BIT_EXC REG_BIT(7) +#define GEN9_GAPS_TSV_CREDIT_DISABLE REG_BIT(7) +#define GEN11_HASH_CTRL_EXCL_MASK REG_GENMASK(6, 0) +#define GEN11_HASH_CTRL_EXCL_BIT0 REG_FIELD_PREP(GEN11_HASH_CTRL_EXCL_MASK, 0x1) #define GEN9_SCRATCH_LNCF1 _MMIO(0xb008) #define GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ff63b3859e6f..38746a71fc1f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2936,6 +2936,9 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, THREAD_EX_ARB_MODE_RR_AFTER_DEP); + + if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); } /* -- cgit From f2053d346615f2c6bb4c0663276274b2da3f6871 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 10 Nov 2022 09:58:23 -0800 Subject: drm/i915/guc: enable GuC GGTT invalidation from the start Invalidating the GuC TLBs while GuC is not loaded does not have negative consequences, so if we're starting the driver with GuC enabled we can use the GGTT invalidation function from the get-go, instead of switching to it when we initialize the GuC objects. In MTL, this fixes and issue where we try to overwrite the invalidation function twice (once for each GuC), due to the GGTT being shared between the primary and media GTs Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Radhakrishna Sripada Cc: John Harrison Cc: Aravind Iddamsetty Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221110175823.3867135-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 28 ++++------------------------ drivers/gpu/drm/i915/gt/intel_gtt.h | 2 -- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 7 ------- 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 7644738b9cdb..8e326332e7c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -988,7 +988,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } - ggtt->invalidate = gen8_ggtt_invalidate; + if (intel_uc_wants_guc(&ggtt->vm.gt->uc)) + ggtt->invalidate = guc_ggtt_invalidate; + else + ggtt->invalidate = gen8_ggtt_invalidate; ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; @@ -1243,29 +1246,6 @@ int i915_ggtt_enable_hw(struct drm_i915_private *i915) return 0; } -void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) -{ - GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate); - - ggtt->invalidate = guc_ggtt_invalidate; - - ggtt->invalidate(ggtt); -} - -void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) -{ - /* XXX Temporary pardon for error unload */ - if (ggtt->invalidate == gen8_ggtt_invalidate) - return; - - /* We should only be called after i915_ggtt_enable_guc() */ - GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); - - ggtt->invalidate = gen8_ggtt_invalidate; - - ggtt->invalidate(ggtt); -} - /** * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM * @vm: The VM to restore the mappings for diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index d1900fec6cd1..7e5b888c1b68 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -582,8 +582,6 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); int i915_ggtt_enable_hw(struct drm_i915_private *i915); -void i915_ggtt_enable_guc(struct i915_ggtt *ggtt); -void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *i915); void i915_ggtt_driver_release(struct drm_i915_private *i915); void i915_ggtt_driver_late_release(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 52aede324788..c0b5aa6fde26 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -430,9 +430,6 @@ int intel_guc_init(struct intel_guc *guc) /* now that everything is perma-pinned, initialize the parameters */ guc_init_params(guc); - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(gt->ggtt); - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE); return 0; @@ -457,13 +454,9 @@ out: void intel_guc_fini(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - if (!intel_uc_fw_is_loadable(&guc->fw)) return; - i915_ggtt_disable_guc(gt->ggtt); - if (intel_guc_slpc_is_used(guc)) intel_guc_slpc_fini(&guc->slpc); -- cgit From 09f9b4418e417b6452d1bcd7a9544a68fc1e59d5 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Dec 2022 00:58:01 +0100 Subject: drm/i915: Limit the display memory alignment to 32 bit instead of 64 The coming commit "drm/i915: Introduce guard pages to i915_vma" from Chris, was originally changing display_alignment to u32 from u64. The reason is that the display GGTT is and will be limited o 4GB. Put it in a separate patch and use "max(...)" instead of "max_t(64, ...)" when asigning the value. We can safely use max as we know beforehand that the comparison is between two u32 variables. Signed-off-by: Chris Wilson Signed-off-by: Andi Shyti Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221130235805.221010-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fb_pin.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 6900acbb1381..1aca7552a85d 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -91,7 +91,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, goto err; } - vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + vma->display_alignment = max(vma->display_alignment, alignment); i915_gem_object_flush_if_display(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index d44a152ce680..850776a783ac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -444,7 +444,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; - vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + vma->display_alignment = max(vma->display_alignment, alignment); i915_vma_mark_scanout(vma); i915_gem_object_flush_if_display_locked(obj); diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index ec0f6c9f57d0..0375812792b9 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -197,7 +197,6 @@ struct i915_vma { struct i915_fence_reg *fence; u64 size; - u64 display_alignment; struct i915_page_sizes page_sizes; /* mmap-offset associated with fencing for this vma */ @@ -205,6 +204,7 @@ struct i915_vma { u32 fence_size; u32 fence_alignment; + u32 display_alignment; /** * Count of the number of times this vma has been opened by different -- cgit From 8e4ee5e87ce60be439eca8d3a65bd870f6821902 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2022 00:58:02 +0100 Subject: drm/i915: Wrap all access to i915_vma.node.start|size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already wrap i915_vma.node.start for use with the GGTT, as there we can perform additional sanity checks that the node belongs to the GGTT and fits within the 32b registers. In the next couple of patches, we will introduce guard pages around the objects _inside_ the drm_mm_node allocation. That is we will offset the vma->pages so that the first page is at drm_mm_node.start + vma->guard (not 0 as is currently the case). All users must then not use i915_vma.node.start directly, but compute the guard offset, thus all users are converted to use a i915_vma_offset() wrapper. The notable exceptions are the selftests that are testing exact behaviour of i915_vma_pin/i915_vma_insert. Signed-off-by: Chris Wilson Signed-off-by: Tejas Upadhyay Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Signed-off-by: Tvrtko Ursulin Signed-off-by: Andi Shyti Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221130235805.221010-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 33 +++++++------- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 4 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_client_blt.c | 23 +++++----- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 15 ++++--- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 7 +-- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 2 +- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 3 +- drivers/gpu/drm/i915/gt/intel_renderstate.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 8 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 18 ++++---- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 15 ++++--- drivers/gpu/drm/i915/gt/selftest_lrc.c | 16 +++---- drivers/gpu/drm/i915/gt/selftest_ring_submission.c | 2 +- drivers/gpu/drm/i915/gt/selftest_rps.c | 12 ++--- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 8 ++-- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 25 ++++++----- drivers/gpu/drm/i915/i915_vma.h | 51 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_vma_resource.h | 10 +++-- drivers/gpu/drm/i915/selftests/i915_request.c | 20 ++++----- drivers/gpu/drm/i915/selftests/igt_spinner.c | 8 ++-- 29 files changed, 180 insertions(+), 122 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index ab385d18ddcc..05c3d4739c85 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -284,7 +284,7 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Our framebuffer is the entirety of fbdev's system memory */ info->fix.smem_start = - (unsigned long)(ggtt->gmadr.start + vma->node.start); + (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma)); info->fix.smem_len = vma->size; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7d742f7e52b8..16618a548026 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -378,22 +378,25 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, const struct i915_vma *vma, unsigned int flags) { - if (vma->node.size < entry->pad_to_size) + const u64 start = i915_vma_offset(vma); + const u64 size = i915_vma_size(vma); + + if (size < entry->pad_to_size) return true; - if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) + if (entry->alignment && !IS_ALIGNED(start, entry->alignment)) return true; if (flags & EXEC_OBJECT_PINNED && - vma->node.start != entry->offset) + start != entry->offset) return true; if (flags & __EXEC_OBJECT_NEEDS_BIAS && - vma->node.start < BATCH_OFFSET_BIAS) + start < BATCH_OFFSET_BIAS) return true; if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && - (vma->node.start + vma->node.size + 4095) >> 32) + (start + size + 4095) >> 32) return true; if (flags & __EXEC_OBJECT_NEEDS_MAP && @@ -439,7 +442,7 @@ eb_pin_vma(struct i915_execbuffer *eb, int err; if (vma->node.size) - pin_flags = vma->node.start; + pin_flags = __i915_vma_offset(vma); else pin_flags = entry->offset & PIN_OFFSET_MASK; @@ -662,8 +665,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb, if (err) return err; - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; + if (entry->offset != i915_vma_offset(vma)) { + entry->offset = i915_vma_offset(vma) | UPDATE; eb->args->flags |= __EXEC_HAS_RELOC; } @@ -983,8 +986,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) return err; if (!err) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; + if (entry->offset != i915_vma_offset(vma)) { + entry->offset = i915_vma_offset(vma) | UPDATE; eb->args->flags |= __EXEC_HAS_RELOC; } } else { @@ -1065,7 +1068,7 @@ static inline u64 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, const struct i915_vma *target) { - return gen8_canonical_addr((int)reloc->delta + target->node.start); + return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target)); } static void reloc_cache_init(struct reloc_cache *cache, @@ -1274,7 +1277,7 @@ static void *reloc_iomap(struct i915_vma *batch, if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { - cache->node.start = vma->node.start; + cache->node.start = i915_ggtt_offset(vma); cache->node.mm = (void *)vma; } } @@ -1437,7 +1440,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, * more work needs to be done. */ if (!DBG_FORCE_RELOC && - gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) + gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ @@ -2367,7 +2370,7 @@ static int eb_request_submit(struct i915_execbuffer *eb, } err = rq->context->engine->emit_bb_start(rq, - batch->node.start + + i915_vma_offset(batch) + eb->batch_start_offset, batch_len, eb->batch_flags); @@ -2378,7 +2381,7 @@ static int eb_request_submit(struct i915_execbuffer *eb, GEM_BUG_ON(intel_context_is_parallel(rq->context)); GEM_BUG_ON(eb->batch_start_offset); err = rq->context->engine->emit_bb_start(rq, - eb->trampoline->node.start + + i915_vma_offset(eb->trampoline) + batch_len, 0, 0); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c29efdef8313..d73ba0f5c4c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -395,7 +395,7 @@ retry: /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, + (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->iomap); if (ret) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 8dc5c8874d8a..b1672e054b21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -400,7 +400,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr mutex_lock(&to_gt(i915)->ggtt->vm.mutex); list_for_each_entry_safe(vma, next, &to_gt(i915)->ggtt->vm.bound_list, vm_link) { - unsigned long count = vma->node.size >> PAGE_SHIFT; + unsigned long count = i915_vma_size(vma) >> PAGE_SHIFT; struct drm_i915_gem_object *obj = vma->obj; if (!vma->iomap || i915_vma_is_active(vma)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index fd42b89b7162..04bb909acdec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -168,11 +168,11 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, return true; size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride); - if (vma->node.size < size) + if (i915_vma_size(vma) < size) return false; alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride); - if (!IS_ALIGNED(vma->node.start, alignment)) + if (!IS_ALIGNED(i915_ggtt_offset(vma), alignment)) return false; return true; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index beaf27e09e8a..e0c2ac9c8053 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -400,7 +400,7 @@ static int igt_check_page_sizes(struct i915_vma *vma) * Maintaining alignment is required to utilise huge pages in the ppGGT. */ if (i915_gem_object_is_lmem(obj) && - IS_ALIGNED(vma->node.start, SZ_2M) && + IS_ALIGNED(i915_vma_offset(vma), SZ_2M) && vma->page_sizes.sg & SZ_2M && vma->resource->page_sizes_gtt < SZ_2M) { pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n", diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 692a16914ca0..3bb1f7f0110e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -194,12 +194,12 @@ static int prepare_blit(const struct tiled_blits *t, *cs++ = src_4t | dst_4t | BLT_DEPTH_32 | dst_pitch; *cs++ = 0; *cs++ = t->height << 16 | t->width; - *cs++ = lower_32_bits(dst->vma->node.start); - *cs++ = upper_32_bits(dst->vma->node.start); + *cs++ = lower_32_bits(i915_vma_offset(dst->vma)); + *cs++ = upper_32_bits(i915_vma_offset(dst->vma)); *cs++ = 0; *cs++ = src_pitch; - *cs++ = lower_32_bits(src->vma->node.start); - *cs++ = upper_32_bits(src->vma->node.start); + *cs++ = lower_32_bits(i915_vma_offset(src->vma)); + *cs++ = upper_32_bits(i915_vma_offset(src->vma)); } else { if (ver >= 6) { *cs++ = MI_LOAD_REGISTER_IMM(1); @@ -240,14 +240,14 @@ static int prepare_blit(const struct tiled_blits *t, *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch; *cs++ = 0; *cs++ = t->height << 16 | t->width; - *cs++ = lower_32_bits(dst->vma->node.start); + *cs++ = lower_32_bits(i915_vma_offset(dst->vma)); if (use_64b_reloc) - *cs++ = upper_32_bits(dst->vma->node.start); + *cs++ = upper_32_bits(i915_vma_offset(dst->vma)); *cs++ = 0; *cs++ = src_pitch; - *cs++ = lower_32_bits(src->vma->node.start); + *cs++ = lower_32_bits(i915_vma_offset(src->vma)); if (use_64b_reloc) - *cs++ = upper_32_bits(src->vma->node.start); + *cs++ = upper_32_bits(i915_vma_offset(src->vma)); } *cs++ = MI_BATCH_BUFFER_END; @@ -462,7 +462,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr) { int err; - if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) { + if (drm_mm_node_allocated(&vma->node) && i915_vma_offset(vma) != addr) { err = i915_vma_unbind_unlocked(vma); if (err) return err; @@ -472,6 +472,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr) if (err) return err; + GEM_BUG_ON(i915_vma_offset(vma) != addr); return 0; } @@ -518,8 +519,8 @@ tiled_blit(struct tiled_blits *t, err = igt_vma_move_to_active_unlocked(dst->vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - t->batch->node.start, - t->batch->node.size, + i915_vma_offset(t->batch), + i915_vma_size(t->batch), 0); i915_request_get(rq); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index a0ff51d71d07..ac02fb036592 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -914,8 +914,8 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, *cmd++ = MI_STORE_REGISTER_MEM_GEN8; *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); + *cmd++ = lower_32_bits(i915_vma_offset(vma)); + *cmd++ = upper_32_bits(i915_vma_offset(vma)); *cmd = MI_BATCH_BUFFER_END; __i915_gem_object_flush_map(rpcs, 0, 64); @@ -999,7 +999,8 @@ retry: } err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, + i915_vma_offset(batch), + i915_vma_size(batch), 0); if (err) goto skip_request; @@ -1560,7 +1561,8 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto skip_request; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + err = engine->emit_bb_start(rq, i915_vma_offset(vma), + i915_vma_size(vma), 0); if (err) goto skip_request; @@ -1665,7 +1667,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, *cmd++ = offset; *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; *cmd++ = reg; - *cmd++ = vma->node.start + result; + *cmd++ = i915_vma_offset(vma) + result; *cmd = MI_BATCH_BUFFER_END; i915_gem_object_flush_map(obj); @@ -1694,7 +1696,8 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto skip_request; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); + err = engine->emit_bb_start(rq, i915_vma_offset(vma), + i915_vma_size(vma), flags); if (err) goto skip_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 80e7fdd5d164..c04d317df96d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1608,7 +1608,7 @@ retry: err = i915_vma_move_to_active(vma, rq, 0); - err = engine->emit_bb_start(rq, vma->node.start, 0, 0); + err = engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0); i915_request_get(rq); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 374b10ac430e..c147038230c4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -62,8 +62,8 @@ igt_emit_store_dw(struct i915_vma *vma, goto err; } - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > i915_vma_size(vma)); + offset += i915_vma_offset(vma); for (n = 0; n < count; n++) { if (ver >= 8) { @@ -147,7 +147,8 @@ int igt_gpu_fill_dw(struct intel_context *ce, flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, + i915_vma_offset(batch), + i915_vma_size(batch), flags); skip_request: diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index 317efb145787..d38b914d1206 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -106,7 +106,7 @@ static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) static u32 batch_addr(const struct batch_chunk *bc) { - return bc->vma->node.start; + return i915_vma_offset(bc->vma); } static void batch_add(struct batch_chunk *bc, const u32 d) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 995082d45cb2..7ac8ed13e1fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -220,7 +220,8 @@ static int fence_update(struct i915_fence_reg *fence, return ret; } - fence->start = vma->node.start; + GEM_BUG_ON(vma->fence_size > i915_vma_size(vma)); + fence->start = i915_ggtt_offset(vma); fence->size = vma->fence_size; fence->stride = i915_gem_object_get_stride(vma->obj); fence->tiling = i915_gem_object_get_tiling(vma->obj); diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 9c1ae070ee7b..4b56ec3743cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -63,7 +63,7 @@ static int render_state_setup(struct intel_renderstate *so, u32 s = rodata->batch[i]; if (i * 4 == rodata->reloc[reloc_index]) { - u64 r = s + so->vma->node.start; + u64 r = s + i915_vma_offset(so->vma); s = lower_32_bits(r); if (HAS_64BIT_RELOC(i915)) { diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d5d6f1fadcae..7d6db5fc9245 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -895,7 +895,7 @@ static int clear_residuals(struct i915_request *rq) } ret = engine->emit_bb_start(rq, - engine->wa_ctx.vma->node.start, 0, + i915_vma_offset(engine->wa_ctx.vma), 0, 0); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 881b64f3e7b9..542ce6d2de19 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -178,7 +178,7 @@ static int perf_mi_bb_start(void *arg) goto out; err = rq->engine->emit_bb_start(rq, - batch->node.start, 8, + i915_vma_offset(batch), 8, 0); if (err) goto out; @@ -321,7 +321,7 @@ static int perf_mi_noop(void *arg) goto out; err = rq->engine->emit_bb_start(rq, - base->node.start, 8, + i915_vma_offset(base), 8, 0); if (err) goto out; @@ -331,8 +331,8 @@ static int perf_mi_noop(void *arg) goto out; err = rq->engine->emit_bb_start(rq, - nop->node.start, - nop->node.size, + i915_vma_offset(nop), + i915_vma_size(nop), 0); if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index ab2e9a6a2452..a619057543fd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2737,11 +2737,11 @@ static int create_gang(struct intel_engine_cs *engine, MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD; *cs++ = 0; - *cs++ = lower_32_bits(vma->node.start); - *cs++ = upper_32_bits(vma->node.start); + *cs++ = lower_32_bits(i915_vma_offset(vma)); + *cs++ = upper_32_bits(i915_vma_offset(vma)); if (*prev) { - u64 offset = (*prev)->batch->node.start; + u64 offset = i915_vma_offset((*prev)->batch); /* Terminate the spinner in the next lower priority batch. */ *cs++ = MI_STORE_DWORD_IMM_GEN4; @@ -2767,7 +2767,7 @@ static int create_gang(struct intel_engine_cs *engine, err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - vma->node.start, + i915_vma_offset(vma), PAGE_SIZE, 0); i915_vma_unlock(vma); i915_request_add(rq); @@ -3095,7 +3095,7 @@ create_gpr_user(struct intel_engine_cs *engine, *cs++ = MI_MATH_ADD; *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); - addr = result->node.start + offset + i * sizeof(*cs); + addr = i915_vma_offset(result) + offset + i * sizeof(*cs); *cs++ = MI_STORE_REGISTER_MEM_GEN8; *cs++ = CS_GPR(engine, 2 * i); *cs++ = lower_32_bits(addr); @@ -3105,8 +3105,8 @@ create_gpr_user(struct intel_engine_cs *engine, MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD; *cs++ = i; - *cs++ = lower_32_bits(result->node.start); - *cs++ = upper_32_bits(result->node.start); + *cs++ = lower_32_bits(i915_vma_offset(result)); + *cs++ = upper_32_bits(i915_vma_offset(result)); } *cs++ = MI_BATCH_BUFFER_END; @@ -3186,7 +3186,7 @@ create_gpr_client(struct intel_engine_cs *engine, err = i915_vma_move_to_active(batch, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - batch->node.start, + i915_vma_offset(batch), PAGE_SIZE, 0); i915_vma_unlock(batch); i915_vma_unpin(batch); @@ -3518,7 +3518,7 @@ static int smoke_submit(struct preempt_smoke *smoke, err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - vma->node.start, + i915_vma_offset(vma), PAGE_SIZE, 0); i915_vma_unlock(vma); } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index bc05ef48c194..8b0d84f2aad2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -96,7 +96,8 @@ err_ctx: static u64 hws_address(const struct i915_vma *hws, const struct i915_request *rq) { - return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); + return i915_vma_offset(hws) + + offset_in_page(sizeof(u32) * rq->fence.context); } static struct i915_request * @@ -180,8 +181,8 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; - *batch++ = lower_32_bits(vma->node.start); - *batch++ = upper_32_bits(vma->node.start); + *batch++ = lower_32_bits(i915_vma_offset(vma)); + *batch++ = upper_32_bits(i915_vma_offset(vma)); } else if (GRAPHICS_VER(gt->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; @@ -194,7 +195,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; - *batch++ = lower_32_bits(vma->node.start); + *batch++ = lower_32_bits(i915_vma_offset(vma)); } else if (GRAPHICS_VER(gt->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; @@ -207,7 +208,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; - *batch++ = lower_32_bits(vma->node.start); + *batch++ = lower_32_bits(i915_vma_offset(vma)); } else { *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -219,7 +220,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; - *batch++ = lower_32_bits(vma->node.start); + *batch++ = lower_32_bits(i915_vma_offset(vma)); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ intel_gt_chipset_flush(engine->gt); @@ -234,7 +235,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) if (GRAPHICS_VER(gt->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, flags); cancel_rq: if (err) { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 7c56ffd2c659..a61ae9d7e0a2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1030,8 +1030,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) while (len--) { *cs++ = MI_STORE_REGISTER_MEM_GEN8; *cs++ = hw[dw]; - *cs++ = lower_32_bits(scratch->node.start + x); - *cs++ = upper_32_bits(scratch->node.start + x); + *cs++ = lower_32_bits(i915_vma_offset(scratch) + x); + *cs++ = upper_32_bits(i915_vma_offset(scratch) + x); dw += 2; x += 4; @@ -1098,8 +1098,8 @@ record_registers(struct intel_context *ce, *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(b_before->node.start); - *cs++ = upper_32_bits(b_before->node.start); + *cs++ = lower_32_bits(i915_vma_offset(b_before)); + *cs++ = upper_32_bits(i915_vma_offset(b_before)); *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; *cs++ = MI_SEMAPHORE_WAIT | @@ -1114,8 +1114,8 @@ record_registers(struct intel_context *ce, *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(b_after->node.start); - *cs++ = upper_32_bits(b_after->node.start); + *cs++ = lower_32_bits(i915_vma_offset(b_after)); + *cs++ = upper_32_bits(i915_vma_offset(b_after)); intel_ring_advance(rq, cs); @@ -1236,8 +1236,8 @@ static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(batch->node.start); - *cs++ = upper_32_bits(batch->node.start); + *cs++ = lower_32_bits(i915_vma_offset(batch)); + *cs++ = upper_32_bits(i915_vma_offset(batch)); *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 70f9ac1ec2c7..87ceb0f374b6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -50,7 +50,7 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) } else { *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; } - *cs++ = vma->node.start + 4000; + *cs++ = i915_vma_offset(vma) + 4000; *cs++ = STACK_MAGIC; *cs++ = MI_BATCH_BUFFER_END; diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 39f1b7564170..6755bbc4ebda 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -122,14 +122,14 @@ create_spin_counter(struct intel_engine_cs *engine, if (srm) { *cs++ = MI_STORE_REGISTER_MEM_GEN8; *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT)); - *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs)); - *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs)); + *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs)); + *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs)); } } *cs++ = MI_BATCH_BUFFER_START_GEN8; - *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs)); - *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs)); + *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs)); + *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs)); GEM_BUG_ON(cs - base > end); i915_gem_object_flush_map(obj); @@ -655,7 +655,7 @@ int live_rps_frequency_cs(void *arg) err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - vma->node.start, + i915_vma_offset(vma), PAGE_SIZE, 0); i915_request_add(rq); if (err) @@ -794,7 +794,7 @@ int live_rps_frequency_srm(void *arg) err = i915_vma_move_to_active(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, - vma->node.start, + i915_vma_offset(vma), PAGE_SIZE, 0); i915_request_add(rq); if (err) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 96e3861706d6..9c5449709161 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -521,7 +521,7 @@ static int check_dirty_whitelist(struct intel_context *ce) for (i = 0; i < engine->whitelist.count; i++) { u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); struct i915_gem_ww_ctx ww; - u64 addr = scratch->node.start; + u64 addr = i915_vma_offset(scratch); struct i915_request *rq; u32 srm, lrm, rsvd; u32 expect; @@ -640,7 +640,7 @@ retry: goto err_request; err = engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, + i915_vma_offset(batch), PAGE_SIZE, 0); if (err) goto err_request; @@ -870,7 +870,7 @@ static int read_whitelisted_registers(struct intel_context *ce, } for (i = 0; i < engine->whitelist.count; i++) { - u64 offset = results->node.start + sizeof(u32) * i; + u64 offset = i915_vma_offset(results) + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); /* Clear non priv flags */ @@ -942,7 +942,7 @@ static int scrub_whitelisted_registers(struct intel_context *ce) goto err_request; /* Perform the writes from an unprivileged "user" batch */ - err = engine->emit_bb_start(rq, batch->node.start, 0, 0); + err = engine->emit_bb_start(rq, i915_vma_offset(batch), 0, 0); err_request: err = request_add_sync(rq, err); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index f93e6122f247..ddf49c2dbb91 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1471,8 +1471,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, /* Defer failure until attempted use */ jump_whitelist = alloc_whitelist(batch_length); - shadow_addr = gen8_canonical_addr(shadow->node.start); - batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); + shadow_addr = gen8_canonical_addr(i915_vma_offset(shadow)); + batch_addr = gen8_canonical_addr(i915_vma_offset(batch) + batch_offset); /* * We use the batch length as size because the shadow object is as diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6c7ac73b69a5..a356ca490159 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -183,7 +183,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s offset: %08llx, size: %08llx, pages: %s", stringify_vma_type(vma), - vma->node.start, vma->node.size, + i915_vma_offset(vma), i915_vma_size(vma), stringify_page_sizes(vma->resource->page_sizes_gtt, NULL, 0)); if (i915_vma_is_ggtt(vma) || i915_vma_is_dpt(vma)) { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c2bee21a7df2..bee6f3b61819 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2258,7 +2258,7 @@ retry: goto err_add_request; err = rq->engine->emit_bb_start(rq, - vma->node.start, 0, + i915_vma_offset(vma), 0, I915_DISPATCH_SECURE); if (err) goto err_add_request; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 726705b10637..fefee5fef38d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -418,8 +418,8 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes, obj->mm.rsgt, i915_gem_object_is_readonly(obj), i915_gem_object_is_lmem(obj), obj->mm.region, - vma->ops, vma->private, vma->node.start, - vma->node.size, vma->size); + vma->ops, vma->private, __i915_vma_offset(vma), + __i915_vma_size(vma), vma->size); } /** @@ -447,7 +447,7 @@ int i915_vma_bind(struct i915_vma *vma, lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->size > vma->node.size); + GEM_BUG_ON(vma->size > i915_vma_size(vma)); if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, vma->node.size, @@ -569,8 +569,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) vma->obj->base.size); } else if (i915_vma_is_map_and_fenceable(vma)) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, - vma->node.start, - vma->node.size); + i915_vma_offset(vma), + i915_vma_size(vma)); } else { ptr = (void __iomem *) i915_gem_object_pin_map(vma->obj, I915_MAP_WC); @@ -659,22 +659,22 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) return true; - if (vma->node.size < size) + if (i915_vma_size(vma) < size) return true; GEM_BUG_ON(alignment && !is_power_of_2(alignment)); - if (alignment && !IS_ALIGNED(vma->node.start, alignment)) + if (alignment && !IS_ALIGNED(i915_vma_offset(vma), alignment)) return true; if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) return true; if (flags & PIN_OFFSET_BIAS && - vma->node.start < (flags & PIN_OFFSET_MASK)) + i915_vma_offset(vma) < (flags & PIN_OFFSET_MASK)) return true; if (flags & PIN_OFFSET_FIXED && - vma->node.start != (flags & PIN_OFFSET_MASK)) + i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK)) return true; return false; @@ -687,10 +687,11 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!vma->fence_size); - fenceable = (vma->node.size >= vma->fence_size && - IS_ALIGNED(vma->node.start, vma->fence_alignment)); + fenceable = (i915_vma_size(vma) >= vma->fence_size && + IS_ALIGNED(i915_vma_offset(vma), vma->fence_alignment)); - mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; + mappable = i915_ggtt_offset(vma) + vma->fence_size <= + i915_vm_to_ggtt(vma->vm)->mappable_end; if (mappable && fenceable) set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 0757977a489b..3fd4512b1f65 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -125,13 +125,58 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) return !list_empty(&vma->closed_link); } +/* Internal use only. */ +static inline u64 __i915_vma_size(const struct i915_vma *vma) +{ + return vma->node.size; +} + +/** + * i915_vma_offset - Obtain the va range size of the vma + * @vma: The vma + * + * GPU virtual address space may be allocated with padding. This + * function returns the effective virtual address range size + * with padding subtracted. + * + * Return: The effective virtual address range size. + */ +static inline u64 i915_vma_size(const struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + return __i915_vma_size(vma); +} + +/* Internal use only. */ +static inline u64 __i915_vma_offset(const struct i915_vma *vma) +{ + return vma->node.start; +} + +/** + * i915_vma_offset - Obtain the va offset of the vma + * @vma: The vma + * + * GPU virtual address space may be allocated with padding. This + * function returns the effective virtual address offset the gpu + * should use to access the bound data. + * + * Return: The effective virtual address offset. + */ +static inline u64 i915_vma_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + return __i915_vma_offset(vma); +} + static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(upper_32_bits(vma->node.start)); - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); - return lower_32_bits(vma->node.start); + GEM_BUG_ON(upper_32_bits(i915_vma_offset(vma))); + GEM_BUG_ON(upper_32_bits(i915_vma_offset(vma) + + i915_vma_size(vma) - 1)); + return lower_32_bits(i915_vma_offset(vma)); } static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 06923d1816e7..54edf3739ca0 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -52,8 +52,10 @@ struct i915_page_sizes { * @mr: The memory region of the object pointed to by the vma. * @ops: Pointer to the backend i915_vma_ops. * @private: Bind backend private info. - * @start: Offset into the address space of bind range start. - * @node_size: Size of the allocated range manager node. + * @start: Offset into the address space of bind range start. Note that + * this is after any padding that might have been allocated. + * @node_size: Size of the allocated range manager node with padding + * subtracted. * @vma_size: Bind size. * @page_sizes_gtt: Resulting page sizes from the bind operation. * @bound_flags: Flags indicating binding status. @@ -174,8 +176,8 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) * @mr: The memory region of the object the vma points to. * @ops: The backend ops. * @private: Bind backend private info. - * @start: Offset into the address space of bind range start. - * @node_size: Size of the allocated range manager node. + * @start: Offset into the address space of bind range start after padding. + * @node_size: Size of the allocated range manager node minus padding. * @size: Bind size. * * Initializes a vma resource allocated using i915_vma_resource_alloc(). diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 0daa8669181d..6fe22b096bdd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1017,8 +1017,8 @@ empty_request(struct intel_engine_cs *engine, return request; err = engine->emit_bb_start(request, - batch->node.start, - batch->node.size, + i915_vma_offset(batch), + i915_vma_size(batch), I915_DISPATCH_SECURE); if (err) goto out_request; @@ -1138,14 +1138,14 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (ver >= 8) { *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); + *cmd++ = lower_32_bits(i915_vma_offset(vma)); + *cmd++ = upper_32_bits(i915_vma_offset(vma)); } else if (ver >= 6) { *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; - *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = lower_32_bits(i915_vma_offset(vma)); } else { *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = lower_32_bits(i915_vma_offset(vma)); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ @@ -1227,8 +1227,8 @@ static int live_all_engines(void *arg) GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], - batch->node.start, - batch->node.size, + i915_vma_offset(batch), + i915_vma_size(batch), 0); GEM_BUG_ON(err); request[idx]->batch = batch; @@ -1354,8 +1354,8 @@ static int live_sequential_engines(void *arg) GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], - batch->node.start, - batch->node.size, + i915_vma_offset(batch), + i915_vma_size(batch), 0); GEM_BUG_ON(err); request[idx]->batch = batch; diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 16978ac59797..618d9386d554 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -116,7 +116,7 @@ static unsigned int seqno_offset(u64 fence) static u64 hws_address(const struct i915_vma *hws, const struct i915_request *rq) { - return hws->node.start + seqno_offset(rq->fence.context); + return i915_vma_offset(hws) + seqno_offset(rq->fence.context); } struct i915_request * @@ -187,8 +187,8 @@ igt_spinner_create_request(struct igt_spinner *spin, *batch++ = MI_BATCH_BUFFER_START; else *batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *batch++ = lower_32_bits(vma->node.start); - *batch++ = upper_32_bits(vma->node.start); + *batch++ = lower_32_bits(i915_vma_offset(vma)); + *batch++ = upper_32_bits(i915_vma_offset(vma)); *batch++ = MI_BATCH_BUFFER_END; /* not reached */ @@ -203,7 +203,7 @@ igt_spinner_create_request(struct igt_spinner *spin, flags = 0; if (GRAPHICS_VER(rq->engine->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + err = engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, flags); cancel_rq: if (err) { -- cgit From 6110225144d1136db5b026a22efbd76cee197027 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2022 21:39:12 +0100 Subject: drm/i915: Introduce guard pages to i915_vma Introduce the concept of padding the i915_vma with guard pages before and after. The major consequence is that all ordinary uses of i915_vma must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size directly, as the drm_mm_node will include the guard pages that surround our object. The biggest connundrum is how exactly to mix requesting a fixed address with guard pages, particularly through the existing uABI. The user does not know about guard pages, so such must be transparent to the user, and so the execobj.offset must be that of the object itself excluding the guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages. The caveat is that some placements will be impossible with guard pages, as wrap arounds need to be avoided, and the vma itself will require a larger node. We must not report EINVAL but ENOSPC as these are unavailable locations within the GTT rather than conflicting user requirements. In the next patch, we start using guard pages for scanout objects. While these are limited to GGTT vma, on a few platforms these vma (or at least an alias of the vma) is shared with userspace, so we may leak the existence of such guards if we are not careful to ensure that the execobj.offset is transparent and excludes the guards. (On such platforms like ivb, without full-ppgtt, userspace has to use relocations so the presence of more untouchable regions within its GTT such be of no further issue.) Signed-off-by: Chris Wilson Signed-off-by: Tejas Upadhyay Signed-off-by: Tvrtko Ursulin Signed-off-by: Andi Shyti Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221201203912.346110-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 14 ++++++++--- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++- drivers/gpu/drm/i915/i915_vma.c | 43 ++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_vma.h | 5 ++-- drivers/gpu/drm/i915/i915_vma_resource.c | 4 +-- drivers/gpu/drm/i915/i915_vma_resource.h | 7 +++++- drivers/gpu/drm/i915/i915_vma_types.h | 1 + 7 files changed, 60 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8e326332e7c3..0292937e0925 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -296,8 +296,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ gte = (gen8_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; + end = gte + vma_res->guard / I915_GTT_PAGE_SIZE; + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0]->encode); + end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; for_each_sgt_daddr(addr, iter, vma_res->bi.pages) gen8_set_pte(gte++, pte_encode | addr); @@ -347,9 +350,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; gte = (gen6_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; + end = gte + vma_res->guard / I915_GTT_PAGE_SIZE; + while (gte < end) + iowrite32(vm->scratch[0]->encode, gte++); + end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; for_each_sgt_daddr(addr, iter, vma_res->bi.pages) iowrite32(vm->pte_encode(addr, level, flags), gte++); GEM_BUG_ON(gte > end); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8c2f57eb5dda..243419783052 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -44,7 +44,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_HIGH BIT_ULL(5) #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_VALIDATE BIT_ULL(8) /* validate placement only, no need to call unpin() */ +#define PIN_OFFSET_GUARD BIT_ULL(8) +#define PIN_VALIDATE BIT_ULL(9) /* validate placement only, no need to call unpin() */ #define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ #define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fefee5fef38d..34f0e6c923c2 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -419,7 +419,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, obj->mm.rsgt, i915_gem_object_is_readonly(obj), i915_gem_object_is_lmem(obj), obj->mm.region, vma->ops, vma->private, __i915_vma_offset(vma), - __i915_vma_size(vma), vma->size); + __i915_vma_size(vma), vma->size, vma->guard); } /** @@ -677,6 +677,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma, i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK)) return true; + if (flags & PIN_OFFSET_GUARD && + vma->guard < (flags & PIN_OFFSET_MASK)) + return true; + return false; } @@ -749,15 +753,16 @@ static int i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags) { - unsigned long color; + unsigned long color, guard; u64 start, end; int ret; GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(hweight64(flags & (PIN_OFFSET_GUARD | PIN_OFFSET_FIXED | PIN_OFFSET_BIAS)) > 1); size = max(size, vma->size); - alignment = max(alignment, vma->display_alignment); + alignment = max_t(typeof(alignment), alignment, vma->display_alignment); if (flags & PIN_MAPPABLE) { size = max_t(typeof(size), size, vma->fence_size); alignment = max_t(typeof(alignment), @@ -768,6 +773,18 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(!is_power_of_2(alignment)); + guard = vma->guard; /* retain guard across rebinds */ + if (flags & PIN_OFFSET_GUARD) { + GEM_BUG_ON(overflows_type(flags & PIN_OFFSET_MASK, u32)); + guard = max_t(u32, guard, flags & PIN_OFFSET_MASK); + } + /* + * As we align the node upon insertion, but the hardware gets + * node.start + guard, the easiest way to make that work is + * to make the guard a multiple of the alignment size. + */ + guard = ALIGN(guard, alignment); + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); @@ -780,11 +797,12 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj)); - /* If binding the object/GGTT view requires more space than the entire + /* + * If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ - if (size > end) { + if (size > end - 2 * guard) { drm_dbg(&to_i915(vma->obj->base.dev)->drm, "Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n", size, flags & PIN_MAPPABLE ? "mappable" : "total", end); @@ -801,13 +819,23 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) return -EINVAL; + /* + * The caller knows not of the guard added by others and + * requests for the offset of the start of its buffer + * to be fixed, which may not be the same as the position + * of the vma->node due to the guard pages. + */ + if (offset < guard || offset + size > end - guard) + return -ENOSPC; ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node, - size, offset, color, - flags); + size + 2 * guard, + offset - guard, + color, flags); if (ret) return ret; } else { + size += 2 * guard; /* * We only support huge gtt pages through the 48b PPGTT, * however we also don't want to force any alignment for @@ -855,6 +883,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); list_move_tail(&vma->vm_link, &vma->vm->bound_list); + vma->guard = guard; return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 3fd4512b1f65..ed5c9d682a1b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -128,7 +128,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) /* Internal use only. */ static inline u64 __i915_vma_size(const struct i915_vma *vma) { - return vma->node.size; + return vma->node.size - 2 * vma->guard; } /** @@ -150,7 +150,8 @@ static inline u64 i915_vma_size(const struct i915_vma *vma) /* Internal use only. */ static inline u64 __i915_vma_offset(const struct i915_vma *vma) { - return vma->node.start; + /* The actual start of the vma->pages is after the guard pages. */ + return vma->node.start + vma->guard; } /** diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index de1342dbfa12..6ba7a7feceba 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -34,8 +34,8 @@ static struct kmem_cache *slab_vma_resources; * and removal of fences increases as O(ln(pending_unbinds)) instead of * O(1) for a single fence without interval tree. */ -#define VMA_RES_START(_node) ((_node)->start) -#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) +#define VMA_RES_START(_node) ((_node)->start - (_node)->guard) +#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1) INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, u64, __subtree_last, VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 54edf3739ca0..c1864e3d0b43 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -57,6 +57,7 @@ struct i915_page_sizes { * @node_size: Size of the allocated range manager node with padding * subtracted. * @vma_size: Bind size. + * @guard: The size of guard area preceding and trailing the bind. * @page_sizes_gtt: Resulting page sizes from the bind operation. * @bound_flags: Flags indicating binding status. * @allocated: Backend private data. TODO: Should move into @private. @@ -115,6 +116,7 @@ struct i915_vma_resource { u64 start; u64 node_size; u64 vma_size; + u32 guard; u32 page_sizes_gtt; u32 bound_flags; @@ -179,6 +181,7 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) * @start: Offset into the address space of bind range start after padding. * @node_size: Size of the allocated range manager node minus padding. * @size: Bind size. + * @guard: The size of the guard area preceding and trailing the bind. * * Initializes a vma resource allocated using i915_vma_resource_alloc(). * The reason for having separate allocate and initialize function is that @@ -197,7 +200,8 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, void *private, u64 start, u64 node_size, - u64 size) + u64 size, + u32 guard) { __i915_vma_resource_init(vma_res); vma_res->vm = vm; @@ -215,6 +219,7 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, vma_res->start = start; vma_res->node_size = node_size; vma_res->vma_size = size; + vma_res->guard = guard; } static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res) diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 0375812792b9..77fda2244d16 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -202,6 +202,7 @@ struct i915_vma { /* mmap-offset associated with fencing for this vma */ struct i915_mmap_offset *mmo; + u32 guard; /* padding allocated around vma->pages within the node */ u32 fence_size; u32 fence_alignment; u32 display_alignment; -- cgit From eea380ad6b4234d70db544b15bcdcd4e76bc6136 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2022 00:58:04 +0100 Subject: drm/i915: Refine VT-d scanout workaround VT-d may cause overfetch of the scanout PTE, both before and after the vma (depending on the scanout orientation). bspec recommends that we provide a tile-row in either directions, and suggests using 168 PTE, warning that the accesses will wrap around the ends of the GGTT. Currently, we fill the entire GGTT with scratch pages when using VT-d to always ensure there are valid entries around every vma, including scanout. However, writing every PTE is slow as on recent devices we perform 8MiB of uncached writes, incurring an extra 100ms during resume. If instead we focus on only putting guard pages around scanout, we can avoid touching the whole GGTT. To avoid having to introduce extra nodes around each scanout vma, we adjust the scanout drm_mm_node to be smaller than the allocated space, and fixup the extra PTE during dma binding. Signed-off-by: Chris Wilson Signed-off-by: Tejas Upadhyay Signed-off-by: Tvrtko Ursulin Signed-off-by: Andi Shyti Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221130235805.221010-5-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 13 +++++++++++++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 25 +------------------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 850776a783ac..9969e687ad85 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -17,6 +17,8 @@ #include "i915_gem_object.h" #include "i915_vma.h" +#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */ + static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); @@ -424,6 +426,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (ret) return ERR_PTR(ret); + /* VT-d may overfetch before/after the vma, so pad with scratch */ + if (intel_scanout_needs_vtd_wa(i915)) { + unsigned int guard = VTD_GUARD; + + if (i915_gem_object_is_tiled(obj)) + guard = max(guard, + i915_gem_object_get_tile_row_size(obj)); + + flags |= PIN_OFFSET_GUARD | guard; + } + /* * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 0292937e0925..cf9b153f6f66 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -376,27 +376,6 @@ static void nop_clear_range(struct i915_address_space *vm, { } -static void gen8_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0]->encode; - gen8_pte_t __iomem *gtt_base = - (gen8_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - for (i = 0; i < num_entries; i++) - gen8_set_pte(>t_base[i], scratch_pte); -} - static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) { /* @@ -968,8 +947,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.cleanup = gen6_gmch_remove; ggtt->vm.insert_page = gen8_ggtt_insert_page; ggtt->vm.clear_range = nop_clear_range; - if (intel_scanout_needs_vtd_wa(i915)) - ggtt->vm.clear_range = gen8_ggtt_clear_range; ggtt->vm.insert_entries = gen8_ggtt_insert_entries; @@ -1131,7 +1108,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.clear_range = nop_clear_range; - if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + if (!HAS_FULL_PPGTT(i915)) ggtt->vm.clear_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; -- cgit From de3a9ab97069488b23427726e86b8628f4fe278e Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Dec 2022 00:58:05 +0100 Subject: Revert "drm/i915: Improve on suspend / resume time with VT-d enabled" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2ef6efa79fecd5e3457b324155d35524d95f2b6b. Checking the presence if the IRST (Intel Rapid Start Technology) through the ACPI to decide whether to rebuild or not the GGTT puts us at the mercy of the boot firmware and we need to unnecessarily rely on third parties. Because now we avoid adding scratch pages to the entire GGTT we don't need this hack anymore. Signed-off-by: Andi Shyti Cc: Thomas Hellström Cc: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221130235805.221010-6-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 69 +++++++----------------------------- drivers/gpu/drm/i915/gt/intel_gtt.h | 24 ------------- drivers/gpu/drm/i915/i915_driver.c | 16 --------- 3 files changed, 13 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index cf9b153f6f66..0c7fe360f873 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -27,13 +27,6 @@ #include "intel_gtt.h" #include "gen8_ppgtt.h" -static inline bool suspend_retains_ptes(struct i915_address_space *vm) -{ - return GRAPHICS_VER(vm->i915) >= 8 && - !HAS_LMEM(vm->i915) && - vm->is_ggtt; -} - static void i915_ggtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -105,23 +98,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) return 0; } -/* - * Return the value of the last GGTT pte cast to an u64, if - * the system is supposed to retain ptes across resume. 0 otherwise. - */ -static u64 read_last_pte(struct i915_address_space *vm) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *ptep; - - if (!suspend_retains_ptes(vm)) - return 0; - - GEM_BUG_ON(GRAPHICS_VER(vm->i915) < 8); - ptep = (typeof(ptep))ggtt->gsm + (ggtt_total_entries(ggtt) - 1); - return readq(ptep); -} - /** * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM * @vm: The VM to suspend the mappings for @@ -185,10 +161,7 @@ retry: i915_gem_object_unlock(obj); } - if (!suspend_retains_ptes(vm)) - vm->clear_range(vm, 0, vm->total); - else - i915_vm_to_ggtt(vm)->probed_pte = read_last_pte(vm); + vm->clear_range(vm, 0, vm->total); vm->skip_pte_rewrite = save_skip_rewrite; @@ -545,8 +518,6 @@ static int init_ggtt(struct i915_ggtt *ggtt) struct drm_mm_node *entry; int ret; - ggtt->pte_lost = true; - /* * GuC requires all resources that we're sharing with it to be placed in * non-WOPCM memory. If GuC is not present or not in use we still need a @@ -1243,20 +1214,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) { struct i915_vma *vma; bool write_domain_objs = false; - bool retained_ptes; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); - /* - * First fill our portion of the GTT with scratch pages if - * they were not retained across suspend. - */ - retained_ptes = suspend_retains_ptes(vm) && - !i915_vm_to_ggtt(vm)->pte_lost && - !GEM_WARN_ON(i915_vm_to_ggtt(vm)->probed_pte != read_last_pte(vm)); - - if (!retained_ptes) - vm->clear_range(vm, 0, vm->total); + /* First fill our portion of the GTT with scratch pages */ + vm->clear_range(vm, 0, vm->total); /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &vm->bound_list, vm_link) { @@ -1265,16 +1227,16 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - if (!retained_ptes) { - /* - * Clear the bound flags of the vma resource to allow - * ptes to be repopulated. - */ - vma->resource->bound_flags = 0; - vma->ops->bind_vma(vm, NULL, vma->resource, - obj ? obj->cache_level : 0, - was_bound); - } + + /* + * Clear the bound flags of the vma resource to allow + * ptes to be repopulated. + */ + vma->resource->bound_flags = 0; + vma->ops->bind_vma(vm, NULL, vma->resource, + obj ? obj->cache_level : 0, + was_bound); + if (obj) { /* only used during resume => exclusive access */ write_domain_objs |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; @@ -1301,8 +1263,3 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) intel_ggtt_restore_fences(ggtt); } - -void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val) -{ - to_gt(i915)->ggtt->pte_lost = val; -} diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 7e5b888c1b68..5a775310d3fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -355,19 +355,6 @@ struct i915_ggtt { bool do_idle_maps; - /** - * @pte_lost: Are ptes lost on resume? - * - * Whether the system was recently restored from hibernate and - * thus may have lost pte content. - */ - bool pte_lost; - - /** - * @probed_pte: Probed pte value on suspend. Re-checked on resume. - */ - u64 probed_pte; - int mtrr; /** Bit 6 swizzling required for X tiling */ @@ -602,17 +589,6 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); -/** - * i915_ggtt_mark_pte_lost - Mark ggtt ptes as lost or clear such a marking - * @i915 The device private. - * @val whether the ptes should be marked as lost. - * - * In some cases pte content is retained across suspend, but typically lost - * across hibernate. Typically they should be marked as lost on - * hibernation restore and such marking cleared on suspend. - */ -void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val); - void fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 4e1bb3c23c63..4cc3ced83959 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -102,9 +102,6 @@ #include "intel_region_ttm.h" #include "vlv_suspend.h" -/* Intel Rapid Start Technology ACPI device name */ -static const char irst_name[] = "INT3392"; - static const struct drm_driver i915_drm_driver; static void i915_release_bridge_dev(struct drm_device *dev, @@ -1496,8 +1493,6 @@ static int i915_pm_suspend(struct device *kdev) return -ENODEV; } - i915_ggtt_mark_pte_lost(i915, false); - if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -1550,14 +1545,6 @@ static int i915_pm_resume(struct device *kdev) if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - /* - * If IRST is enabled, or if we can't detect whether it's enabled, - * then we must assume we lost the GGTT page table entries, since - * they are not retained if IRST decided to enter S4. - */ - if (!IS_ENABLED(CONFIG_ACPI) || acpi_dev_present(irst_name, NULL, -1)) - i915_ggtt_mark_pte_lost(i915, true); - return i915_drm_resume(&i915->drm); } @@ -1617,9 +1604,6 @@ static int i915_pm_restore_early(struct device *kdev) static int i915_pm_restore(struct device *kdev) { - struct drm_i915_private *i915 = kdev_to_i915(kdev); - - i915_ggtt_mark_pte_lost(i915, true); return i915_pm_resume(kdev); } -- cgit From 3d0f98fa66bc459d0ba516d1d46a0b22e3005244 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 2 Dec 2022 19:14:54 -0800 Subject: drm/i915/hwmon: Silence "mailbox access failed" warning in snb_pcode_read hwm_pcode_read_i1 is called during i915 load. This results in the following warning from snb_pcode_read because POWER_SETUP_SUBCOMMAND_READ_I1 is unsupported on DG1/DG2. [drm:snb_pcode_read [i915]] warning: pcode (read from mbox 47c) \ mailbox access failed for snb_pcode_read_p [i915]: -6 The code handles the unsupported command but the warning in dmesg is a red herring which has resulted in a couple of bugs being filed. Therefore silence the warning by avoiding calling snb_pcode_read_p for DG1/DG2. Signed-off-by: Ashutosh Dixit Reviewed-by: Anshuman Gupta Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20221203031454.1280538-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index c588a17f97e9..cca7a4350ec8 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -293,6 +293,10 @@ static const struct hwmon_channel_info *hwm_gt_info[] = { /* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval) { + /* Avoid ILLEGAL_SUBCOMMAND "mailbox access failed" warning in snb_pcode_read */ + if (IS_DG1(i915) || IS_DG2(i915)) + return -ENXIO; + return snb_pcode_read_p(&i915->uncore, PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval); } -- cgit From 4050e6f211ea521eb703c921cdb15b905be882b2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 23 Nov 2022 18:49:16 +0200 Subject: drm/i915/gt: remove some limited use register access wrappers Remove rmw_set(), rmw_clear(), clear_register(), rmw_set_fw(), and rmw_clear_fw(). They're just one too many levels of abstraction for register access, for very specific purposes. clear_register() seems like a micro-optimization bypassing the write when the register is already clear, but that trick has ceased to work since commit 06b975d58fd6 ("drm/i915: make intel_uncore_rmw() write unconditionally"). Just clear the register in the most obvious way. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20221123164916.4128733-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 29 +++++++---------------------- drivers/gpu/drm/i915/gt/intel_reset.c | 18 ++++-------------- 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 42b9e2bc7477..2bf2ec7f30e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -218,21 +218,6 @@ out: return ret; } -static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw(uncore, reg, 0, set); -} - -static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw(uncore, reg, clr, 0); -} - -static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) -{ - intel_uncore_rmw(uncore, reg, 0, 0); -} - static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) { GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); @@ -258,14 +243,14 @@ intel_gt_clear_error_registers(struct intel_gt *gt, u32 eir; if (GRAPHICS_VER(i915) != 2) - clear_register(uncore, PGTBL_ER); + intel_uncore_write(uncore, PGTBL_ER, 0); if (GRAPHICS_VER(i915) < 4) - clear_register(uncore, IPEIR(RENDER_RING_BASE)); + intel_uncore_write(uncore, IPEIR(RENDER_RING_BASE), 0); else - clear_register(uncore, IPEIR_I965); + intel_uncore_write(uncore, IPEIR_I965, 0); - clear_register(uncore, EIR); + intel_uncore_write(uncore, EIR, 0); eir = intel_uncore_read(uncore, EIR); if (eir) { /* @@ -273,7 +258,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, * mask them. */ drm_dbg(>->i915->drm, "EIR stuck: 0x%08x, masking\n", eir); - rmw_set(uncore, EMR, eir); + intel_uncore_rmw(uncore, EMR, 0, eir); intel_uncore_write(uncore, GEN2_IIR, I915_MASTER_ERROR_INTERRUPT); } @@ -283,10 +268,10 @@ intel_gt_clear_error_registers(struct intel_gt *gt, RING_FAULT_VALID, 0); intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); } else if (GRAPHICS_VER(i915) >= 12) { - rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0); intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); } else if (GRAPHICS_VER(i915) >= 8) { - rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); + intel_uncore_rmw(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID, 0); intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); } else if (GRAPHICS_VER(i915) >= 6) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 24736ebee17c..ffde89c5835a 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -35,16 +35,6 @@ /* XXX How to handle concurrent GGTT updates using tiling registers? */ #define RESET_UNDER_STOP_MACHINE 0 -static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set) -{ - intel_uncore_rmw_fw(uncore, reg, 0, set); -} - -static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) -{ - intel_uncore_rmw_fw(uncore, reg, clr, 0); -} - static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { struct drm_i915_file_private *file_priv = ctx->file_priv; @@ -212,7 +202,7 @@ static int g4x_do_reset(struct intel_gt *gt, int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw_fw(uncore, VDECCLK_GATE_D, 0, VCP_UNIT_CLOCK_GATE_DISABLE); intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, @@ -234,7 +224,7 @@ static int g4x_do_reset(struct intel_gt *gt, out: pci_write_config_byte(pdev, I915_GDRST, 0); - rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE); + intel_uncore_rmw_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE, 0); intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D); return ret; @@ -448,7 +438,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, * to reset it as well (we will unlock it once the reset sequence is * completed). */ - rmw_set_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); + intel_uncore_rmw_fw(uncore, sfc_lock.lock_reg, 0, sfc_lock.lock_bit); ret = __intel_wait_for_register_fw(uncore, sfc_lock.ack_reg, @@ -498,7 +488,7 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) get_sfc_forced_lock_data(engine, &sfc_lock); - rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); + intel_uncore_rmw_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit, 0); } static int __gen11_reset_engines(struct intel_gt *gt, -- cgit From 95c713d722017b26e301303713d638e0b95b1f68 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 23 Nov 2022 15:53:42 -0800 Subject: drm/i915/perf: Do not parse context image for HSW An earlier commit introduced a mechanism to parse the context image to find the OA context control offset. This resulted in an NPD on haswell when gem_context was passed into i915_perf_open_ioctl params. Haswell does not support logical ring contexts, so ensure that the context image is parsed only for platforms with logical ring contexts and also validate lrc_reg_state. v2: Fix build failure v3: Fix checkpatch error Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7432 Fixes: a5c3a3cbf029 ("drm/i915/perf: Determine gen12 oa ctx offset at runtime") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221123235342.713068-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index bee6f3b61819..963175fa6ab1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1382,6 +1382,9 @@ static u32 oa_context_image_offset(struct intel_context *ce, u32 reg) u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; u32 *state = ce->lrc_reg_state; + if (drm_WARN_ON(&ce->engine->i915->drm, !state)) + return U32_MAX; + for (offset = 0; offset < len; ) { if (IS_MI_LRI_CMD(state[offset])) { /* @@ -1446,7 +1449,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) if (IS_ERR(ce)) return PTR_ERR(ce); - if (engine_supports_mi_query(stream->engine)) { + if (engine_supports_mi_query(stream->engine) && + HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) { /* * We are enabling perf query here. If we don't find the context * offset here, just return an error. -- cgit From 242c4b91cd35b66c4aed9286b72352f69c3e4050 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 8 Dec 2022 12:05:16 -0800 Subject: drm/i915/uc: Introduce GSC FW On MTL the GSC FW needs to be loaded on the media GT by the graphics driver. We're going to treat it like a new uc_fw, so add the initial defs and init/fini functions for it. Similarly to the other FWs, the GSC FW path can be overridden via modparam. The modparam can also be used to disable the GSC FW loading by setting it to an empty string. Note that the new structure has been called intel_gsc_uc to avoid confusion with the existing intel_gsc, which instead represents the heci gsc interfaces. v2: re-order Makefile list to be properly sorted (Jani, Alan), better comment (alan) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Jani Nikula Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/Makefile | 10 +++-- drivers/gpu/drm/i915/gt/intel_gt.h | 5 +++ drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 70 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h | 36 ++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 17 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc.h | 3 ++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 29 +++++++++++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 7 +++- drivers/gpu/drm/i915/i915_params.c | 3 ++ drivers/gpu/drm/i915/i915_params.h | 1 + 10 files changed, 172 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d09248c3b176..83cd12341d97 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -188,9 +188,8 @@ i915-y += \ i915_vma_resource.o # general-purpose microcontroller (GuC) support -i915-y += gt/uc/intel_uc.o \ - gt/uc/intel_uc_debugfs.o \ - gt/uc/intel_uc_fw.o \ +i915-y += \ + gt/uc/intel_gsc_uc.o \ gt/uc/intel_guc.o \ gt/uc/intel_guc_ads.o \ gt/uc/intel_guc_capture.o \ @@ -205,7 +204,10 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ - gt/uc/intel_huc_fw.o + gt/uc/intel_huc_fw.o \ + gt/uc/intel_uc.o \ + gt/uc/intel_uc_debugfs.o \ + gt/uc/intel_uc_fw.o # graphics system controller (GSC) support i915-y += gt/intel_gsc.o diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index e0365d556248..d2f4fbde5f9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -39,6 +39,11 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) return container_of(huc, struct intel_gt, uc.huc); } +static inline struct intel_gt *gsc_uc_to_gt(struct intel_gsc_uc *gsc_uc) +{ + return container_of(gsc_uc, struct intel_gt, uc.gsc); +} + static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) { return container_of(gsc, struct intel_gt, gsc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c new file mode 100644 index 000000000000..7abbbb09f432 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "gt/intel_gt.h" +#include "intel_gsc_uc.h" +#include "i915_drv.h" + +static bool gsc_engine_supported(struct intel_gt *gt) +{ + intel_engine_mask_t mask; + + /* + * We reach here from i915_driver_early_probe for the primary GT before + * its engine mask is set, so we use the device info engine mask for it. + * For other GTs we expect the GT-specific mask to be set before we + * call this function. + */ + GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask); + + if (gt_is_root(gt)) + mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; + else + mask = gt->info.engine_mask; + + return __HAS_ENGINE(mask, GSC0); +} + +void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) +{ + intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC); + + /* we can arrive here from i915_driver_early_probe for primary + * GT with it being not fully setup hence check device info's + * engine mask + */ + if (!gsc_engine_supported(gsc_uc_to_gt(gsc))) { + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); + return; + } +} + +int intel_gsc_uc_init(struct intel_gsc_uc *gsc) +{ + struct drm_i915_private *i915 = gsc_uc_to_gt(gsc)->i915; + int err; + + err = intel_uc_fw_init(&gsc->fw); + if (err) + goto out; + + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + i915_probe_error(i915, "failed with %d\n", err); + return err; +} + +void intel_gsc_uc_fini(struct intel_gsc_uc *gsc) +{ + if (!intel_uc_fw_is_loadable(&gsc->fw)) + return; + + intel_uc_fw_fini(&gsc->fw); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h new file mode 100644 index 000000000000..ea2b1c0713b8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _INTEL_GSC_UC_H_ +#define _INTEL_GSC_UC_H_ + +#include "intel_uc_fw.h" + +struct intel_gsc_uc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; +}; + +void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc); +int intel_gsc_uc_init(struct intel_gsc_uc *gsc); +void intel_gsc_uc_fini(struct intel_gsc_uc *gsc); + +static inline bool intel_gsc_uc_is_supported(struct intel_gsc_uc *gsc) +{ + return intel_uc_fw_is_supported(&gsc->fw); +} + +static inline bool intel_gsc_uc_is_wanted(struct intel_gsc_uc *gsc) +{ + return intel_uc_fw_is_enabled(&gsc->fw); +} + +static inline bool intel_gsc_uc_is_used(struct intel_gsc_uc *gsc) +{ + GEM_BUG_ON(__intel_uc_fw_status(&gsc->fw) == INTEL_UC_FIRMWARE_SELECTED); + return intel_uc_fw_is_available(&gsc->fw); +} + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4f4b519e12c1..6f74586f87d8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt.h" #include "gt/intel_reset.h" +#include "intel_gsc_uc.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" @@ -126,6 +127,7 @@ void intel_uc_init_early(struct intel_uc *uc) intel_guc_init_early(&uc->guc); intel_huc_init_early(&uc->huc); + intel_gsc_uc_init_early(&uc->gsc); __confirm_options(uc); @@ -296,15 +298,26 @@ static void __uc_fetch_firmwares(struct intel_uc *uc) INTEL_UC_FIRMWARE_ERROR); } + if (intel_uc_wants_gsc_uc(uc)) { + drm_dbg(&uc_to_gt(uc)->i915->drm, + "Failed to fetch GuC: %d disabling GSC\n", err); + intel_uc_fw_change_status(&uc->gsc.fw, + INTEL_UC_FIRMWARE_ERROR); + } + return; } if (intel_uc_wants_huc(uc)) intel_uc_fw_fetch(&uc->huc.fw); + + if (intel_uc_wants_gsc_uc(uc)) + intel_uc_fw_fetch(&uc->gsc.fw); } static void __uc_cleanup_firmwares(struct intel_uc *uc) { + intel_uc_fw_cleanup_fetch(&uc->gsc.fw); intel_uc_fw_cleanup_fetch(&uc->huc.fw); intel_uc_fw_cleanup_fetch(&uc->guc.fw); } @@ -330,11 +343,15 @@ static int __uc_init(struct intel_uc *uc) if (intel_uc_uses_huc(uc)) intel_huc_init(huc); + if (intel_uc_uses_gsc_uc(uc)) + intel_gsc_uc_init(&uc->gsc); + return 0; } static void __uc_fini(struct intel_uc *uc) { + intel_gsc_uc_fini(&uc->gsc); intel_huc_fini(&uc->huc); intel_guc_fini(&uc->guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index a8f38c2c60e2..5d0f1bcc381e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -6,6 +6,7 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ +#include "intel_gsc_uc.h" #include "intel_guc.h" #include "intel_guc_rc.h" #include "intel_guc_submission.h" @@ -27,6 +28,7 @@ struct intel_uc_ops { struct intel_uc { struct intel_uc_ops const *ops; + struct intel_gsc_uc gsc; struct intel_guc guc; struct intel_huc huc; @@ -87,6 +89,7 @@ uc_state_checkers(huc, huc); uc_state_checkers(guc, guc_submission); uc_state_checkers(guc, guc_slpc); uc_state_checkers(guc, guc_rc); +uc_state_checkers(gsc, gsc_uc); #undef uc_state_checkers #undef __uc_state_checker diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 6c83a8b66c9e..1d6249d4b8ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -19,11 +19,18 @@ static inline struct intel_gt * ____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) { - if (type == INTEL_UC_FW_TYPE_GUC) + GEM_BUG_ON(type >= INTEL_UC_FW_NUM_TYPES); + + switch (type) { + case INTEL_UC_FW_TYPE_GUC: return container_of(uc_fw, struct intel_gt, uc.guc.fw); + case INTEL_UC_FW_TYPE_HUC: + return container_of(uc_fw, struct intel_gt, uc.huc.fw); + case INTEL_UC_FW_TYPE_GSC: + return container_of(uc_fw, struct intel_gt, uc.gsc.fw); + } - GEM_BUG_ON(type != INTEL_UC_FW_TYPE_HUC); - return container_of(uc_fw, struct intel_gt, uc.huc.fw); + return NULL; } static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) @@ -246,6 +253,14 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) int i; bool found; + /* + * GSC FW support is still not fully in place, so we're not defining + * the FW blob yet because we don't want the driver to attempt to load + * it until we're ready for it. + */ + if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) + return; + /* * The only difference between the ADL GuC FWs is the HWConfig support. * ADL-N does not support HWConfig, so we should use the same binary as @@ -375,6 +390,11 @@ static const char *__override_huc_firmware_path(struct drm_i915_private *i915) return ""; } +static const char *__override_gsc_firmware_path(struct drm_i915_private *i915) +{ + return i915->params.gsc_firmware_path; +} + static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { const char *path = NULL; @@ -386,6 +406,9 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc case INTEL_UC_FW_TYPE_HUC: path = __override_huc_firmware_path(i915); break; + case INTEL_UC_FW_TYPE_GSC: + path = __override_gsc_firmware_path(i915); + break; } if (unlikely(path)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 3ab87c54a398..f4310516b3e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -61,9 +61,10 @@ enum intel_uc_fw_status { enum intel_uc_fw_type { INTEL_UC_FW_TYPE_GUC = 0, - INTEL_UC_FW_TYPE_HUC + INTEL_UC_FW_TYPE_HUC, + INTEL_UC_FW_TYPE_GSC, }; -#define INTEL_UC_FW_NUM_TYPES 2 +#define INTEL_UC_FW_NUM_TYPES 3 struct intel_uc_fw_ver { u32 major; @@ -204,6 +205,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) return "GuC"; case INTEL_UC_FW_TYPE_HUC: return "HuC"; + case INTEL_UC_FW_TYPE_GSC: + return "GSC"; } return "uC"; } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index d1e4d528cb17..61578f2860cd 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -192,6 +192,9 @@ i915_param_named_unsafe(huc_firmware_path, charp, 0400, i915_param_named_unsafe(dmc_firmware_path, charp, 0400, "DMC firmware path to use instead of the default one"); +i915_param_named_unsafe(gsc_firmware_path, charp, 0400, + "GSC firmware path to use instead of the default one"); + i915_param_named_unsafe(enable_dp_mst, bool, 0400, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 2733cb6cfe09..3f51f90145b6 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -64,6 +64,7 @@ struct drm_printer; param(char *, guc_firmware_path, NULL, 0400) \ param(char *, huc_firmware_path, NULL, 0400) \ param(char *, dmc_firmware_path, NULL, 0400) \ + param(char *, gsc_firmware_path, NULL, 0400) \ param(bool, memtest, false, 0400) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ param(int, edp_vswing, 0, 0400) \ -- cgit From f0ce5178a38918adfef728f43dd544616b05e3e3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 8 Dec 2022 12:05:17 -0800 Subject: drm/i915/gsc: Skip the version check when fetching the GSC FW The current exectation from the FW side is that the driver will query the GSC FW version after the FW is loaded, similarly to what the mei driver does on DG2. However, we're discussing with the FW team if there is a way to extract the version from the bin file before loading, so we can keep the code the same as for older FWs. Since the GSC FW version is not currently required for functionality and is only needed for debug purposes, we can skip the FW version for now at fetch time and add it later on when we've agreed on the approach. v2: rebased on uc_fw version struct changes. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: Rodrigo Vivi #v1 Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-3-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 1d6249d4b8ef..78ab60c07a2b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -655,6 +655,26 @@ static bool guc_check_version_range(struct intel_uc_fw *uc_fw) return true; } +static int check_fw_header(struct intel_gt *gt, + const struct firmware *fw, + struct intel_uc_fw *uc_fw) +{ + int err = 0; + + /* GSC FW version is queried after the FW is loaded */ + if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) + return 0; + + if (uc_fw->loaded_via_gsc) + err = check_gsc_manifest(fw, uc_fw); + else + err = check_ccs_header(gt, fw, uc_fw); + if (err) + return err; + + return 0; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -724,17 +744,14 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->loaded_via_gsc) - err = check_gsc_manifest(fw, uc_fw); - else - err = check_ccs_header(gt, fw, uc_fw); + err = check_fw_header(gt, fw, uc_fw); if (err) goto fail; if (uc_fw->type == INTEL_UC_FW_TYPE_GUC && !guc_check_version_range(uc_fw)) goto fail; - if (uc_fw->file_wanted.ver.major) { + if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) { /* Check the file's major version was as it claimed */ if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) { drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", @@ -751,7 +768,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) } } - if (old_ver) { + if (old_ver && uc_fw->file_selected.ver.major) { /* Preserve the version that was really wanted */ memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); -- cgit From 15bd4a67e914dbee6b6ba5dfd32a09cbf7419a5b Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 8 Dec 2022 12:05:18 -0800 Subject: drm/i915/gsc: GSC firmware loading GSC FW is loaded by submitting a dedicated command via the GSC engine. The memory area used for loading the FW is then re-purposed as local memory for the GSC itself, so we use a separate allocation instead of using the one where we keep the firmware stored for reload. The GSC is not reset as part of GT reset, so we only need to load it on first boot and S3/S4 exit. v2: use REG_* for register fields definitions (Rodrigo), move to WQ immediately v3: mark worker function as static Bspec: 63347, 65346 Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Rodrigo Vivi Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-4-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine.h | 2 + drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 7 + drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 187 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h | 15 +++ drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 69 +++++++++- drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h | 11 ++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 + drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 20 ++- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 1 + 10 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 83cd12341d97..f9b6844a4854 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -189,6 +189,7 @@ i915-y += \ # general-purpose microcontroller (GuC) support i915-y += \ + gt/uc/intel_gsc_fw.o \ gt/uc/intel_gsc_uc.o \ gt/uc/intel_guc.o \ gt/uc/intel_guc_ads.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index cbc8b857d5f7..0e24af5efee9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -172,6 +172,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) #define I915_GEM_HWS_PXP 0x60 #define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32)) +#define I915_GEM_HWS_GSC 0x62 +#define I915_GEM_HWS_GSC_ADDR (I915_GEM_HWS_GSC * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index f50ea92910d9..2af1ae3831df 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -21,6 +21,7 @@ #define INSTR_CLIENT_SHIFT 29 #define INSTR_MI_CLIENT 0x0 #define INSTR_BC_CLIENT 0x2 +#define INSTR_GSC_CLIENT 0x2 /* MTL+ */ #define INSTR_RC_CLIENT 0x3 #define INSTR_SUBCLIENT_SHIFT 27 #define INSTR_SUBCLIENT_MASK 0x18000000 @@ -432,6 +433,12 @@ #define COLOR_BLT ((0x2<<29)|(0x40<<22)) #define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) +#define GSC_INSTR(opcode, data, flags) \ + (__INSTR(INSTR_GSC_CLIENT) | (opcode) << 22 | (data) << 9 | (flags)) + +#define GSC_FW_LOAD GSC_INSTR(1, 0, 2) +#define HECI1_FW_LIMIT_VALID (1 << 31) + /* * Used to convert any address to canonical form. * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c new file mode 100644 index 000000000000..f88069ab71ab --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt.h" +#include "gt/intel_ring.h" +#include "intel_gsc_fw.h" + +#define GSC_FW_STATUS_REG _MMIO(0x116C40) +#define GSC_FW_CURRENT_STATE REG_GENMASK(3, 0) +#define GSC_FW_CURRENT_STATE_RESET 0 +#define GSC_FW_INIT_COMPLETE_BIT REG_BIT(9) + +static bool gsc_is_in_reset(struct intel_uncore *uncore) +{ + u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG); + + return REG_FIELD_GET(GSC_FW_CURRENT_STATE, fw_status) == + GSC_FW_CURRENT_STATE_RESET; +} + +bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc) +{ + struct intel_uncore *uncore = gsc_uc_to_gt(gsc)->uncore; + u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG); + + return fw_status & GSC_FW_INIT_COMPLETE_BIT; +} + +static int emit_gsc_fw_load(struct i915_request *rq, struct intel_gsc_uc *gsc) +{ + u32 offset = i915_ggtt_offset(gsc->local); + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GSC_FW_LOAD; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = (gsc->local->size / SZ_4K) | HECI1_FW_LIMIT_VALID; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gsc_fw_load(struct intel_gsc_uc *gsc) +{ + struct intel_context *ce = gsc->ce; + struct i915_request *rq; + int err; + + if (!ce) + return -ENODEV; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + if (ce->engine->emit_init_breadcrumb) { + err = ce->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + err = emit_gsc_fw_load(rq, gsc); + if (err) + goto out_rq; + + err = ce->engine->emit_flush(rq, 0); + +out_rq: + i915_request_get(rq); + + if (unlikely(err)) + i915_request_set_error_once(rq, err); + + i915_request_add(rq); + + if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0) + err = -ETIME; + + i915_request_put(rq); + + if (err) + drm_err(&gsc_uc_to_gt(gsc)->i915->drm, + "Request submission for GSC load failed (%d)\n", + err); + + return err; +} + +static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + void *src, *dst; + + if (!gsc->local) + return -ENODEV; + + obj = gsc->local->obj; + + if (obj->base.size < gsc->fw.size) + return -ENOSPC; + + dst = i915_gem_object_pin_map_unlocked(obj, + i915_coherent_map_type(i915, obj, true)); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + src = i915_gem_object_pin_map_unlocked(gsc->fw.obj, + i915_coherent_map_type(i915, gsc->fw.obj, true)); + if (IS_ERR(src)) { + i915_gem_object_unpin_map(obj); + return PTR_ERR(src); + } + + memset(dst, 0, obj->base.size); + memcpy(dst, src, gsc->fw.size); + + i915_gem_object_unpin_map(gsc->fw.obj); + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gsc_fw_wait(struct intel_gt *gt) +{ + return intel_wait_for_register(gt->uncore, + GSC_FW_STATUS_REG, + GSC_FW_INIT_COMPLETE_BIT, + GSC_FW_INIT_COMPLETE_BIT, + 500); +} + +int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct intel_uc_fw *gsc_fw = &gsc->fw; + int err; + + /* check current fw status */ + if (intel_gsc_uc_fw_init_done(gsc)) { + if (GEM_WARN_ON(!intel_uc_fw_is_loaded(gsc_fw))) + intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED); + return -EEXIST; + } + + if (!intel_uc_fw_is_loadable(gsc_fw)) + return -ENOEXEC; + + /* FW blob is ok, so clean the status */ + intel_uc_fw_sanitize(&gsc->fw); + + if (!gsc_is_in_reset(gt->uncore)) + return -EIO; + + err = gsc_fw_load_prepare(gsc); + if (err) + goto fail; + + err = gsc_fw_load(gsc); + if (err) + goto fail; + + err = gsc_fw_wait(gt); + if (err) + goto fail; + + /* FW is not fully operational until we enable SW proxy */ + intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED); + + drm_info(>->i915->drm, "Loaded GSC firmware %s\n", + gsc_fw->file_selected.path); + + return 0; + +fail: + return intel_uc_fw_mark_load_failed(gsc_fw, err); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h new file mode 100644 index 000000000000..4b5dbb44afb4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _INTEL_GSC_FW_H_ +#define _INTEL_GSC_FW_H_ + +#include + +struct intel_gsc_uc; + +int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc); +bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc); +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 7abbbb09f432..fd21dbd2663b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -7,8 +7,19 @@ #include "gt/intel_gt.h" #include "intel_gsc_uc.h" +#include "intel_gsc_fw.h" #include "i915_drv.h" +static void gsc_work(struct work_struct *work) +{ + struct intel_gsc_uc *gsc = container_of(work, typeof(*gsc), work); + struct intel_gt *gt = gsc_uc_to_gt(gsc); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + intel_gsc_uc_fw_upload(gsc); +} + static bool gsc_engine_supported(struct intel_gt *gt) { intel_engine_mask_t mask; @@ -32,6 +43,7 @@ static bool gsc_engine_supported(struct intel_gt *gt) void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) { intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC); + INIT_WORK(&gsc->work, gsc_work); /* we can arrive here from i915_driver_early_probe for primary * GT with it being not fully setup hence check device info's @@ -45,17 +57,46 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) int intel_gsc_uc_init(struct intel_gsc_uc *gsc) { - struct drm_i915_private *i915 = gsc_uc_to_gt(gsc)->i915; + static struct lock_class_key gsc_lock; + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine = gt->engine[GSC0]; + struct intel_context *ce; + struct i915_vma *vma; int err; err = intel_uc_fw_init(&gsc->fw); if (err) goto out; + vma = intel_guc_allocate_vma(>->uc.guc, SZ_8M); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_fw; + } + + gsc->local = vma; + + ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, + I915_GEM_HWS_GSC_ADDR, + &gsc_lock, "gsc_context"); + if (IS_ERR(ce)) { + drm_err(>->i915->drm, + "failed to create GSC CS ctx for FW communication\n"); + err = PTR_ERR(ce); + goto out_vma; + } + + gsc->ce = ce; + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOADABLE); return 0; +out_vma: + i915_vma_unpin_and_release(&gsc->local, 0); +out_fw: + intel_uc_fw_fini(&gsc->fw); out: i915_probe_error(i915, "failed with %d\n", err); return err; @@ -66,5 +107,31 @@ void intel_gsc_uc_fini(struct intel_gsc_uc *gsc) if (!intel_uc_fw_is_loadable(&gsc->fw)) return; + flush_work(&gsc->work); + + if (gsc->ce) + intel_engine_destroy_pinned_context(fetch_and_zero(&gsc->ce)); + + i915_vma_unpin_and_release(&gsc->local, 0); + intel_uc_fw_fini(&gsc->fw); } + +void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc) +{ + if (!intel_uc_fw_is_loadable(&gsc->fw)) + return; + + flush_work(&gsc->work); +} + +void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) +{ + if (!intel_uc_fw_is_loadable(&gsc->fw)) + return; + + if (intel_gsc_uc_fw_init_done(gsc)) + return; + + queue_work(system_unbound_wq, &gsc->work); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h index ea2b1c0713b8..03fd0a8e8db1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h @@ -8,14 +8,25 @@ #include "intel_uc_fw.h" +struct i915_vma; +struct intel_context; + struct intel_gsc_uc { /* Generic uC firmware management */ struct intel_uc_fw fw; + + /* GSC-specific additions */ + struct i915_vma *local; /* private memory for GSC usage */ + struct intel_context *ce; /* for submission to GSC FW via GSC engine */ + + struct work_struct work; /* for delayed load */ }; void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc); int intel_gsc_uc_init(struct intel_gsc_uc *gsc); void intel_gsc_uc_fini(struct intel_gsc_uc *gsc); +void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc); +void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc); static inline bool intel_gsc_uc_is_supported(struct intel_gsc_uc *gsc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6f74586f87d8..9a8a1abf71d7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt.h" #include "gt/intel_reset.h" +#include "intel_gsc_fw.h" #include "intel_gsc_uc.h" #include "intel_guc.h" #include "intel_guc_ads.h" @@ -548,6 +549,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } + intel_gsc_uc_load_start(&uc->gsc); + drm_info(&i915->drm, "GuC submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); drm_info(&i915->drm, "GuC SLPC %s\n", @@ -676,6 +679,9 @@ void intel_uc_suspend(struct intel_uc *uc) intel_wakeref_t wakeref; int err; + /* flush the GSC worker */ + intel_gsc_uc_suspend(&uc->gsc); + if (!intel_guc_is_ready(guc)) { guc->interrupts.enabled = false; return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 78ab60c07a2b..d6ff6c584c1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -930,6 +930,20 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) return ret; } +int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + + GEM_BUG_ON(!intel_uc_fw_is_loadable(uc_fw)); + + i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + err); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); + + return err; +} + /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware @@ -966,11 +980,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) return 0; fail: - i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - err); - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); - return err; + return intel_uc_fw_mark_load_failed(uc_fw, err); } static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index f4310516b3e6..6ba00e6b3975 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -289,6 +289,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); int intel_uc_fw_init(struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); +int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif -- cgit From 5a44fcd73498c29293ff2b520d6f02e49c68d59f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 8 Dec 2022 12:05:19 -0800 Subject: drm/i915/gsc: Do a driver-FLR on unload if GSC was loaded If the GSC was loaded, the only way to stop it during the driver unload flow is to do a driver-FLR. The driver-initiated FLR is not the same as PCI config space FLR in that it doesn't reset the SGUnit and doesn't modify the PCI config space. Thus, it doesn't require a re-enumeration of the PCI BARs. However, the driver-FLR does cause a memory wipe of graphics memory on all discrete GPU platforms or a wipe limited to stolen memory on the integrated GPU platforms. We perform the FLR as the last action before releasing the MMIO bar, so that we don't have to care about the consequences of the reset on the unload flow. v2: rename FLR function, add comment to explain FLR impact (Rodrigo), better explain why GSC needs FLR (Alan) Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Alan Previn Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-5-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 23 +++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 3 ++ drivers/gpu/drm/i915/intel_uncore.c | 56 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uncore.h | 13 +++++++ 4 files changed, 95 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index f88069ab71ab..e73d4440c5e8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -166,6 +166,29 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc) if (err) goto fail; + /* + * GSC is only killed by an FLR, so we need to trigger one on unload to + * make sure we stop it. This is because we assign a chunk of memory to + * the GSC as part of the FW load , so we need to make sure it stops + * using it when we release it to the system on driver unload. Note that + * this is not a problem of the unload per-se, because the GSC will not + * touch that memory unless there are requests for it coming from the + * driver; therefore, no accesses will happen while i915 is not loaded, + * but if we re-load the driver then the GSC might wake up and try to + * access that old memory location again. + * Given that an FLR is a very disruptive action (see the FLR function + * for details), we want to do it as the last action before releasing + * the access to the MMIO bar, which means we need to do it as part of + * the primary uncore cleanup. + * An alternative approach to the FLR would be to use a memory location + * that survives driver unload, like e.g. stolen memory, and keep the + * GSC loaded across reloads. However, this requires us to make sure we + * preserve that memory location on unload and then determine and + * reserve its offset on each subsequent load, which is not trivial, so + * it is easier to just kill everything and start fresh. + */ + intel_uncore_set_flr_on_fini(>->i915->uncore); + err = gsc_fw_load(gsc); if (err) goto fail; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8db5293c3873..358036e3fc6e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,9 @@ #define GU_CNTL _MMIO(0x101010) #define LMEM_INIT REG_BIT(7) +#define DRIVERFLR REG_BIT(31) +#define GU_DEBUG _MMIO(0x101018) +#define DRIVERFLR_STATUS REG_BIT(31) #define GEN6_STOLEN_RESERVED _MMIO(0x1082C0) #define GEN6_STOLEN_RESERVED_ADDR_MASK (0xFFF << 20) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6c25c9e7090a..6a4006d25949 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2702,6 +2702,59 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, } } +/* + * The driver-initiated FLR is the highest level of reset that we can trigger + * from within the driver. It is different from the PCI FLR in that it doesn't + * fully reset the SGUnit and doesn't modify the PCI config space and therefore + * it doesn't require a re-enumeration of the PCI BARs. However, the + * driver-initiated FLR does still cause a reset of both GT and display and a + * memory wipe of local and stolen memory, so recovery would require a full HW + * re-init and saving/restoring (or re-populating) the wiped memory. Since we + * perform the FLR as the very last action before releasing access to the HW + * during the driver release flow, we don't attempt recovery at all, because + * if/when a new instance of i915 is bound to the device it will do a full + * re-init anyway. + */ +static void driver_initiated_flr(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */ + int ret; + + drm_dbg(&i915->drm, "Triggering Driver-FLR\n"); + + /* + * Make sure any pending FLR requests have cleared by waiting for the + * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS + * to make sure it's not still set from a prior attempt (it's a write to + * clear bit). + * Note that we should never be in a situation where a previous attempt + * is still pending (unless the HW is totally dead), but better to be + * safe in case something unexpected happens + */ + ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms); + if (ret) { + drm_err(&i915->drm, + "Failed to wait for Driver-FLR bit to clear! %d\n", + ret); + return; + } + intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS); + + /* Trigger the actual Driver-FLR */ + intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR); + + ret = intel_wait_for_register_fw(uncore, GU_DEBUG, + DRIVERFLR_STATUS, DRIVERFLR_STATUS, + flr_timeout_ms); + if (ret) { + drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret); + return; + } + + intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS); +} + /* Called via drm-managed action */ void intel_uncore_fini_mmio(struct drm_device *dev, void *data) { @@ -2715,6 +2768,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data) intel_uncore_fw_domains_fini(uncore); iosf_mbi_punit_release(); } + + if (intel_uncore_needs_flr_on_fini(uncore)) + driver_initiated_flr(uncore); } /** diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index e9e38490815d..9ea1f4864a3a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -153,6 +153,7 @@ struct intel_uncore { #define UNCORE_HAS_FPGA_DBG_UNCLAIMED BIT(1) #define UNCORE_HAS_DBG_UNCLAIMED BIT(2) #define UNCORE_HAS_FIFO BIT(3) +#define UNCORE_NEEDS_FLR_ON_FINI BIT(4) const struct intel_forcewake_range *fw_domains_table; unsigned int fw_domains_table_entries; @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } +static inline bool +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore) +{ + return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI; +} + +static inline bool +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore) +{ + return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI; +} + void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915); void intel_uncore_init_early(struct intel_uncore *uncore, struct intel_gt *gt); -- cgit From 6b7cbdbe20b33943b86cb8d752ade6841e83fc42 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Thu, 8 Dec 2022 12:05:20 -0800 Subject: drm/i915/gsc: Disable GSC engine and power well if FW is not selected The GSC CS is only used for communicating with the GSC FW, so no need to initialize it if we're not going to use the FW. If we're not using neither the engine nor the microcontoller, then we can also disable the power well. IMPORTANT: lack of GSC FW breaks media C6 due to opposing requirements between CS setup and forcewake idleness. See in-code comment for detail. Signed-off-by: Jonathan Cavitt Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: John C Harrison Cc: Rodrigo Vivi Cc: Vinay Belgaumkar Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-6-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/intel_uncore.c | 3 +++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f63829abf66c..8ede4898905a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -892,6 +892,24 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) engine_mask_apply_compute_fuses(gt); engine_mask_apply_copy_fuses(gt); + /* + * The only use of the GSC CS is to load and communicate with the GSC + * FW, so we have no use for it if we don't have the FW. + * + * IMPORTANT: in cases where we don't have the GSC FW, we have a + * catch-22 situation that breaks media C6 due to 2 requirements: + * 1) once turned on, the GSC power well will not go to sleep unless the + * GSC FW is loaded. + * 2) to enable idling (which is required for media C6) we need to + * initialize the IDLE_MSG register for the GSC CS and do at least 1 + * submission, which will wake up the GSC power well. + */ + if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(>->uc)) { + drm_notice(>->i915->drm, + "No GSC FW selected, disabling GSC CS and media C6\n"); + info->engine_mask &= ~BIT(GSC0); + } + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6a4006d25949..25eac164c4ba 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2700,6 +2700,9 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, if (fw_domains & BIT(domain_id)) fw_domain_fini(uncore, domain_id); } + + if ((fw_domains & BIT(FW_DOMAIN_ID_GSC)) && !HAS_ENGINE(gt, GSC0)) + fw_domain_fini(uncore, FW_DOMAIN_ID_GSC); } /* -- cgit From e6d6e9d0b83d49c330f89ee8d3d1728a930c933e Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 8 Dec 2022 12:05:21 -0800 Subject: drm/i915/mtl: MTL has one GSC CS on the media GT Now that we have the GSC FW support code as a user to the GSC CS, we can add the relevant flag to the engine mask. Note that the engine will still be disabled until we define the GSC FW binary file. Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221208200521.2928378-7-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index cf3b28d71d2b..81952c60f267 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1125,7 +1125,7 @@ static const struct intel_gt_definition xelpmp_extra_gt[] = { .type = GT_MEDIA, .name = "Standalone Media GT", .gsi_offset = MTL_MEDIA_GSI_BASE, - .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + .engine_mask = BIT(VECS0) | BIT(VCS0) | BIT(VCS2) | BIT(GSC0), }, {} }; -- cgit From f67986b0119c048c6537cdc93da511f7ffdb2338 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 8 Dec 2022 10:05:42 -0800 Subject: drm/i915/pxp: Promote pxp subsystem to top-level of i915 Starting with MTL, there will be two GT-tiles, a render and media tile. PXP as a service for supporting workloads with protected contexts and protected buffers can be subscribed by process workloads on any tile. However, depending on the platform, only one of the tiles is used for control events pertaining to PXP operation (such as creating the arbitration session and session tear-down). PXP as a global feature is accessible via batch buffer instructions on any engine/tile and the coherency across tiles is handled implicitly by the HW. In fact, for the foreseeable future, we are expecting this single-control-tile for the PXP subsystem. In MTL, it's the standalone media tile (not the root tile) because it contains the VDBOX and KCR engine (among the assets PXP relies on for those events). Looking at the current code design, each tile is represented by the intel_gt structure while the intel_pxp structure currently hangs off the intel_gt structure. Keeping the intel_pxp structure within the intel_gt structure makes some internal functionalities more straight forward but adds code complexity to code readability and maintainibility to many external-to-pxp subsystems which may need to pick the correct intel_gt structure. An example of this would be the intel_pxp_is_active or intel_pxp_is_enabled functionality which should be viewed as a global level inquiry, not a per-gt inquiry. That said, this series promotes the intel_pxp structure into the drm_i915_private structure making it a top-level subsystem and the PXP subsystem will select the control gt internally and keep a pointer to it for internal reference. This promotion comes with two noteworthy changes: 1. Exported pxp functions that are called by external subsystems (such as intel_pxp_enabled/active) will have to check implicitly if i915->pxp is valid as that structure will not be allocated for HW that doesn't support PXP. 2. Since GT is now considered a soft-dependency of PXP we are ensuring that GT init happens before PXP init and vice versa for fini. This causes a minor ordering change whereby we previously called intel_pxp_suspend after intel_uc_suspend but now is before i915_gem_suspend_late but the change is required for correct dependency flows. Additionally, this re-order change doesn't have any impact because at that point in either case, the top level entry to i915 won't observe any PXP events (since the GPU was quiesced during suspend_prepare). Also, any PXP event doesn't really matter when we disable the PXP HW (global GT irqs are already off anyway, so even if there was a bug that generated spurious events we wouldn't see it and we would just clean it up on resume which is okay since the default fallback action for PXP would be to keep the sessions off at this suspend stage). Changes from prior revs: v11: - Reformat a comment (Tvrtko). v10: - Change the code flow for intel_pxp_init to make it more cleaner and readible with better comments explaining the difference between full-PXP-feature vs the partial-teelink inits depending on the platform. Additionally, only do the pxp allocation when we are certain the subsystem is needed. (Tvrtko). v9: - Cosmetic cleanups in supported/enabled/active. (Daniele). - Add comments for intel_pxp_init and pxp_get_ctrl_gt that explain the functional flow for when PXP is not supported but the backend-assets are needed for HuC authentication (Daniele and Tvrtko). - Fix two remaining functions that are accessible outside PXP that need to be checking pxp ptrs before using them: intel_pxp_irq_handler and intel_pxp_huc_load_and_auth (Tvrtko and Daniele). - User helper macro in pxp-debugfs (Tvrtko). v8: - Remove pxp_to_gt macro (Daniele). - Fix a bug in pxp_get_ctrl_gt for the case of MTL and we don't support GSC-FW on it. (Daniele). - Leave i915->pxp as NULL if we dont support PXP and in line with that, do additional validity check on i915->pxp for intel_pxp_is_supported/enabled/active (Daniele). - Remove unncessary include header from intel_gt_debugfs.c and check drm_minor i915->drm.primary (Daniele). - Other cosmetics / minor issues / more comments on suspend flow order change (Daniele). v7: - Drop i915_dev_to_pxp and in intel_pxp_init use 'i915->pxp' through out instead of local variable newpxp. (Rodrigo) - In the case intel_pxp_fini is called during driver unload but after i915 loading failed without pxp being allocated, check i915->pxp before referencing it. (Alan) v6: - Remove HAS_PXP macro and replace it with intel_pxp_is_supported because : [1] introduction of 'ctrl_gt' means we correct this for MTL's upcoming series now. [2] Also, this has little impact globally as its only used by PXP-internal callers at the moment. - Change intel_pxp_init/fini to take in i915 as its input to avoid ptr-to-ptr in init/fini calls.(Jani). - Remove the backpointer from pxp->i915 since we can use pxp->ctrl_gt->i915 if we need it. (Rodrigo). v5: - Switch from series to single patch (Rodrigo). - change function name from pxp_get_kcr_owner_gt to pxp_get_ctrl_gt. - Fix CI BAT failure by removing redundant call to intel_pxp_fini from driver-remove. - NOTE: remaining open still persists on using ptr-to-ptr and back-ptr. v4: - Instead of maintaining intel_pxp as an intel_gt structure member and creating a number of convoluted helpers that takes in i915 as input and redirects to the correct intel_gt or takes any intel_gt and internally replaces with the correct intel_gt, promote it to be a top-level i915 structure. v3: - Rename gt level helper functions to "intel_pxp_is_enabled/ supported/ active_on_gt" (Daniele) - Upgrade _gt_supports_pxp to replace what was intel_gtpxp_is supported as the new intel_pxp_is_supported_on_gt to check for PXP feature support vs the tee support for huc authentication. Fix pxp-debugfs-registration to use only the former to decide support. (Daniele) - Couple minor optimizations. v2: - Avoid introduction of new device info or gt variables and use existing checks / macros to differentiate the correct GT->PXP control ownership (Daniele Ceraolo Spurio) - Don't reuse the updated global-checkers for per-GT callers (such as other files within PXP) to avoid unnecessary GT-reparsing, expose a replacement helper like the prior ones. (Daniele). v1: - Add one more patch to the series for the intel_pxp suspend/resume for similar refactoring References: https://patchwork.freedesktop.org/patch/msgid/20221202011407.4068371-1-alan.previn.teres.alexis@intel.com Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Acked-by: Tvrtko Ursulin Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221208180542.998148-1-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +- drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c | 5 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 - drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 8 -- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 - drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 2 +- drivers/gpu/drm/i915/i915_driver.c | 18 +++ drivers/gpu/drm/i915/i915_drv.h | 7 +- drivers/gpu/drm/i915/pxp/intel_pxp.c | 128 ++++++++++++++++----- drivers/gpu/drm/i915/pxp/intel_pxp.h | 9 +- drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c | 8 +- drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c | 36 +++--- drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h | 4 +- drivers/gpu/drm/i915/pxp/intel_pxp_huc.c | 9 +- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 18 ++- drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 6 +- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 10 +- drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 31 +++-- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 8 ++ 23 files changed, 210 insertions(+), 116 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 7cb713043408..dab9fdfa9212 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1841,7 +1841,7 @@ static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - return intel_pxp_key_check(&to_gt(i915)->pxp, obj, false) == 0; + return intel_pxp_key_check(i915->pxp, obj, false) == 0; } static bool pxp_is_borked(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7f2831efc798..46e71f62fcec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -257,7 +257,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915, if (!protected) { pc->uses_protected_content = false; - } else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) { + } else if (!intel_pxp_is_enabled(i915->pxp)) { ret = -ENODEV; } else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) || !(pc->user_flags & BIT(UCONTEXT_BANNABLE))) { @@ -271,8 +271,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915, */ pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (!intel_pxp_is_active(&to_gt(i915)->pxp)) - ret = intel_pxp_start(&to_gt(i915)->pxp); + if (!intel_pxp_is_active(i915->pxp)) + ret = intel_pxp_start(i915->pxp); } return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 33673fe7ee0a..005a7f842784 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -384,7 +384,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data if (ext.flags) return -EINVAL; - if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp)) + if (!intel_pxp_is_enabled(ext_data->i915->pxp)) return -ENODEV; ext_data->flags |= I915_BO_PROTECTED; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 16618a548026..2223179ff28b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -871,7 +871,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) */ if (i915_gem_context_uses_protected_content(eb->gem_context) && i915_gem_object_is_protected(obj)) { - err = intel_pxp_key_check(&vm->gt->pxp, obj, true); + err = intel_pxp_key_check(eb->i915->pxp, obj, true); if (err) { i915_gem_object_put(obj); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2bf2ec7f30e4..5dfa95be2b34 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -8,7 +8,6 @@ #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" -#include "pxp/intel_pxp.h" #include "i915_drv.h" #include "i915_perf_oa_regs.h" @@ -746,8 +745,6 @@ int intel_gt_init(struct intel_gt *gt) intel_migrate_init(>->migrate, gt); - intel_pxp_init(>->pxp); - goto out_fw; err_gt: __intel_gt_disable(gt); @@ -787,8 +784,6 @@ void intel_gt_driver_unregister(struct intel_gt *gt) intel_rps_driver_unregister(>->rps); intel_gsc_fini(>->gsc); - intel_pxp_fini(>->pxp); - /* * Upon unregistering the device to prevent any new users, cancel * all in-flight requests so that we can quickly unbind the active diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index dd53641f3637..5fc2df01aa0d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -12,7 +12,6 @@ #include "intel_gt_mcr.h" #include "intel_gt_pm_debugfs.h" #include "intel_sseu_debugfs.h" -#include "pxp/intel_pxp_debugfs.h" #include "uc/intel_uc_debugfs.h" int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val) @@ -99,7 +98,6 @@ void intel_gt_debugfs_register(struct intel_gt *gt) intel_sseu_debugfs_register(gt, root); intel_uc_debugfs_register(>->uc, root); - intel_pxp_debugfs_register(>->pxp, root); } void intel_gt_debugfs_register_files(struct dentry *root, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 6f6b9e04d916..8fac2660e16b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -76,7 +76,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return gen11_rps_irq_handler(&media_gt->rps, iir); if (instance == OTHER_KCR_INSTANCE) - return intel_pxp_irq_handler(>->pxp, iir); + return intel_pxp_irq_handler(gt->i915->pxp, iir); if (instance == OTHER_GSC_INSTANCE) return intel_gsc_irq_handler(gt, iir); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 833b7682643f..ffccffadc3ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -303,8 +303,6 @@ int intel_gt_resume(struct intel_gt *gt) intel_uc_resume(>->uc); - intel_pxp_resume(>->pxp); - user_forcewake(gt, false); out_fw: @@ -338,8 +336,6 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) { user_forcewake(gt, true); wait_for_suspend(gt); - - intel_pxp_suspend_prepare(>->pxp); } static suspend_state_t pm_suspend_target(void) @@ -364,7 +360,6 @@ void intel_gt_suspend_late(struct intel_gt *gt) GEM_BUG_ON(gt->awake); intel_uc_suspend(>->uc); - intel_pxp_suspend(>->pxp); /* * On disabling the device, we want to turn off HW access to memory @@ -392,7 +387,6 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { - intel_pxp_runtime_suspend(>->pxp); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -410,8 +404,6 @@ int intel_gt_runtime_resume(struct intel_gt *gt) if (ret) return ret; - intel_pxp_runtime_resume(>->pxp); - return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index f519aa4a004a..0b6da2aa9718 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -30,7 +30,6 @@ #include "intel_rps_types.h" #include "intel_migrate_types.h" #include "intel_wakeref.h" -#include "pxp/intel_pxp_types.h" #include "intel_wopcm.h" struct drm_i915_private; @@ -275,8 +274,6 @@ struct intel_gt { u8 wb_index; /* Only used on HAS_L3_CCS_READ() platforms */ } mocs; - struct intel_pxp pxp; - /* gt/gtN sysfs */ struct kobject sysfs_gt; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 4f246416db17..534b0aa43316 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -32,7 +32,7 @@ int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw)); - ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp); + ret = intel_pxp_huc_load_and_auth(huc_to_gt(huc)->i915->pxp); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 4cc3ced83959..2cb694160b30 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -73,6 +73,8 @@ #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" +#include "pxp/intel_pxp.h" +#include "pxp/intel_pxp_debugfs.h" #include "pxp/intel_pxp_pm.h" #include "i915_file_private.h" @@ -756,6 +758,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) intel_gt_driver_register(gt); + intel_pxp_debugfs_register(dev_priv->pxp); + i915_hwmon_register(dev_priv); intel_display_driver_register(dev_priv); @@ -787,6 +791,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_display_driver_unregister(dev_priv); + intel_pxp_fini(dev_priv); + for_each_gt(gt, dev_priv, i) intel_gt_driver_unregister(gt); @@ -930,6 +936,8 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset2; + intel_pxp_init(i915); + ret = intel_modeset_init(i915); if (ret) goto out_cleanup_gem; @@ -1165,6 +1173,8 @@ static int i915_drm_prepare(struct drm_device *dev) { struct drm_i915_private *i915 = to_i915(dev); + intel_pxp_suspend_prepare(i915->pxp); + /* * NB intel_display_suspend() may issue new requests after we've * ostensibly marked the GPU as ready-to-sleep here. We need to @@ -1245,6 +1255,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) disable_rpm_wakeref_asserts(rpm); + intel_pxp_suspend(dev_priv->pxp); + i915_gem_suspend_late(dev_priv); for_each_gt(gt, dev_priv, i) @@ -1357,6 +1369,8 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); + intel_pxp_resume(dev_priv->pxp); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); intel_hpd_init(dev_priv); @@ -1627,6 +1641,8 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); + intel_pxp_runtime_suspend(dev_priv->pxp); + for_each_gt(gt, dev_priv, i) intel_gt_runtime_suspend(gt); @@ -1731,6 +1747,8 @@ static int intel_runtime_resume(struct device *kdev) for_each_gt(gt, dev_priv, i) intel_gt_runtime_resume(gt); + intel_pxp_runtime_resume(dev_priv->pxp); + /* * On VLV/CHV display interrupts are part of the display * power well, so hpd is reinitialized from there. For diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e3820d2c404..32b2b85685bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,6 +72,7 @@ struct intel_encoder; struct intel_limit; struct intel_overlay_error_state; struct vlv_s0ix_state; +struct intel_pxp; #define I915_GEM_GPU_DOMAINS \ (I915_GEM_DOMAIN_RENDER | \ @@ -379,6 +380,8 @@ struct drm_i915_private { struct file *mmap_singleton; } gem; + struct intel_pxp *pxp; + u8 pch_ssc_use; /* For i915gm/i945gm vblank irq workaround */ @@ -933,10 +936,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_GLOBAL_MOCS_REGISTERS(dev_priv) (INTEL_INFO(dev_priv)->has_global_mocs) -#define HAS_PXP(dev_priv) ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \ - INTEL_INFO(dev_priv)->has_pxp) && \ - VDBOX_MASK(to_gt(dev_priv))) - #define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) #define HAS_GMD_ID(i915) (INTEL_INFO(i915)->has_gmd_id) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 5efe61f67546..cfc9af8b3d21 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -3,13 +3,19 @@ * Copyright(c) 2020 Intel Corporation. */ #include + +#include "gem/i915_gem_context.h" + +#include "gt/intel_context.h" +#include "gt/intel_gt.h" + +#include "i915_drv.h" + #include "intel_pxp.h" #include "intel_pxp_irq.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" -#include "gem/i915_gem_context.h" -#include "gt/intel_context.h" -#include "i915_drv.h" +#include "intel_pxp_types.h" /** * DOC: PXP @@ -39,19 +45,19 @@ * performed via the mei_pxp component module. */ -struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) +bool intel_pxp_is_supported(const struct intel_pxp *pxp) { - return container_of(pxp, struct intel_gt, pxp); + return IS_ENABLED(CONFIG_DRM_I915_PXP) && pxp; } bool intel_pxp_is_enabled(const struct intel_pxp *pxp) { - return pxp->ce; + return IS_ENABLED(CONFIG_DRM_I915_PXP) && pxp && pxp->ce; } bool intel_pxp_is_active(const struct intel_pxp *pxp) { - return pxp->arb_is_valid; + return IS_ENABLED(CONFIG_DRM_I915_PXP) && pxp && pxp->arb_is_valid; } /* KCR register definitions */ @@ -74,7 +80,7 @@ static void kcr_pxp_disable(struct intel_gt *gt) static int create_vcs_context(struct intel_pxp *pxp) { static struct lock_class_key pxp_lock; - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; struct intel_engine_cs *engine; struct intel_context *ce; int i; @@ -109,7 +115,7 @@ static void destroy_vcs_context(struct intel_pxp *pxp) static void pxp_init_full(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; int ret; /* @@ -138,31 +144,97 @@ out_context: destroy_vcs_context(pxp); } -void intel_pxp_init(struct intel_pxp *pxp) +static struct intel_gt *find_gt_for_required_teelink(struct drm_i915_private *i915) { - struct intel_gt *gt = pxp_to_gt(pxp); + /* + * NOTE: Only certain platforms require PXP-tee-backend dependencies + * for HuC authentication. For now, its limited to DG2. + */ + if (IS_ENABLED(CONFIG_INTEL_MEI_PXP) && IS_ENABLED(CONFIG_INTEL_MEI_GSC) && + intel_huc_is_loaded_by_gsc(&i915->gt0.uc.huc) && intel_uc_uses_huc(&i915->gt0.uc)) + return &i915->gt0; - /* we rely on the mei PXP module */ - if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP)) - return; + return NULL; +} + +static struct intel_gt *find_gt_for_required_protected_content(struct drm_i915_private *i915) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_PXP) || !INTEL_INFO(i915)->has_pxp) + return NULL; /* - * If HuC is loaded by GSC but PXP is disabled, we can skip the init of - * the full PXP session/object management and just init the tee channel. + * For MTL onwards, PXP-controller-GT needs to have a valid GSC engine + * on the media GT. NOTE: if we have a media-tile with a GSC-engine, + * the VDBOX is already present so skip that check */ - if (HAS_PXP(gt->i915)) - pxp_init_full(pxp); - else if (intel_huc_is_loaded_by_gsc(>->uc.huc) && intel_uc_uses_huc(>->uc)) - intel_pxp_tee_component_init(pxp); + if (i915->media_gt && HAS_ENGINE(i915->media_gt, GSC0)) + return i915->media_gt; + + /* + * Else we rely on mei-pxp module but only on legacy platforms + * prior to having separate media GTs and has a valid VDBOX. + */ + if (IS_ENABLED(CONFIG_INTEL_MEI_PXP) && !i915->media_gt && VDBOX_MASK(&i915->gt0)) + return &i915->gt0; + + return NULL; } -void intel_pxp_fini(struct intel_pxp *pxp) +int intel_pxp_init(struct drm_i915_private *i915) { - pxp->arb_is_valid = false; + struct intel_gt *gt; + bool is_full_feature = false; - intel_pxp_tee_component_fini(pxp); + /* + * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since + * we still need it if PXP's backend tee transport is needed. + */ + gt = find_gt_for_required_protected_content(i915); + if (gt) + is_full_feature = true; + else + gt = find_gt_for_required_teelink(i915); - destroy_vcs_context(pxp); + if (!gt) + return -ENODEV; + + /* + * At this point, we will either enable full featured PXP capabilities + * including session and object management, or we will init the backend tee + * channel for internal users such as HuC loading by GSC + */ + i915->pxp = kzalloc(sizeof(*i915->pxp), GFP_KERNEL); + if (!i915->pxp) + return -ENOMEM; + + i915->pxp->ctrl_gt = gt; + + /* + * If full PXP feature is not available but HuC is loaded by GSC on pre-MTL + * such as DG2, we can skip the init of the full PXP session/object management + * and just init the tee channel. + */ + if (is_full_feature) + pxp_init_full(i915->pxp); + else + intel_pxp_tee_component_init(i915->pxp); + + return 0; +} + +void intel_pxp_fini(struct drm_i915_private *i915) +{ + if (!i915->pxp) + return; + + i915->pxp->arb_is_valid = false; + + intel_pxp_tee_component_fini(i915->pxp); + + destroy_vcs_context(i915->pxp); + + kfree(i915->pxp); + i915->pxp = NULL; } void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp) @@ -173,7 +245,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp) static void pxp_queue_termination(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; /* * We want to get the same effect as if we received a termination @@ -238,13 +310,13 @@ unlock: void intel_pxp_init_hw(struct intel_pxp *pxp) { - kcr_pxp_enable(pxp_to_gt(pxp)); + kcr_pxp_enable(pxp->ctrl_gt); intel_pxp_irq_enable(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) { - kcr_pxp_disable(pxp_to_gt(pxp)); + kcr_pxp_disable(pxp->ctrl_gt); intel_pxp_irq_disable(pxp); } @@ -278,7 +350,7 @@ int intel_pxp_key_check(struct intel_pxp *pxp, void intel_pxp_invalidate(struct intel_pxp *pxp) { - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; struct i915_gem_context *ctx, *cn; /* ban all contexts marked as protected */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 2da309088c6d..04440fada711 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -9,15 +9,16 @@ #include #include -struct intel_pxp; struct drm_i915_gem_object; +struct drm_i915_private; +struct intel_pxp; -struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp); +bool intel_pxp_is_supported(const struct intel_pxp *pxp); bool intel_pxp_is_enabled(const struct intel_pxp *pxp); bool intel_pxp_is_active(const struct intel_pxp *pxp); -void intel_pxp_init(struct intel_pxp *pxp); -void intel_pxp_fini(struct intel_pxp *pxp); +int intel_pxp_init(struct drm_i915_private *i915); +void intel_pxp_fini(struct drm_i915_private *i915); void intel_pxp_init_hw(struct intel_pxp *pxp); void intel_pxp_fini_hw(struct intel_pxp *pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c index f41e45763d0d..0eee51c4a772 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c @@ -3,9 +3,6 @@ * Copyright(c) 2020, Intel Corporation. All rights reserved. */ -#include "intel_pxp.h" -#include "intel_pxp_cmd.h" -#include "intel_pxp_session.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gpu_commands.h" @@ -13,6 +10,11 @@ #include "i915_trace.h" +#include "intel_pxp.h" +#include "intel_pxp_cmd.h" +#include "intel_pxp_session.h" +#include "intel_pxp_types.h" + /* stall until prior PXP and MFX/HCP/HUC objects are cmopleted */ #define MFX_WAIT_PXP (MFX_WAIT | \ MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c index 4359e8be4101..4b8e70caa3ad 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c @@ -9,18 +9,20 @@ #include #include "gt/intel_gt_debugfs.h" + #include "i915_drv.h" + #include "intel_pxp.h" #include "intel_pxp_debugfs.h" #include "intel_pxp_irq.h" +#include "intel_pxp_types.h" static int pxp_info_show(struct seq_file *m, void *data) { struct intel_pxp *pxp = m->private; struct drm_printer p = drm_seq_file_printer(m); - bool enabled = intel_pxp_is_enabled(pxp); - if (!enabled) { + if (!intel_pxp_is_enabled(pxp)) { drm_printf(&p, "pxp disabled\n"); return 0; } @@ -30,7 +32,8 @@ static int pxp_info_show(struct seq_file *m, void *data) return 0; } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(pxp_info); + +DEFINE_SHOW_ATTRIBUTE(pxp_info); static int pxp_terminate_get(void *data, u64 *val) { @@ -41,7 +44,7 @@ static int pxp_terminate_get(void *data, u64 *val) static int pxp_terminate_set(void *data, u64 val) { struct intel_pxp *pxp = data; - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; if (!intel_pxp_is_active(pxp)) return -ENODEV; @@ -59,23 +62,26 @@ static int pxp_terminate_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(pxp_terminate_fops, pxp_terminate_get, pxp_terminate_set, "%llx\n"); -void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *gt_root) + +void intel_pxp_debugfs_register(struct intel_pxp *pxp) { - static const struct intel_gt_debugfs_file files[] = { - { "info", &pxp_info_fops, NULL }, - { "terminate_state", &pxp_terminate_fops, NULL }, - }; - struct dentry *root; + struct drm_minor *minor; + struct dentry *pxproot; - if (!gt_root) + if (!intel_pxp_is_supported(pxp)) return; - if (!HAS_PXP((pxp_to_gt(pxp)->i915))) + minor = pxp->ctrl_gt->i915->drm.primary; + if (!minor->debugfs_root) return; - root = debugfs_create_dir("pxp", gt_root); - if (IS_ERR(root)) + pxproot = debugfs_create_dir("pxp", minor->debugfs_root); + if (IS_ERR(pxproot)) return; - intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), pxp); + debugfs_create_file("info", 0444, pxproot, + pxp, &pxp_info_fops); + + debugfs_create_file("terminate_state", 0644, pxproot, + pxp, &pxp_terminate_fops); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h index 7e0c3d2f5d7e..299382b59e66 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h @@ -10,10 +10,10 @@ struct intel_pxp; struct dentry; #ifdef CONFIG_DRM_I915_PXP -void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root); +void intel_pxp_debugfs_register(struct intel_pxp *pxp); #else static inline void -intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root) +intel_pxp_debugfs_register(struct intel_pxp *pxp) { } #endif diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c index 2e1165522950..812c8bc7005e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c @@ -18,8 +18,8 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); - struct intel_huc *huc = >->uc.huc; + struct intel_gt *gt; + struct intel_huc *huc; struct pxp43_start_huc_auth_in huc_in = {0}; struct pxp43_start_huc_auth_out huc_out = {0}; dma_addr_t huc_phys_addr; @@ -27,9 +27,12 @@ int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp) u8 fence_id = 0; int err; - if (!pxp->pxp_component) + if (!pxp || !pxp->pxp_component) return -ENODEV; + gt = pxp->ctrl_gt; + huc = >->uc.huc; + huc_phys_addr = i915_gem_object_get_dma_address(huc->fw.obj, 0); /* write the PXP message into the lmem (the sg list) */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c index c28be430718a..91e9622c07d0 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -3,14 +3,18 @@ * Copyright(c) 2020 Intel Corporation. */ #include -#include "intel_pxp.h" -#include "intel_pxp_irq.h" -#include "intel_pxp_session.h" + #include "gt/intel_gt_irq.h" #include "gt/intel_gt_regs.h" #include "gt/intel_gt_types.h" + #include "i915_irq.h" #include "i915_reg.h" + +#include "intel_pxp.h" +#include "intel_pxp_irq.h" +#include "intel_pxp_session.h" +#include "intel_pxp_types.h" #include "intel_runtime_pm.h" /** @@ -20,11 +24,13 @@ */ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt; if (GEM_WARN_ON(!intel_pxp_is_enabled(pxp))) return; + gt = pxp->ctrl_gt; + lockdep_assert_held(gt->irq_lock); if (unlikely(!iir)) @@ -62,7 +68,7 @@ static inline void pxp_irq_reset(struct intel_gt *gt) void intel_pxp_irq_enable(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; spin_lock_irq(gt->irq_lock); @@ -77,7 +83,7 @@ void intel_pxp_irq_enable(struct intel_pxp *pxp) void intel_pxp_irq_disable(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; /* * We always need to submit a global termination when we re-enable the diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 6a7d4e2ee138..892d39cc61c1 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -3,11 +3,13 @@ * Copyright(c) 2020 Intel Corporation. */ +#include "i915_drv.h" + #include "intel_pxp.h" #include "intel_pxp_irq.h" #include "intel_pxp_pm.h" #include "intel_pxp_session.h" -#include "i915_drv.h" +#include "intel_pxp_types.h" void intel_pxp_suspend_prepare(struct intel_pxp *pxp) { @@ -26,7 +28,7 @@ void intel_pxp_suspend(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return; - with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) { + with_intel_runtime_pm(&pxp->ctrl_gt->i915->runtime_pm, wakeref) { intel_pxp_fini_hw(pxp); pxp->hw_state_invalidated = false; } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 85572360c71a..ae413580b81a 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -20,7 +20,7 @@ static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) { - struct intel_uncore *uncore = pxp_to_gt(pxp)->uncore; + struct intel_uncore *uncore = pxp->ctrl_gt->uncore; intel_wakeref_t wakeref; u32 sip = 0; @@ -33,7 +33,7 @@ static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_play) { - struct intel_uncore *uncore = pxp_to_gt(pxp)->uncore; + struct intel_uncore *uncore = pxp->ctrl_gt->uncore; intel_wakeref_t wakeref; u32 mask = BIT(id); int ret; @@ -56,7 +56,7 @@ static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_pla static int pxp_create_arb_session(struct intel_pxp *pxp) { - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; int ret; pxp->arb_is_valid = false; @@ -90,7 +90,7 @@ static int pxp_create_arb_session(struct intel_pxp *pxp) static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) { int ret; - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; /* must mark termination in progress calling this function */ GEM_WARN_ON(pxp->arb_is_valid); @@ -141,7 +141,7 @@ static void pxp_terminate_complete(struct intel_pxp *pxp) static void pxp_session_work(struct work_struct *work) { struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work); - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; intel_wakeref_t wakeref; u32 events = 0; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index b0c9170b1395..d50354bfb993 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -11,25 +11,20 @@ #include "gem/i915_gem_lmem.h" #include "i915_drv.h" + #include "intel_pxp.h" -#include "intel_pxp_session.h" -#include "intel_pxp_tee.h" #include "intel_pxp_cmd_interface_42.h" #include "intel_pxp_huc.h" - -static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev) -{ - struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); - - return &to_gt(i915)->pxp; -} +#include "intel_pxp_session.h" +#include "intel_pxp_tee.h" +#include "intel_pxp_types.h" static int intel_pxp_tee_io_message(struct intel_pxp *pxp, void *msg_in, u32 msg_in_size, void *msg_out, u32 msg_out_max_size, u32 *msg_out_rcv_size) { - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; struct i915_pxp_component *pxp_component = pxp->pxp_component; int ret = 0; @@ -79,7 +74,7 @@ int intel_pxp_tee_stream_message(struct intel_pxp *pxp, { /* TODO: for bigger objects we need to use a sg of 4k pages */ const size_t max_msg_size = PAGE_SIZE; - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; struct i915_pxp_component *pxp_component = pxp->pxp_component; unsigned int offset = 0; struct scatterlist *sg; @@ -127,8 +122,8 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev, struct device *tee_kdev, void *data) { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); - struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); - struct intel_uc *uc = &pxp_to_gt(pxp)->uc; + struct intel_pxp *pxp = i915->pxp; + struct intel_uc *uc = &pxp->ctrl_gt->uc; intel_wakeref_t wakeref; int ret = 0; @@ -164,7 +159,7 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev, struct device *tee_kdev, void *data) { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); - struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev); + struct intel_pxp *pxp = i915->pxp; intel_wakeref_t wakeref; if (intel_pxp_is_enabled(pxp)) @@ -183,7 +178,7 @@ static const struct component_ops i915_pxp_tee_component_ops = { static int alloc_streaming_command(struct intel_pxp *pxp) { - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; struct drm_i915_gem_object *obj = NULL; void *cmd; int err; @@ -244,7 +239,7 @@ static void free_streaming_command(struct intel_pxp *pxp) int intel_pxp_tee_component_init(struct intel_pxp *pxp) { int ret; - struct intel_gt *gt = pxp_to_gt(pxp); + struct intel_gt *gt = pxp->ctrl_gt; struct drm_i915_private *i915 = gt->i915; mutex_init(&pxp->tee_mutex); @@ -271,7 +266,7 @@ out_free: void intel_pxp_tee_component_fini(struct intel_pxp *pxp) { - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; if (!pxp->pxp_component_added) return; @@ -285,7 +280,7 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp) int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, int arb_session_id) { - struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915; + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; struct pxp42_create_arb_in msg_in = {0}; struct pxp42_create_arb_out msg_out = {0}; int ret; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index f74b1e11a505..7dc5f08d1583 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -12,12 +12,20 @@ #include struct intel_context; +struct intel_gt; struct i915_pxp_component; +struct drm_i915_private; /** * struct intel_pxp - pxp state */ struct intel_pxp { + /** + * @ctrl_gt: poiner to the tile that owns the controls for PXP subsystem assets that + * the VDBOX, the KCR engine (and GSC CS depending on the platform) + */ + struct intel_gt *ctrl_gt; + /** * @pxp_component: i915_pxp_component struct of the bound mei_pxp * module. Only set and cleared inside component bind/unbind functions, -- cgit From 35168a6c4ed53db4f786858bac23b1474fd7d0dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Dec 2022 12:28:42 +0000 Subject: drm/i915/migrate: Account for the reserved_space If the ring is nearly full when calling into emit_pte(), we might incorrectly trample the reserved_space when constructing the packet to emit the PTEs. This then triggers the GEM_BUG_ON(rq->reserved_space > ring->space) when later submitting the request, since the request itself doesn't have enough space left in the ring to emit things like workarounds, breadcrumbs etc. v2: Fix the whitespace errors Testcase: igt@i915_selftests@live_emit_pte_full_ring Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7535 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6889 Fixes: cf586021642d ("drm/i915/gt: Pipelined page migration") Signed-off-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Andrzej Hajda Cc: Andi Shyti Cc: Nirmoy Das Cc: # v5.15+ Tested-by: Nirmoy Das Reviewed-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221202122844.428006-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index b405a04135ca..b783f6f740c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -342,6 +342,16 @@ static int emit_no_arbitration(struct i915_request *rq) return 0; } +static int max_pte_pkt_size(struct i915_request *rq, int pkt) +{ + struct intel_ring *ring = rq->ring; + + pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + return pkt; +} + static int emit_pte(struct i915_request *rq, struct sgt_dma *it, enum i915_cache_level cache_level, @@ -388,8 +398,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_length); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ @@ -422,8 +431,7 @@ static int emit_pte(struct i915_request *rq, } } - pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_rem); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); -- cgit From f7f0ca5788d399e5e523c59fd119df359498864d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 2 Dec 2022 12:28:43 +0000 Subject: drm/i915/selftests: use live_subtests for live_migrate Probably a good idea to do an igt_flush_test() at the end of each subtest, just to be sure the previous work has been flushed and doesn't somehow interfere with the current subtest. Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Andi Shyti Cc: Andrzej Hajda Cc: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221202122844.428006-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/selftest_migrate.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 0dc5309c90a4..1eab025ac002 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -486,7 +486,8 @@ global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) static int live_migrate_copy(void *arg) { - struct intel_migrate *migrate = arg; + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; struct drm_i915_private *i915 = migrate->context->engine->i915; I915_RND_STATE(prng); int i; @@ -507,7 +508,8 @@ static int live_migrate_copy(void *arg) static int live_migrate_clear(void *arg) { - struct intel_migrate *migrate = arg; + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; struct drm_i915_private *i915 = migrate->context->engine->i915; I915_RND_STATE(prng); int i; @@ -593,7 +595,10 @@ static int __thread_migrate_copy(void *arg) static int thread_migrate_copy(void *arg) { - return threaded_migrate(arg, __thread_migrate_copy, 0); + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; + + return threaded_migrate(migrate, __thread_migrate_copy, 0); } static int __thread_global_copy(void *arg) @@ -605,7 +610,10 @@ static int __thread_global_copy(void *arg) static int thread_global_copy(void *arg) { - return threaded_migrate(arg, __thread_global_copy, 0); + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; + + return threaded_migrate(migrate, __thread_global_copy, 0); } static int __thread_migrate_clear(void *arg) @@ -624,12 +632,18 @@ static int __thread_global_clear(void *arg) static int thread_migrate_clear(void *arg) { - return threaded_migrate(arg, __thread_migrate_clear, 0); + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; + + return threaded_migrate(migrate, __thread_migrate_clear, 0); } static int thread_global_clear(void *arg) { - return threaded_migrate(arg, __thread_global_clear, 0); + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; + + return threaded_migrate(migrate, __thread_global_clear, 0); } int intel_migrate_live_selftests(struct drm_i915_private *i915) @@ -647,7 +661,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) if (!gt->migrate.context) return 0; - return i915_subtests(tests, >->migrate); + return intel_gt_live_subtests(tests, gt); } static struct drm_i915_gem_object * -- cgit From e288e178738fff41d90454317d9333d88c263fa1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 2 Dec 2022 12:28:44 +0000 Subject: drm/i915/selftests: exercise emit_pte() with nearly full ring Simple regression test to check that we don't trample the rq->reserved_space when returning from emit_pte(), if the ring is nearly full. v2: Make spinner_kill() static v3: Reduce the ring size further, which should mean we need to execute less noops; hopefully this appeases bsw. Also add some debug logging. v4: Fix the min request construction to account for reserved_space + I915_EMIT_PTE_NUM_DWORDS v5: Use a simple on-stack timer to kill the spinner instead of kthread (Chris) References: https://gitlab.freedesktop.org/drm/intel/-/issues/7535 References: https://gitlab.freedesktop.org/drm/intel/-/issues/6889 Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Andi Shyti Cc: Andrzej Hajda Cc: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221202122844.428006-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 6 +- drivers/gpu/drm/i915/gt/selftest_migrate.c | 145 +++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index b783f6f740c8..e08a739b7091 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -352,6 +352,8 @@ static int max_pte_pkt_size(struct i915_request *rq, int pkt) return pkt; } +#define I915_EMIT_PTE_NUM_DWORDS 6 + static int emit_pte(struct i915_request *rq, struct sgt_dma *it, enum i915_cache_level cache_level, @@ -393,7 +395,7 @@ static int emit_pte(struct i915_request *rq, offset += (u64)rq->engine->instance << 32; - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -416,7 +418,7 @@ static int emit_pte(struct i915_request *rq, intel_ring_advance(rq, cs); intel_ring_update_space(ring); - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS); if (IS_ERR(cs)) return PTR_ERR(cs); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 1eab025ac002..e677f2da093d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" +#include "selftests/igt_spinner.h" #include "selftests/i915_random.h" static const unsigned int sizes[] = { @@ -529,6 +530,149 @@ static int live_migrate_clear(void *arg) return 0; } +struct spinner_timer { + struct timer_list timer; + struct igt_spinner spin; +}; + +static void spinner_kill(struct timer_list *timer) +{ + struct spinner_timer *st = from_timer(st, timer, timer); + + igt_spinner_end(&st->spin); + pr_info("%s\n", __func__); +} + +static int live_emit_pte_full_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_migrate *migrate = >->migrate; + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *obj; + struct intel_context *ce; + struct i915_request *rq, *prev; + struct spinner_timer st; + struct sgt_dma it; + int len, sz, err; + u32 *cs; + + /* + * Simple regression test to check that we don't trample the + * rq->reserved_space when returning from emit_pte(), if the ring is + * nearly full. + */ + + if (igt_spinner_init(&st.spin, to_gt(i915))) + return -ENOMEM; + + obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_spinner; + } + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) + goto out_obj; + + ce = intel_migrate_create_context(migrate); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_obj; + } + + ce->ring_size = SZ_4K; /* Not too big */ + + err = intel_context_pin(ce); + if (err) + goto out_put; + + rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&st.spin, rq)) { + err = -EIO; + goto out_unpin; + } + + /* + * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS + + * ring->reserved_space at the end. To actually emit the PTEs we require + * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is + * greater than PAGE_SIZE. The correct behaviour is to wait for more + * ring space in emit_pte(), otherwise we trample on the reserved_space + * resulting in crashes when later submitting the rq. + */ + + prev = NULL; + do { + if (prev) + i915_request_add(rq); + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) - + I915_EMIT_PTE_NUM_DWORDS; + sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) - + I915_EMIT_PTE_NUM_DWORDS); + cs = intel_ring_begin(rq, sz); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_rq; + } + + memset32(cs, MI_NOOP, sz); + cs += sz; + intel_ring_advance(rq, cs); + + pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz); + + prev = rq; + } while (rq->ring->space > (rq->reserved_space + + I915_EMIT_PTE_NUM_DWORDS * sizeof(u32))); + + timer_setup_on_stack(&st.timer, spinner_kill, 0); + mod_timer(&st.timer, jiffies + 2 * HZ); + + /* + * This should wait for the spinner to be killed, otherwise we should go + * down in flames when doing i915_request_add(). + */ + pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space); + it = sg_sgt(obj->mm.pages->sgl); + len = emit_pte(rq, &it, obj->cache_level, false, 0, CHUNK_SZ); + if (!len) { + err = -EINVAL; + goto out_rq; + } + if (len < 0) { + err = len; + goto out_rq; + } + +out_rq: + i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */ + del_timer_sync(&st.timer); + destroy_timer_on_stack(&st.timer); +out_unpin: + intel_context_unpin(ce); +out_put: + intel_context_put(ce); +out_obj: + i915_gem_object_put(obj); +out_spinner: + igt_spinner_fini(&st.spin); + return err; +} + struct threaded_migrate { struct intel_migrate *migrate; struct task_struct *tsk; @@ -651,6 +795,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_migrate_copy), SUBTEST(live_migrate_clear), + SUBTEST(live_emit_pte_full_ring), SUBTEST(thread_migrate_copy), SUBTEST(thread_migrate_clear), SUBTEST(thread_global_copy), -- cgit From 89270d002b7440ec5c6e92f7cac524ab7954a016 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 9 Dec 2022 13:14:59 +0100 Subject: drm/i915: remove struct_member macro Since it is used only to get type of member it can be replaced with typeof_member. Signed-off-by: Andrzej Hajda Reviewed-by: Jani Nikula Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221209121459.3496148-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/i915_utils.h | 4 +--- drivers/gpu/drm/i915/intel_runtime_pm.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 6c14d13364bf..b64192d9c7da 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -149,8 +149,6 @@ bool i915_error_injected(void); #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) #define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) -#define struct_member(T, member) (((T *)0)->member) - #define fetch_and_zero(ptr) ({ \ typeof(*ptr) __T = *(ptr); \ *(ptr) = (typeof(*ptr))0; \ @@ -170,7 +168,7 @@ static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) */ #define container_of_user(ptr, type, member) ({ \ void __user *__mptr = (void __user *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), struct_member(type, member)) && \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ !__same_type(*(ptr), void), \ "pointer type mismatch in container_of()"); \ ((type __user *)(__mptr - offsetof(type, member))); }) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index 98b8b28baaa1..e592e8d6499a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -96,7 +96,7 @@ struct intel_runtime_pm { }; #define BITS_PER_WAKEREF \ - BITS_PER_TYPE(struct_member(struct intel_runtime_pm, wakeref_count)) + BITS_PER_TYPE(typeof_member(struct intel_runtime_pm, wakeref_count)) #define INTEL_RPM_WAKELOCK_SHIFT (BITS_PER_WAKEREF / 2) #define INTEL_RPM_WAKELOCK_BIAS (1 << INTEL_RPM_WAKELOCK_SHIFT) #define INTEL_RPM_RAW_WAKEREF_MASK (INTEL_RPM_WAKELOCK_BIAS - 1) -- cgit From b29d26fbcb862526d5047caec82878be2eb75c0f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 12 Dec 2022 17:19:57 +0000 Subject: drm/i915/migrate: fix corner case in CCS aux copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case of lmem -> lmem transfers, which is currently only possible with small-bar systems, we need to ensure we copy the CCS aux state as-is, rather than nuke it. This should fix some nasty display corruption sometimes seen on DG2 small-bar systems, when also using DG2_RC_CCS_CC for the surface. Fixes: e3afc690188b ("drm/i915/display: consider DG2_RC_CCS_CC when migrating buffers") Signed-off-by: Matthew Auld Cc: Ville Syrjälä Cc: Nirmoy Das Cc: Andrzej Hajda Cc: Shuicheng Lin Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20221212171958.82593-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 37 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index e08a739b7091..3f638f198796 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -839,14 +839,35 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - /* - * While we can't always restore/manage the CCS state, - * we still need to ensure we don't leak the CCS state - * from the previous user, so make sure we overwrite it - * with something. - */ - err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, - dst_offset, DIRECT_ACCESS, len); + if (src_is_lmem) { + /* + * If the src is already in lmem, then we must + * be doing an lmem -> lmem transfer, and so + * should be safe to directly copy the CCS + * state. In this case we have either + * initialised the CCS aux state when first + * clearing the pages (since it is already + * allocated in lmem), or the user has + * potentially populated it, in which case we + * need to copy the CCS state as-is. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + src_offset, INDIRECT_ACCESS, + len); + } else { + /* + * While we can't always restore/manage the CCS + * state, we still need to ensure we don't leak + * the CCS state from the previous user, so make + * sure we overwrite it with something. + */ + err = emit_copy_ccs(rq, + dst_offset, INDIRECT_ACCESS, + dst_offset, DIRECT_ACCESS, + len); + } + if (err) goto out_rq; -- cgit From 95df9cc24bee8a09d39c62bcef4319b984814e18 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 12 Dec 2022 17:19:58 +0000 Subject: drm/i915/ttm: consider CCS for backup objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems we can have one or more framebuffers that are still pinned when suspending lmem, in such a case we end up creating a shmem backup object, instead of evicting the object directly, but this will skip copying the CCS aux state, since we don't allocate the extra storage for the CCS pages as part of the ttm_tt construction. Since we can already deal with pinned objects just fine, it doesn't seem too nasty to just extend to support dealing with the CCS aux state, if the object is a pinned framebuffer. This fixes display corruption (like in gnome-shell) seen on DG2 when returning from suspend. Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj") Signed-off-by: Matthew Auld Cc: Ville Syrjälä Cc: Nirmoy Das Cc: Andrzej Hajda Cc: Shuicheng Lin Cc: # v5.19+ Tested-by: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20221212171958.82593-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 10 ++++++---- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 18 +++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 733696057761..1a0886b8aaa1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -785,6 +785,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) return false; + if (obj->flags & I915_BO_ALLOC_CCS_AUX) + return true; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d0d6772e6f36..ab4c2f90a564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -327,16 +327,18 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +#define I915_BO_ALLOC_CCS_AUX BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ - I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) + I915_BO_ALLOC_GPU_ONLY | \ + I915_BO_ALLOC_CCS_AUX) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 07e49f22f2de..7e67742bc65e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -50,6 +50,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, container_of(bo->bdev, typeof(*i915), bdev); struct drm_i915_gem_object *backup; struct ttm_operation_ctx ctx = {}; + unsigned int flags; int err = 0; if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) @@ -65,7 +66,22 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) return 0; - backup = i915_gem_object_create_shmem(i915, obj->base.size); + /* + * It seems that we might have some framebuffers still pinned at this + * stage, but for such objects we might also need to deal with the CCS + * aux state. Make sure we force the save/restore of the CCS state, + * otherwise we might observe display corruption, when returning from + * suspend. + */ + flags = 0; + if (i915_gem_object_needs_ccs_pages(obj)) { + WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj)); + WARN_ON_ONCE(!pm_apply->allow_gpu); + + flags = I915_BO_ALLOC_CCS_AUX; + } + backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + obj->base.size, 0, flags); if (IS_ERR(backup)) return PTR_ERR(backup); -- cgit From 4d5cf7b1680a1e6db327e3c935ef58325cbedb2c Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 14 Dec 2022 08:54:39 +0100 Subject: drm/i915: fix TLB invalidation for Gen12.50 video and compute engines In case of Gen12.50 video and compute engines, TLB_INV registers are masked - to modify one bit, corresponding bit in upper half of the register must be enabled, otherwise nothing happens. Fixes: 77fa9efc16a9 ("drm/i915/xehp: Create separate reg definitions for new MCR registers") Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221214075439.402485-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 5dfa95be2b34..92b075c72536 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1099,9 +1099,15 @@ static void mmio_invalidate_full(struct intel_gt *gt) continue; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + u32 val = BIT(engine->instance); + + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS) + val = _MASKED_BIT_ENABLE(val); intel_gt_mcr_multicast_write_fw(gt, xehp_regs[engine->class], - BIT(engine->instance)); + val); } else { rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) -- cgit From 44da2032063502d32129350656934668d11087d1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Dec 2022 15:41:19 -0800 Subject: drm/i915/dg2: Return Wa_22012654132 to just specific steppings Programming of the ENABLE_PREFETCH_INTO_IC bit originally showed up in both the general DG2 tuning guide (applicable to all DG2 variants/steppings) and under Wa_22012654132 (applicable only to specific steppings). It has now been removed from the tuning guide, and the guidance is to only program it in the specific steppings associated with the workaround. Bspec: 68331 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20221213234119.2963317-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 38746a71fc1f..c1c2ea303e9d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2912,20 +2912,6 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_DG2(i915)) { wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); - - /* - * This is also listed as Wa_22012654132 for certain DG2 - * steppings, but the tuning setting programming is a superset - * since it applies to all DG2 variants and steppings. - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } /* @@ -3021,6 +3007,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_18017747507:dg2 */ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } + + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) + /* + * Wa_22012654132 + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } static void -- cgit From a4b6e74c88cc9c15257d1aaee8024d8eaa9813e7 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 12 Dec 2022 14:08:59 -0800 Subject: drm/i915/mtl: Resize noa_wait BO size to save restore GPR regs On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem caused a hang that was attributed to saving and restoring the GPR registers used for noa_wait. Add an additional page in noa_wait BO to save/restore GPR registers for the noa_wait logic. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221212220902.1819159-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 6 ------ drivers/gpu/drm/i915/i915_perf.c | 25 +++++++++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 0b6da2aa9718..f08c2556aa25 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -304,12 +304,6 @@ enum intel_gt_scratch_field { /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, - - /* 6 * 8 bytes */ - INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, - - /* 4 bytes */ - INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 963175fa6ab1..cdd66f21c3ba 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1845,8 +1845,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, for (d = 0; d < dword_count; d++) { *cs++ = cmd; *cs++ = i915_mmio_reg_offset(reg) + 4 * d; - *cs++ = intel_gt_scratch_offset(stream->engine->gt, - offset) + 4 * d; + *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d; *cs++ = 0; } @@ -1879,7 +1878,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) MI_PREDICATE_RESULT_2_ENGINE(base) : MI_PREDICATE_RESULT_1(RENDER_RING_BASE); - bo = i915_gem_object_create_internal(i915, 4096); + /* + * gt->scratch was being used to save/restore the GPR registers, but on + * MTL the scratch uses stolen lmem. An MI_SRM to this memory region + * causes an engine hang. Instead allocate an additional page here to + * save/restore GPR registers + */ + bo = i915_gem_object_create_internal(i915, 8192); if (IS_ERR(bo)) { drm_err(&i915->drm, "Failed to allocate NOA wait batchbuffer\n"); @@ -1913,14 +1918,19 @@ retry: goto err_unpin; } + stream->noa_wait = vma; + +#define GPR_SAVE_OFFSET 4096 +#define PREDICATE_SAVE_OFFSET 4160 + /* Save registers. */ for (i = 0; i < N_CS_GPR; i++) cs = save_restore_register( stream, cs, true /* save */, CS_GPR(i), - INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + GPR_SAVE_OFFSET + 8 * i, 2); cs = save_restore_register( stream, cs, true /* save */, mi_predicate_result, - INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + PREDICATE_SAVE_OFFSET, 1); /* First timestamp snapshot location. */ ts0 = cs; @@ -2036,10 +2046,10 @@ retry: for (i = 0; i < N_CS_GPR; i++) cs = save_restore_register( stream, cs, false /* restore */, CS_GPR(i), - INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + GPR_SAVE_OFFSET + 8 * i, 2); cs = save_restore_register( stream, cs, false /* restore */, mi_predicate_result, - INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + PREDICATE_SAVE_OFFSET, 1); /* And return to the ring. */ *cs++ = MI_BATCH_BUFFER_END; @@ -2049,7 +2059,6 @@ retry: i915_gem_object_flush_map(bo); __i915_gem_object_release_map(bo); - stream->noa_wait = vma; goto out_ww; err_unpin: -- cgit From a6b443020faca5f56d1d28e9d7ceab0e386e9e7f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 12 Dec 2022 14:09:00 -0800 Subject: drm/i915/mtl: Add Wa_14015846243 to fix OA vs CS timestamp mismatch Similar to ACM, OA timestamp that is part of the OA report is shifted when compared to the CS timestamp. Add MTL to the WA. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221212220902.1819159-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index cdd66f21c3ba..aedc4cfc46c9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3139,8 +3139,11 @@ get_sseu_config(struct intel_sseu *out_sseu, */ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) { - /* Wa_18013179988:dg2 */ - if (IS_DG2(i915)) { + /* + * Wa_18013179988:dg2 + * Wa_14015846243:mtl + */ + if (IS_DG2(i915) || IS_METEORLAKE(i915)) { intel_wakeref_t wakeref; u32 reg, shift; -- cgit From d654ae8b9870d3951fd32ff8c60473ee6c1e7d4c Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 12 Dec 2022 14:09:01 -0800 Subject: drm/i915/mtl: Update OA mux whitelist for MTL 0x20cc (WAIT_FOR_RC6_EXIT on other platforms) is repurposed on MTL. Use a separate mux table to verify oa configs passed by user. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221212220902.1819159-4-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index aedc4cfc46c9..b0ada4aca660 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4321,6 +4321,17 @@ static const struct i915_range gen12_oa_mux_regs[] = { {} }; +/* + * Ref: 14010536224: + * 0x20cc is repurposed on MTL, so use a separate array for MTL. + */ +static const struct i915_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ +}; + static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return reg_in_range_table(addr, gen7_oa_b_counters); @@ -4364,7 +4375,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen12_oa_mux_regs); + if (IS_METEORLAKE(perf->i915)) + return reg_in_range_table(addr, mtl_oa_mux_regs); + else + return reg_in_range_table(addr, gen12_oa_mux_regs); } static u32 mask_reg_value(u32 reg, u32 val) -- cgit From d0fa30be3178724117bee95be4d7c576b246dd7f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 12 Dec 2022 14:09:02 -0800 Subject: drm/i915/mtl: Add OA support by enabling 32 bit OAG formats for MTL Without an entry in oa_init_supported_formats, OA will not be functional in MTL. Enable OA support by enabling 32 bit OAG formats for MTL. Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20228 Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221212220902.1819159-5-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b0ada4aca660..6fb47e79294f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4775,6 +4775,7 @@ static void oa_init_supported_formats(struct i915_perf *perf) break; case INTEL_DG2: + case INTEL_METEORLAKE: oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); break; -- cgit From 801fa7a81f6da533cc5442fc40e32c72b76cd42a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 16 Dec 2022 11:34:56 +0000 Subject: drm/i915: improve the catch-all evict to handle lock contention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. Add another execbuf pass which drops the vm->mutex and then tries to grab the object will the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction. v2 (Mani) - Also revamp the docs for the different passes. Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Tvrtko Ursulin Cc: Andrzej Hajda Cc: Mani Milani Cc: # v5.18+ Reviewed-by: Mani Milani Tested-by: Mani Milani Link: https://patchwork.freedesktop.org/patch/msgid/20221216113456.414183-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 59 ++++++++++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 +++++++++++----- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 82 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 2223179ff28b..c0e3e9108fc7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -732,32 +732,69 @@ static int eb_reserve(struct i915_execbuffer *eb) bool unpinned; /* - * Attempt to pin all of the buffers into the GTT. - * This is done in 2 phases: + * We have one more buffers that we couldn't bind, which could be due to + * various reasons. To resolve this we have 4 passes, with every next + * level turning the screws tighter: * - * 1. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 2. Bind new objects. + * 0. Unbind all objects that do not match the GTT constraints for the + * execbuffer (fenceable, mappable, alignment etc). Bind all new + * objects. This avoids unnecessary unbinding of later objects in order + * to make room for the earlier objects *unless* we need to defragment. * - * This avoid unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment. + * 1. Reorder the buffers, where objects with the most restrictive + * placement requirements go first (ignoring fixed location buffers for + * now). For example, objects needing the mappable aperture (the first + * 256M of GTT), should go first vs objects that can be placed just + * about anywhere. Repeat the previous pass. * - * Defragmenting is skipped if all objects are pinned at a fixed location. + * 2. Consider buffers that are pinned at a fixed location. Also try to + * evict the entire VM this time, leaving only objects that we were + * unable to lock. Try again to bind the buffers. (still using the new + * buffer order). + * + * 3. We likely have object lock contention for one or more stubborn + * objects in the VM, for which we need to evict to make forward + * progress (perhaps we are fighting the shrinker?). When evicting the + * VM this time around, anything that we can't lock we now track using + * the busy_bo, using the full lock (after dropping the vm->mutex to + * prevent deadlocks), instead of trylock. We then continue to evict the + * VM, this time with the stubborn object locked, which we can now + * hopefully unbind (if still bound in the VM). Repeat until the VM is + * evicted. Finally we should be able bind everything. */ - for (pass = 0; pass <= 2; pass++) { + for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1) - unpinned = eb_unbind(eb, pass == 2); + unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) { - err = i915_gem_evict_vm(eb->context->vm, &eb->ww); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); + mutex_unlock(&eb->context->vm->mutex); + } + if (err) + return err; + } + + if (pass == 3) { +retry: + err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (!err) { + struct drm_i915_gem_object *busy_bo = NULL; + + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); mutex_unlock(&eb->context->vm->mutex); + if (err && busy_bo) { + err = i915_gem_object_lock(busy_bo, &eb->ww); + i915_gem_object_put(busy_bo); + if (!err) + goto retry; + } } if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index d73ba0f5c4c5..4f69bff63068 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ retry: if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm, &ww); + ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * @vm: Address space to cleanse * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm * will be able to evict vma's locked by the ww as well. + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction, + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop + * the vm->mutex, before trying again to acquire the contended lock. The caller + * also owns a reference to the object. * * This function evicts all vmas from a vm. * @@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo) { int ret = 0; @@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */ - if (dying_vma(vma) || + if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; } - if (!i915_gem_object_trylock(vma->obj, ww)) + if (!i915_gem_object_trylock(vma->obj, ww)) { + if (busy_bo) { + *busy_bo = vma->obj; /* holds ref */ + ret = -EBUSY; + break; + } + i915_gem_object_put(vma->obj); continue; + } __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); @@ -498,25 +511,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break; - ret = 0; /* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } + if (!dying_vma(vma)) + i915_gem_object_put(vma->obj); } list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } i915_gem_object_unlock(vma->obj); + i915_gem_object_put(vma->obj); } } while (ret == 0); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object; int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, - struct i915_gem_ww_ctx *ww); + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo); #endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 34f0e6c923c2..7d044888ac33 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1599,7 +1599,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */ - i915_gem_evict_vm(vm, NULL); + i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg) /* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, NULL); + err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg) for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, &ww); + err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } -- cgit From 08d34f12fead958d17e32d57d8061c14f9104373 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 13 Dec 2022 13:00:10 +0100 Subject: drm/i915/selftests: Remove hardcoded value with a macro Use MI_USE_GGTT instead of hardcoded value. Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Reviewed-by: Andrzej Hajda Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221213120010.5857-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index c228fe4aba50..3bef1beec7cb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -222,7 +222,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) } if (GRAPHICS_VER(ctx->engine->i915) >= 8) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; -- cgit From 7ccf9a5386a48b86c1abc0e7cb42f4b8f961af38 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 19 Dec 2022 12:29:33 +0100 Subject: drm/i915: Use helper func to find out map type Use i915_coherent_map_type() function to find out map_type of the shmem obj. v2: handle non-llc platform(Matt) Signed-off-by: Nirmoy Das Reviewed-by: Andrzej Hajda Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221219112933.21417-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/shmem_utils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 402f085f3a02..449c9ed44382 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -8,6 +8,7 @@ #include #include +#include "i915_drv.h" #include "gem/i915_gem_object.h" #include "gem/i915_gem_lmem.h" #include "shmem_utils.h" @@ -32,6 +33,8 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len) struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + enum i915_map_type map_type; struct file *file; void *ptr; @@ -41,8 +44,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) return file; } - ptr = i915_gem_object_pin_map_unlocked(obj, i915_gem_object_is_lmem(obj) ? - I915_MAP_WC : I915_MAP_WB); + map_type = i915_coherent_map_type(i915, obj, true); + ptr = i915_gem_object_pin_map_unlocked(obj, map_type); if (IS_ERR(ptr)) return ERR_CAST(ptr); -- cgit From d830e0dc2e2d4826ebc6a429c2bc098848c9eeda Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 21 Dec 2022 11:30:30 -0800 Subject: drm/i915/guc: Fix a static analysis warning A static analyser was complaining about not checking for null pointers. However, the location of the complaint can only be reached in the first place if said pointer is non-null. Basically, if we are using a v69 GuC then the descriptor pool is guaranteed to be alocated at start of day or submission will be disabled with an ENOMEM error. And if we are using a later GuC that does not use a descriptor pool then the v69 submission function would not be called. So, not a possible null at that point in the code. Hence adding a GEM_BUG_ON(!ptr) to keep the tool happy. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20221221193031.687266-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 16218757ff27..34bcf44873c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2533,6 +2533,7 @@ static void prepare_context_registration_info_v69(struct intel_context *ce) i915_gem_object_is_lmem(ce->ring->vma->obj)); desc = __get_lrc_desc_v69(guc, ctx_id); + GEM_BUG_ON(!desc); desc->engine_class = engine_class_to_guc_class(engine->class); desc->engine_submit_mask = engine->logical_mask; desc->hw_context_desc = ce->lrc.lrca; -- cgit From 4071d98b296a5bc5fd4b15ec651bd05800ec9510 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 21 Dec 2022 11:30:31 -0800 Subject: drm/i915/uc: Fix two issues with over-size firmware files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where a firmware file is too large (e.g. someone downloaded a web page ASCII dump from github...), the firmware object is released but the pointer is not zerod. If no other firmware file was found then release would be called again leading to a double kfree. Also, the size check was only being applied to the initial firmware load not any of the subsequent attempts. So move the check into a wrapper that is used for all loads. Fixes: 016241168dc5 ("drm/i915/uc: use different ggtt pin offsets for uc loads") Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Rodrigo Vivi Cc: Matt Roper Cc: Jani Nikula Cc: Matthew Auld Cc: "Thomas Hellström" Link: https://patchwork.freedesktop.org/patch/msgid/20221221193031.687266-4-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 42 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d6ff6c584c1e..65672ff82605 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -675,6 +675,32 @@ static int check_fw_header(struct intel_gt *gt, return 0; } +static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **fw) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct device *dev = gt->i915->drm.dev; + int err; + + err = firmware_request_nowarn(fw, uc_fw->file_selected.path, dev); + + if (err) + return err; + + if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) { + drm_err(>->i915->drm, + "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); + + /* try to find another blob to load */ + release_firmware(*fw); + *fw = NULL; + return -ENOENT; + } + + return 0; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -688,7 +714,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) struct intel_gt *gt = __uc_fw_to_gt(uc_fw); struct drm_i915_private *i915 = gt->i915; struct intel_uc_fw_file file_ideal; - struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; bool old_ver = false; @@ -704,20 +729,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); - if (!err && fw->size > INTEL_UC_RSVD_GGTT_PER_FW) { - drm_err(&i915->drm, - "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - fw->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); - - /* try to find another blob to load */ - release_firmware(fw); - err = -ENOENT; - } - /* Any error is terminal if overriding. Don't bother searching for older versions */ if (err && intel_uc_fw_is_overridden(uc_fw)) goto fail; @@ -738,7 +752,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) break; } - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); } if (err) -- cgit From f47e6306afd3b625414922361e6b8c1cd6e28c8d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 21:22:47 +0200 Subject: drm/i915/gem: Typecheck page lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to check that we avoid integer overflows when looking up a page, and so fix all the instances where we have mistakenly used a plain integer instead of a more suitable long. Be pedantic and add integer typechecking to the lookup so that we can be sure that we are safe. And it also uses pgoff_t as our page lookups must remain compatible with the page cache, pgoff_t is currently exactly unsigned long. v2: Move added i915_utils's macro into drm_util header (Jani N) v3: Make not use the same macro name on a function. (Mauro) For kernel-doc, macros and functions are handled in the same namespace, the same macro name on a function prevents ever adding documentation for it. v4: Add kernel-doc markups to the kAPI functions and macros (Mauoro) v5: Fix an alignment to match open parenthesis v6: Rebase v10: Use assert_typable instead of exactly_pgoff_t() macro. (Kees) v11: Change the use of assert_typable to assert_same_typable (G.G) v12: Change to use static_assert(__castable_to_type(n ,T)) style since the assert_same_typable() macro has been dropped. (G.G) v13: Change the use of __castable_to_type() to castable_to_type() Remove an unnecessary header include line. (G.G) v16: Fix "ERROR:SPACING" Checkpatch report (G.G) Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Thomas Hellström Cc: Kees Cook Signed-off-by: Chris Wilson Co-developed-by: Gwan-gyeong Mun Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das (v2) Reviewed-by: Mauro Carvalho Chehab (v3) Reviewed-by: Andrzej Hajda (v5) Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-2-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 293 +++++++++++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 27 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 12 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 +- .../gpu/drm/i915/gem/selftests/i915_gem_object.c | 8 +- drivers/gpu/drm/i915/i915_gem.c | 18 +- drivers/gpu/drm/i915/i915_vma.c | 8 +- 9 files changed, 322 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1a0886b8aaa1..e6d4efde4fc5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -427,10 +427,11 @@ void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj, static void i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) { + pgoff_t idx = offset >> PAGE_SHIFT; void *src_map; void *src_ptr; - src_map = kmap_atomic(i915_gem_object_get_page(obj, offset >> PAGE_SHIFT)); + src_map = kmap_atomic(i915_gem_object_get_page(obj, idx)); src_ptr = src_map + offset_in_page(offset); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) @@ -443,9 +444,10 @@ i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, static void i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) { + pgoff_t idx = offset >> PAGE_SHIFT; + dma_addr_t dma = i915_gem_object_get_dma_address(obj, idx); void __iomem *src_map; void __iomem *src_ptr; - dma_addr_t dma = i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT); src_map = io_mapping_map_wc(&obj->mm.region->iomap, dma - obj->mm.region->region.start, @@ -484,6 +486,7 @@ static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj) */ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) { + GEM_BUG_ON(overflows_type(offset >> PAGE_SHIFT, pgoff_t)); GEM_BUG_ON(offset >= obj->base.size); GEM_BUG_ON(offset_in_page(offset) > PAGE_SIZE - size); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 3db53769864c..f7b562e255a5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -27,8 +27,10 @@ enum intel_region_id; * spot such a local variable, please consider fixing! * * Aside from our own locals (for which we have no excuse!): - * - sg_table embeds unsigned int for num_pages - * - get_user_pages*() mixed ints with longs + * - sg_table embeds unsigned int for nents + * + * We can check for invalidly typed locals with typecheck(), see for example + * i915_gem_object_get_sg(). */ #define GEM_CHECK_SIZE_OVERFLOW(sz) \ GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX) @@ -363,44 +365,289 @@ i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj) int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, unsigned int tiling, unsigned int stride); +/** + * __i915_gem_object_page_iter_get_sg - helper to find the target scatterlist + * pointer and the target page position using pgoff_t n input argument and + * i915_gem_object_page_iter + * @obj: i915 GEM buffer object + * @iter: i915 GEM buffer object page iterator + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * Context: Takes and releases the mutex lock of the i915_gem_object_page_iter. + * Takes and releases the RCU lock to search the radix_tree of + * i915_gem_object_page_iter. + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * Recommended to use wrapper macro: i915_gem_object_page_iter_get_sg() + */ struct scatterlist * -__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - struct i915_gem_object_page_iter *iter, - unsigned int n, - unsigned int *offset, bool dma); +__i915_gem_object_page_iter_get_sg(struct drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + pgoff_t n, + unsigned int *offset); +/** + * i915_gem_object_page_iter_get_sg - wrapper macro for + * __i915_gem_object_page_iter_get_sg() + * @obj: i915 GEM buffer object + * @it: i915 GEM buffer object page iterator + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * Context: Takes and releases the mutex lock of the i915_gem_object_page_iter. + * Takes and releases the RCU lock to search the radix_tree of + * i915_gem_object_page_iter. + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_page_iter_get_sg(). + */ +#define i915_gem_object_page_iter_get_sg(obj, it, n, offset) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_page_iter_get_sg(obj, it, n, offset); \ +}) + +/** + * __i915_gem_object_get_sg - helper to find the target scatterlist + * pointer and the target page position using pgoff_t n input argument and + * drm_i915_gem_object. It uses an internal shmem scatterlist lookup function. + * @obj: i915 GEM buffer object + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * It uses drm_i915_gem_object's internal shmem scatterlist lookup function as + * i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg(). + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * Recommended to use wrapper macro: i915_gem_object_get_sg() + * See also __i915_gem_object_page_iter_get_sg() + */ static inline struct scatterlist * -i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - unsigned int n, - unsigned int *offset) +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, pgoff_t n, + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false); + return __i915_gem_object_page_iter_get_sg(obj, &obj->mm.get_page, n, offset); } +/** + * i915_gem_object_get_sg - wrapper macro for __i915_gem_object_get_sg() + * @obj: i915 GEM buffer object + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_sg(). + * See also __i915_gem_object_page_iter_get_sg() + */ +#define i915_gem_object_get_sg(obj, n, offset) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_sg(obj, n, offset); \ +}) + +/** + * __i915_gem_object_get_sg_dma - helper to find the target scatterlist + * pointer and the target page position using pgoff_t n input argument and + * drm_i915_gem_object. It uses an internal DMA mapped scatterlist lookup function + * @obj: i915 GEM buffer object + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * It uses drm_i915_gem_object's internal DMA mapped scatterlist lookup function + * as i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg(). + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * Recommended to use wrapper macro: i915_gem_object_get_sg_dma() + * See also __i915_gem_object_page_iter_get_sg() + */ static inline struct scatterlist * -i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, - unsigned int n, - unsigned int *offset) +__i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, pgoff_t n, + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true); + return __i915_gem_object_page_iter_get_sg(obj, &obj->mm.get_dma_page, n, offset); } +/** + * i915_gem_object_get_sg_dma - wrapper macro for __i915_gem_object_get_sg_dma() + * @obj: i915 GEM buffer object + * @n: page offset + * @offset: searched physical offset, + * it will be used for returning physical page offset value + * + * Returns: + * The target scatterlist pointer and the target page position. + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_sg_dma(). + * See also __i915_gem_object_page_iter_get_sg() + */ +#define i915_gem_object_get_sg_dma(obj, n, offset) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_sg_dma(obj, n, offset); \ +}) + +/** + * __i915_gem_object_get_page - helper to find the target page with a page offset + * @obj: i915 GEM buffer object + * @n: page offset + * + * It uses drm_i915_gem_object's internal shmem scatterlist lookup function as + * i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg() + * internally. + * + * Returns: + * The target page pointer. + * + * Recommended to use wrapper macro: i915_gem_object_get_page() + * See also __i915_gem_object_page_iter_get_sg() + */ struct page * -i915_gem_object_get_page(struct drm_i915_gem_object *obj, - unsigned int n); +__i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n); +/** + * i915_gem_object_get_page - wrapper macro for __i915_gem_object_get_page + * @obj: i915 GEM buffer object + * @n: page offset + * + * Returns: + * The target page pointer. + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_page(). + * See also __i915_gem_object_page_iter_get_sg() + */ +#define i915_gem_object_get_page(obj, n) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_page(obj, n); \ +}) + +/** + * __i915_gem_object_get_dirty_page - helper to find the target page with a page + * offset + * @obj: i915 GEM buffer object + * @n: page offset + * + * It works like i915_gem_object_get_page(), but it marks the returned page dirty. + * + * Returns: + * The target page pointer. + * + * Recommended to use wrapper macro: i915_gem_object_get_dirty_page() + * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_page() + */ struct page * -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, - unsigned int n); +__i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, pgoff_t n); + +/** + * i915_gem_object_get_dirty_page - wrapper macro for __i915_gem_object_get_dirty_page + * @obj: i915 GEM buffer object + * @n: page offset + * + * Returns: + * The target page pointer. + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_dirty_page(). + * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_page() + */ +#define i915_gem_object_get_dirty_page(obj, n) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_dirty_page(obj, n); \ +}) +/** + * __i915_gem_object_get_dma_address_len - helper to get bus addresses of + * targeted DMA mapped scatterlist from i915 GEM buffer object and it's length + * @obj: i915 GEM buffer object + * @n: page offset + * @len: DMA mapped scatterlist's DMA bus addresses length to return + * + * Returns: + * Bus addresses of targeted DMA mapped scatterlist + * + * Recommended to use wrapper macro: i915_gem_object_get_dma_address_len() + * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_sg_dma() + */ dma_addr_t -i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, - unsigned long n, - unsigned int *len); +__i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, pgoff_t n, + unsigned int *len); +/** + * i915_gem_object_get_dma_address_len - wrapper macro for + * __i915_gem_object_get_dma_address_len + * @obj: i915 GEM buffer object + * @n: page offset + * @len: DMA mapped scatterlist's DMA bus addresses length to return + * + * Returns: + * Bus addresses of targeted DMA mapped scatterlist + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_dma_address_len(). + * See also __i915_gem_object_page_iter_get_sg() and + * __i915_gem_object_get_dma_address_len() + */ +#define i915_gem_object_get_dma_address_len(obj, n, len) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_dma_address_len(obj, n, len); \ +}) + +/** + * __i915_gem_object_get_dma_address - helper to get bus addresses of + * targeted DMA mapped scatterlist from i915 GEM buffer object + * @obj: i915 GEM buffer object + * @n: page offset + * + * Returns: + * Bus addresses of targeted DMA mapped scatterlis + * + * Recommended to use wrapper macro: i915_gem_object_get_dma_address() + * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_sg_dma() + */ dma_addr_t -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, - unsigned long n); +__i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, pgoff_t n); + +/** + * i915_gem_object_get_dma_address - wrapper macro for + * __i915_gem_object_get_dma_address + * @obj: i915 GEM buffer object + * @n: page offset + * + * Returns: + * Bus addresses of targeted DMA mapped scatterlist + * + * In order to avoid the truncation of the input parameter, it checks the page + * offset n's type from the input parameter before calling + * __i915_gem_object_get_dma_address(). + * See also __i915_gem_object_page_iter_get_sg() and + * __i915_gem_object_get_dma_address() + */ +#define i915_gem_object_get_dma_address(obj, n) ({ \ + static_assert(castable_to_type(n, pgoff_t)); \ + __i915_gem_object_get_dma_address(obj, n); \ +}) void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 05a27723ebb8..ecd86130b74f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -521,14 +521,16 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj) } struct scatterlist * -__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - struct i915_gem_object_page_iter *iter, - unsigned int n, - unsigned int *offset, - bool dma) +__i915_gem_object_page_iter_get_sg(struct drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + pgoff_t n, + unsigned int *offset) + { - struct scatterlist *sg; + const bool dma = iter == &obj->mm.get_dma_page || + iter == &obj->ttm.get_io_page; unsigned int idx, count; + struct scatterlist *sg; might_sleep(); GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); @@ -636,7 +638,7 @@ lookup: } struct page * -i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) +__i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n) { struct scatterlist *sg; unsigned int offset; @@ -649,8 +651,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) /* Like i915_gem_object_get_page(), but mark the returned page dirty */ struct page * -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, - unsigned int n) +__i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, pgoff_t n) { struct page *page; @@ -662,9 +663,8 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, } dma_addr_t -i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, - unsigned long n, - unsigned int *len) +__i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, + pgoff_t n, unsigned int *len) { struct scatterlist *sg; unsigned int offset; @@ -678,8 +678,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, } dma_addr_t -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, - unsigned long n) +__i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, pgoff_t n) { return i915_gem_object_get_dma_address_len(obj, n, NULL); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 1e50fb0d6bfc..4ca717890aa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -689,7 +689,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, GEM_WARN_ON(bo->ttm); base = obj->mm.region->iomap.base - obj->mm.region->region.start; - sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); + sg = i915_gem_object_page_iter_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs); return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index ac02fb036592..414ee2cb70fc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -469,7 +469,8 @@ static int gpu_fill(struct intel_context *ce, static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) { const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); - unsigned int n, m, need_flush; + unsigned int need_flush; + unsigned long n, m; int err; i915_gem_object_lock(obj, NULL); @@ -499,7 +500,8 @@ out: static noinline int cpu_check(struct drm_i915_gem_object *obj, unsigned int idx, unsigned int max) { - unsigned int n, m, needs_flush; + unsigned int needs_flush; + unsigned long n; int err; i915_gem_object_lock(obj, NULL); @@ -508,7 +510,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, goto out_unlock; for (n = 0; n < real_page_count(obj); n++) { - u32 *map; + u32 *map, m; map = kmap_atomic(i915_gem_object_get_page(obj, n)); if (needs_flush & CLFLUSH_BEFORE) @@ -516,7 +518,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, for (m = 0; m < max; m++) { if (map[m] != m) { - pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", + pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n", __builtin_return_address(0), idx, n, real_page_count(obj), m, max, map[m], m); @@ -527,7 +529,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, for (; m < DW_PER_PAGE; m++) { if (map[m] != STACK_MAGIC) { - pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", + pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n", __builtin_return_address(0), idx, n, m, map[m], STACK_MAGIC); err = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 7f6353827735..56279908ed30 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -97,11 +97,11 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_gtt_view view; struct i915_vma *vma; + unsigned long offset; unsigned long page; u32 __iomem *io; struct page *p; unsigned int n; - u64 offset; u32 *cpu; int err; @@ -158,7 +158,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, cpu = kmap(p) + offset_in_page(offset); drm_clflush_virt_range(cpu, sizeof(*cpu)); if (*cpu != (u32)page) { - pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%lu + %u [0x%lx]) of 0x%x, found 0x%x\n", page, n, view.partial.offset, view.partial.size, @@ -214,10 +214,10 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, for_each_prime_number_from(page, 1, npages) { struct i915_gtt_view view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + unsigned long offset; u32 __iomem *io; struct page *p; unsigned int n; - u64 offset; u32 *cpu; GEM_BUG_ON(view.partial.size > nreal); @@ -254,7 +254,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, cpu = kmap(p) + offset_in_page(offset); drm_clflush_virt_range(cpu, sizeof(*cpu)); if (*cpu != (u32)page) { - pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%lu + %u [0x%lx]) of 0x%x, found 0x%x\n", page, n, view.partial.offset, view.partial.size, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index bdf5bb40ccf1..19e374f68ff7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -33,10 +33,10 @@ out: static int igt_gem_huge(void *arg) { - const unsigned int nreal = 509; /* just to be awkward */ + const unsigned long nreal = 509; /* just to be awkward */ struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; - unsigned int n; + unsigned long n; int err; /* Basic sanitycheck of our huge fake object allocation */ @@ -49,7 +49,7 @@ static int igt_gem_huge(void *arg) err = i915_gem_object_pin_pages_unlocked(obj); if (err) { - pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + pr_err("Failed to allocate %lu pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); goto out; } @@ -57,7 +57,7 @@ static int igt_gem_huge(void *arg) for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { if (i915_gem_object_get_page(obj, n) != i915_gem_object_get_page(obj, n % nreal)) { - pr_err("Page lookup mismatch at index %u [%u]\n", + pr_err("Page lookup mismatch at index %lu [%lu]\n", n, n % nreal); err = -EINVAL; goto out_unpin; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 086c4702e1bf..ffdd25155943 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -229,8 +229,9 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, struct drm_i915_gem_pread *args) { unsigned int needs_clflush; - unsigned int idx, offset; char __user *user_data; + unsigned long offset; + pgoff_t idx; u64 remain; int ret; @@ -383,13 +384,17 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + unsigned long remain, offset; intel_wakeref_t wakeref; struct drm_mm_node node; void __user *user_data; struct i915_vma *vma; - u64 remain, offset; int ret = 0; + if (overflows_type(args->size, remain) || + overflows_type(args->offset, offset)) + return -EINVAL; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); vma = i915_gem_gtt_prepare(obj, &node, false); @@ -540,13 +545,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_runtime_pm *rpm = &i915->runtime_pm; + unsigned long remain, offset; intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; - u64 remain, offset; void __user *user_data; int ret = 0; + if (overflows_type(args->size, remain) || + overflows_type(args->offset, offset)) + return -EINVAL; + if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -654,8 +663,9 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, { unsigned int partial_cacheline_write; unsigned int needs_clflush; - unsigned int offset, idx; void __user *user_data; + unsigned long offset; + pgoff_t idx; u64 remain; int ret; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7d044888ac33..8b16f6e32efa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -936,7 +936,7 @@ rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, struct sg_table *st, struct scatterlist *sg) { unsigned int column, row; - unsigned int src_idx; + pgoff_t src_idx; for (column = 0; column < width; column++) { unsigned int left; @@ -1042,7 +1042,7 @@ add_padding_pages(unsigned int count, static struct scatterlist * remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj, - unsigned int offset, unsigned int alignment_pad, + unsigned long offset, unsigned int alignment_pad, unsigned int width, unsigned int height, unsigned int src_stride, unsigned int dst_stride, struct sg_table *st, struct scatterlist *sg, @@ -1101,7 +1101,7 @@ remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj, static struct scatterlist * remap_contiguous_pages(struct drm_i915_gem_object *obj, - unsigned int obj_offset, + pgoff_t obj_offset, unsigned int count, struct sg_table *st, struct scatterlist *sg) { @@ -1134,7 +1134,7 @@ remap_contiguous_pages(struct drm_i915_gem_object *obj, static struct scatterlist * remap_linear_color_plane_pages(struct drm_i915_gem_object *obj, - unsigned int obj_offset, unsigned int alignment_pad, + pgoff_t obj_offset, unsigned int alignment_pad, unsigned int size, struct sg_table *st, struct scatterlist *sg, unsigned int *gtt_offset) -- cgit From c3bfba9a222550406082c92bbabc9c8b1355d8b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 21:22:48 +0200 Subject: drm/i915: Check for integer truncation on scatterlist creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an impedance mismatch between the scatterlist API using unsigned int and our memory/page accounting in unsigned long. That is we may try to create a scatterlist for a large object that overflows returning a small table into which we try to fit very many pages. As the object size is under the control of userspace, we have to be prudent and catch the conversion errors. To catch the implicit truncation we check before calling scattterlist creation Apis. we use overflows_type check and report E2BIG if the overflows may raise. When caller does not return errno, use WARN_ON to report a problem. This is already used in our create ioctls to indicate if the uABI request is simply too large for the backing store. Failing that type check, we have a second check at sg_alloc_table time to make sure the values we are passing into the scatterlist API are not truncated. v2: Move added i915_utils's macro into drm_util header (Jani N) v5: Fix macros to be enclosed in parentheses for complex values Fix too long line warning v8: Replace safe_conversion() with check_assign() (Kees) v14: Remove shadowing macros of scatterlist creation api and fix to explicitly overflow check where the scatterlist creation APIs are called. (Jani) v15: Add missing returning of error code when the WARN_ON() has been detected. (Jani) Cc: Tvrtko Ursulin Cc: Brian Welty Cc: Matthew Auld Cc: Thomas Hellström Signed-off-by: Chris Wilson Co-developed-by: Gwan-gyeong Mun Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Andrzej Hajda Acked-by: Jani Nikula Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-3-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 7 +++++-- drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 --- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 4 ++++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++++++--- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++ drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 6 +++++- drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 6 +++++- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 8 ++++++++ drivers/gpu/drm/i915/gvt/dmabuf.c | 10 ++++++---- drivers/gpu/drm/i915/i915_scatterlist.c | 9 +++++++++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++++ drivers/gpu/drm/i915/selftests/scatterlist.c | 4 ++++ 12 files changed, 60 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index f66bcefc09ec..6bc26b4b06b8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -35,11 +35,15 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; - unsigned int npages; + unsigned int npages; /* restricted by sg_alloc_table */ int max_order = MAX_ORDER; unsigned int max_segment; gfp_t gfp; + if (overflows_type(obj->base.size >> PAGE_SHIFT, npages)) + return -E2BIG; + + npages = obj->base.size >> PAGE_SHIFT; max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; max_order = min(max_order, get_order(max_segment)); @@ -55,7 +59,6 @@ create_st: if (!st) return -ENOMEM; - npages = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f7b562e255a5..4a661af87c4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -26,9 +26,6 @@ enum intel_region_id; * this and catch if we ever need to fix it. In the meantime, if you do * spot such a local variable, please consider fixing! * - * Aside from our own locals (for which we have no excuse!): - * - sg_table embeds unsigned int for nents - * * We can check for invalidly typed locals with typecheck(), see for example * i915_gem_object_get_sg(). */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 68453572275b..76efe98eaa14 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -28,6 +28,10 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) void *dst; int i; + /* Contiguous chunk, with a single scatterlist element */ + if (overflows_type(obj->base.size, sg->length)) + return -E2BIG; + if (GEM_WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 9c759df700ca..28e857f8c169 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -60,7 +60,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, struct address_space *mapping, unsigned int max_segment) { - const unsigned long page_count = size / PAGE_SIZE; + unsigned int page_count; /* restricted by sg_alloc_table */ unsigned long i; struct scatterlist *sg; struct page *page; @@ -68,6 +68,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, gfp_t noreclaim; int ret; + if (overflows_type(size / PAGE_SIZE, page_count)) + return -E2BIG; + + page_count = size / PAGE_SIZE; /* * If there's no chance of allocating enough pages for the whole * object, bail early. @@ -193,7 +197,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; - const unsigned long page_count = obj->base.size / PAGE_SIZE; unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; @@ -236,7 +239,7 @@ rebuild_st: } else { dev_warn(i915->drm.dev, "Failed to DMA remap %lu pages\n", - page_count); + obj->base.size >> PAGE_SHIFT); goto err_pages; } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4ca717890aa9..af1378d93baa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -832,6 +832,10 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; struct ttm_placement placement; + /* restricted by sg_alloc_table */ + if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) + return -E2BIG; + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); /* Move to the requested placement. */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 9348b1804d53..1d3ebdf4069b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -128,12 +128,16 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; struct page **pvec; + unsigned int num_pages; /* limited by sg_alloc_table_from_pages_segment */ int ret; + if (overflows_type(obj->base.size >> PAGE_SHIFT, num_pages)) + return -E2BIG; + + num_pages = obj->base.size >> PAGE_SHIFT; st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index cbd9b624a788..bac957755068 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -29,11 +29,15 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL) const unsigned long nreal = obj->scratch / PAGE_SIZE; - const unsigned long npages = obj->base.size / PAGE_SIZE; + unsigned int npages; /* restricted by sg_alloc_table */ struct scatterlist *sg, *src, *end; struct sg_table *pages; unsigned long n; + if (overflows_type(obj->base.size / PAGE_SIZE, npages)) + return -E2BIG; + + npages = obj->base.size / PAGE_SIZE; pages = kmalloc(sizeof(*pages), GFP); if (!pages) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index e0c2ac9c8053..c281b0ec9e05 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -84,6 +84,10 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) unsigned int sg_page_sizes; u64 rem; + /* restricted by sg_alloc_table */ + if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) + return -E2BIG; + st = kmalloc(sizeof(*st), GFP); if (!st) return -ENOMEM; @@ -212,6 +216,10 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) struct scatterlist *sg; u64 rem; + /* restricted by sg_alloc_table */ + if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) + return -E2BIG; + st = kmalloc(sizeof(*st), GFP); if (!st) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 355f1c0e8664..7af09eb24ac0 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -42,8 +42,7 @@ #define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) -static int vgpu_gem_get_pages( - struct drm_i915_gem_object *obj) +static int vgpu_gem_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_vgpu *vgpu; @@ -52,8 +51,12 @@ static int vgpu_gem_get_pages( int i, j, ret; gen8_pte_t __iomem *gtt_entries; struct intel_vgpu_fb_info *fb_info; - u32 page_num; + unsigned int page_num; /* limited by sg_alloc_table */ + if (overflows_type(obj->base.size >> PAGE_SHIFT, page_num)) + return -E2BIG; + + page_num = obj->base.size >> PAGE_SHIFT; fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; if (drm_WARN_ON(&dev_priv->drm, !fb_info)) return -ENODEV; @@ -66,7 +69,6 @@ static int vgpu_gem_get_pages( if (unlikely(!st)) return -ENOMEM; - page_num = obj->base.size >> PAGE_SHIFT; ret = sg_alloc_table(st, page_num, GFP_KERNEL); if (ret) { kfree(st); diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 114e5e39aa72..756289e43dff 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -96,6 +96,11 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); st = &rsgt->table; + /* restricted by sg_alloc_table */ + if (WARN_ON(overflows_type(DIV_ROUND_UP_ULL(node->size, segment_pages), + unsigned int))) + return ERR_PTR(-E2BIG); + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); @@ -177,6 +182,10 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, i915_refct_sgt_init(rsgt, size); st = &rsgt->table; + /* restricted by sg_alloc_table */ + if (WARN_ON(overflows_type(PFN_UP(res->size), unsigned int))) + return ERR_PTR(-E2BIG); + if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index eae7d947d7de..01e75160a84a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -68,6 +68,10 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) return -ENOMEM; rem = round_up(obj->base.size, BIT(31)) >> 31; + /* restricted by sg_alloc_table */ + if (overflows_type(rem, unsigned int)) + return -E2BIG; + if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index d599186d5b71..805c4bfb85fe 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -220,6 +220,10 @@ static int alloc_table(struct pfn_table *pt, struct scatterlist *sg; unsigned long n, pfn; + /* restricted by sg_alloc_table */ + if (overflows_type(max, unsigned int)) + return -E2BIG; + if (sg_alloc_table(&pt->st, max, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) return alloc_error; -- cgit From 6949aa0eadafec9f4e83920a798330025c049ce5 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Wed, 28 Dec 2022 21:22:49 +0200 Subject: drm/i915: Check for integer truncation on the configuration of ttm place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an impedance mismatch between the first/last valid page frame number of ttm place in unsigned and our memory/page accounting in unsigned long. As the object size is under the control of userspace, we have to be prudent and catch the conversion errors. To catch the implicit truncation as we switch from unsigned long to unsigned, we use overflows_type check and report E2BIG or overflow_type prior to the operation. v3: Not to change execution inside a macro. (Mauro) Add safe_conversion_gem_bug_on() macro and remove temporal SAFE_CONVERSION() macro. v4: Fix unhandled GEM_BUG_ON() macro call from safe_conversion_gem_bug_on() v6: Fix to follow general use case for GEM_BUG_ON(). (Jani) v7: Fix to use WARN_ON() macro where GEM_BUG_ON() macro was used. (Jani) v8: Replace safe_conversion() with check_assign() (Kees) v14: Split one macro of assignment with checking of overflow to two steps, first overflow check, and second assignment. Cc: Chris Wilson Cc: Matthew Auld Cc: Thomas Hellström Cc: Jani Nikula Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das (v2) Reviewed-by: Mauro Carvalho Chehab (v3) Reported-by: kernel test robot Reviewed-by: Andrzej Hajda (v5) Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-4-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 3 +++ drivers/gpu/drm/i915/intel_region_ttm.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index af1378d93baa..5f3abe1bb7d7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -140,13 +140,16 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr, if (flags & I915_BO_ALLOC_CONTIGUOUS) place->flags |= TTM_PL_FLAG_CONTIGUOUS; if (offset != I915_BO_INVALID_OFFSET) { + WARN_ON(overflows_type(offset >> PAGE_SHIFT, place->fpfn)); place->fpfn = offset >> PAGE_SHIFT; + WARN_ON(overflows_type(place->fpfn + (size >> PAGE_SHIFT), place->lpfn)); place->lpfn = place->fpfn + (size >> PAGE_SHIFT); } else if (mr->io_size && mr->io_size < mr->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place->flags |= TTM_PL_FLAG_TOPDOWN; } else { place->fpfn = 0; + WARN_ON(overflows_type(mr->io_size >> PAGE_SHIFT, place->lpfn)); place->lpfn = mr->io_size >> PAGE_SHIFT; } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index cf89d0c2a2d9..7f0ec35af098 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -209,13 +209,25 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, if (flags & I915_BO_ALLOC_CONTIGUOUS) place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (offset != I915_BO_INVALID_OFFSET) { + if (WARN_ON(overflows_type(offset >> PAGE_SHIFT, place.fpfn))) { + ret = -E2BIG; + goto out; + } place.fpfn = offset >> PAGE_SHIFT; + if (WARN_ON(overflows_type(place.fpfn + (size >> PAGE_SHIFT), place.lpfn))) { + ret = -E2BIG; + goto out; + } place.lpfn = place.fpfn + (size >> PAGE_SHIFT); } else if (mem->io_size && mem->io_size < mem->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place.flags |= TTM_PL_FLAG_TOPDOWN; } else { place.fpfn = 0; + if (WARN_ON(overflows_type(mem->io_size >> PAGE_SHIFT, place.lpfn))) { + ret = -E2BIG; + goto out; + } place.lpfn = mem->io_size >> PAGE_SHIFT; } } @@ -224,6 +236,8 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, mock_bo.bdev = &mem->i915->bdev; ret = man->func->alloc(man, &mock_bo, &place, &res); + +out: if (ret == -ENOSPC) ret = -ENXIO; if (!ret) -- cgit From 662c04e2b8c6fe8eb2f2b88c62e25c8241d0f793 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Wed, 28 Dec 2022 21:22:50 +0200 Subject: drm/i915: Check if the size is too big while creating shmem file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The __shmem_file_setup() function returns -EINVAL if size is greater than MAX_LFS_FILESIZE. To handle the same error as other code that returns -E2BIG when the size is too large, it add a code that returns -E2BIG when the size is larger than the size that can be handled. v4: If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false, so it checks only when BITS_PER_LONG is 64. Cc: Chris Wilson Cc: Matthew Auld Cc: Thomas Hellström Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das Reviewed-by: Mauro Carvalho Chehab Reported-by: kernel test robot Reviewed-by: Andrzej Hajda Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-5-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 28e857f8c169..e767791e40e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -541,6 +541,20 @@ static int __create_shmem(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, obj, size); + /* XXX: The __shmem_file_setup() function returns -EINVAL if size is + * greater than MAX_LFS_FILESIZE. + * To handle the same error as other code that returns -E2BIG when + * the size is too large, we add a code that returns -E2BIG when the + * size is larger than the size that can be handled. + * If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false, + * so we only needs to check when BITS_PER_LONG is 64. + * If BITS_PER_LONG is 32, E2BIG checks are processed when + * i915_gem_object_size_2big() is called before init_object() callback + * is called. + */ + if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE) + return -E2BIG; + if (i915->mm.gemfs) filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, flags); -- cgit From 18f968cb92fb3fa758d9eb29056426fa5588b230 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Wed, 28 Dec 2022 21:22:51 +0200 Subject: drm/i915: Use error code as -E2BIG when the size of gem ttm object is too large MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ttm_bo_init_reserved() functions returns -ENOSPC if the size is too big to add vma. The direct function that returns -ENOSPC is drm_mm_insert_node_in_range(). To handle the same error as other code returning -E2BIG when the size is too large, it converts return value to -E2BIG. Cc: Chris Wilson Cc: Matthew Auld Cc: Thomas Hellström Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Andrzej Hajda Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-6-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5f3abe1bb7d7..8bf40988a656 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1309,6 +1309,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), bo_type, &i915_sys_placement, page_size >> PAGE_SHIFT, &ctx, NULL, NULL, i915_ttm_bo_destroy); + + /* + * XXX: The ttm_bo_init_reserved() functions returns -ENOSPC if the size + * is too big to add vma. The direct function that returns -ENOSPC is + * drm_mm_insert_node_in_range(). To handle the same error as other code + * that returns -E2BIG when the size is too large, it converts -ENOSPC to + * -E2BIG. + */ + if (size >> PAGE_SHIFT > INT_MAX && ret == -ENOSPC) + ret = -E2BIG; + if (ret) return i915_ttm_err_to_gem(ret); -- cgit From fd3cdd932e5f79eb08ed7b8e41be34ab0ad9e74e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 21:22:52 +0200 Subject: drm/i915: Remove truncation warning for large objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having addressed the issues surrounding incorrect types for local variables and potential integer truncation in using the scatterlist API, we have closed all the loop holes we had previously identified with dangerously large object creation. As such, we can eliminate the warning put in place to remind us to complete the review. Cc: Tvrtko Ursulin Cc: Brian Welty Cc: Matthew Auld Cc: Thomas Hellström Testcase: igt@gem_create@create-massive Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4991 Signed-off-by: Chris Wilson Signed-off-by: Gwan-gyeong Mun Reviewed-by: Nirmoy Das Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Andrzej Hajda Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20221228192252.917299-7-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 4a661af87c4d..f9a8acbba715 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,25 +20,10 @@ enum intel_region_id; -/* - * XXX: There is a prevalence of the assumption that we fit the - * object's page count inside a 32bit _signed_ variable. Let's document - * this and catch if we ever need to fix it. In the meantime, if you do - * spot such a local variable, please consider fixing! - * - * We can check for invalidly typed locals with typecheck(), see for example - * i915_gem_object_get_sg(). - */ -#define GEM_CHECK_SIZE_OVERFLOW(sz) \ - GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX) - static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; - if (GEM_CHECK_SIZE_OVERFLOW(size)) - return true; - if (overflows_type(size, obj->base.size)) return true; -- cgit From cc328c9e05e98feb64a9f81f29e51df4cb986921 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 30 Dec 2022 19:35:00 +0100 Subject: drm/i915: Use "%zu" to format size_t Switch to %zu for printing size_t which will fix compilation warning for 32-bit build. Reported-by: kernel test robot Signed-off-by: Nirmoy Das Reviewed-by: Gwan-gyeong Mun Reviewed-by: Andi Shyti Signed-off-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20221230183500.20553-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e767791e40e0..114443096841 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -238,7 +238,7 @@ rebuild_st: goto rebuild_st; } else { dev_warn(i915->drm.dev, - "Failed to DMA remap %lu pages\n", + "Failed to DMA remap %zu pages\n", obj->base.size >> PAGE_SHIFT); goto err_pages; } -- cgit From 4f0755c2faf7388616109717facc5bbde6850e60 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 23 Dec 2022 10:20:11 +0100 Subject: drm/i915: Reserve enough fence slot for i915_vma_unbind_async MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A nested dma_resv_reserve_fences(1) will not reserve slot from the 2nd call onwards and folowing dma_resv_add_fence() might hit the "BUG_ON(fobj->num_fences >= fobj->max_fences)" check. I915 hit above nested dma_resv case in ttm_bo_handle_move_mem() with async unbind: dma_resv_reserve_fences() from --> ttm_bo_handle_move_mem() dma_resv_reserve_fences() from --> i915_vma_unbind_async() dma_resv_add_fence() from --> i915_vma_unbind_async() dma_resv_add_fence() from -->ttm_bo_move_accel_cleanup() Resolve this by adding an extra fence in i915_vma_unbind_async(). Suggested-by: Thomas Hellström Fixes: 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding") Cc: # v5.18+ Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221223092011.11657-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 8b16f6e32efa..5272e2be990e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -2149,7 +2149,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_fences(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 2); if (err) return -EBUSY; -- cgit From bed4b455cf5374e68879be56971c1da563bcd90c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 3 Jan 2023 15:49:46 -0800 Subject: drm/i915: Fix potential context UAFs gem_context_register() makes the context visible to userspace, and which point a separate thread can trigger the I915_GEM_CONTEXT_DESTROY ioctl. So we need to ensure that nothing uses the ctx ptr after this. And we need to ensure that adding the ctx to the xarray is the *last* thing that gem_context_register() does with the ctx pointer. Signed-off-by: Rob Clark Fixes: eb4dedae920a ("drm/i915/gem: Delay tracking the GEM context until it is registered") Fixes: a4c1cdd34e2c ("drm/i915/gem: Delay context creation (v3)") Fixes: 49bd54b390c2 ("drm/i915: Track all user contexts per client") Cc: # v5.10+ Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti [tursulin: Stable and fixes tags add/tidy.] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230103234948.1218393-1-robdclark@gmail.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 46e71f62fcec..454e73a433c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } +/* + * Note that this implicitly consumes the ctx reference, by placing + * the ctx in the context_xa. + */ static void gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 id) @@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx, snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); - /* And finally expose ourselves to userspace via the idr */ - old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); - WARN_ON(old); - spin_lock(&ctx->client->ctx_lock); list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); spin_unlock(&ctx->client->ctx_lock); @@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx, spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); + + /* And finally expose ourselves to userspace via the idr */ + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); } int i915_gem_context_open(struct drm_i915_private *i915, @@ -2199,14 +2203,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, if (IS_ERR(ctx)) return ctx; + /* + * One for the xarray and one for the caller. We need to grab + * the reference *prior* to making the ctx visble to userspace + * in gem_context_register(), as at any point after that + * userspace can try to race us with another thread destroying + * the context under our feet. + */ + i915_gem_context_get(ctx); + gem_context_register(ctx, file_priv, id); old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); proto_context_close(file_priv->dev_priv, pc); - /* One for the xarray and one for the caller */ - return i915_gem_context_get(ctx); + return ctx; } struct i915_gem_context * -- cgit From d2c3c8c3d3833c45c09be671da48f9d46b79e347 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 15 Dec 2022 11:17:27 -0800 Subject: drm/i915/hwmon: Display clamped PL1 limit HW allows arbitrary PL1 limits to be set but silently clamps these values to "typical but not guaranteed" min/max values in pkg_power_sku register. Follow the same pattern for sysfs, allow arbitrary PL1 limits to be set but display clamped values when read, so that users see PL1 limits HW is likely using. Otherwise users think HW is using arbitrarily high/low PL1 limits they might have set. The previous write/read I1 power1_crit limit also follows the same clamping pattern. v2: Explain "why" in commit message and include bug link (Jani Nikula) Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7704 Signed-off-by: Ashutosh Dixit Reviewed-by: Anshuman Gupta Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20221215191727.2468770-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 39 +++++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_mchbar_regs.h | 2 ++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index cca7a4350ec8..1225bc432f0d 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -359,6 +359,38 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. Write/read I1 also follows the same pattern. + */ +static int +hwm_power_max_read(struct hwm_drvdata *ddat, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u64 r, min, max; + + *val = hwm_field_read_and_scale(ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + hwmon->scl_shift_power, + SF_POWER); + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read64(ddat->uncore, hwmon->rg.pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, r); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, r); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *val = clamp_t(u64, *val, min, max); + + return 0; +} + static int hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) { @@ -368,12 +400,7 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) switch (attr) { case hwmon_power_max: - *val = hwm_field_read_and_scale(ddat, - hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1, - hwmon->scl_shift_power, - SF_POWER); - return 0; + return hwm_power_max_read(ddat, val); case hwmon_power_rated_max: *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_power_sku, diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h index f93e9af43ac3..73900c098d59 100644 --- a/drivers/gpu/drm/i915/intel_mchbar_regs.h +++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h @@ -194,6 +194,8 @@ */ #define PCU_PACKAGE_POWER_SKU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930) #define PKG_PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) #define PKG_MAX_WIN GENMASK_ULL(54, 48) #define PKG_MAX_WIN_X GENMASK_ULL(54, 53) #define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) -- cgit From 41bb543f5598fb44e0e8dbd723f5821be83b466b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Jan 2023 15:44:08 -0800 Subject: drm/i915/mtl: Add initial gt workarounds This patch introduces initial gt workarounds for the MTL platform. v2: drop redundant/stale comments specifying wa platforms affected (Lucas). v3: drop additional redundant stale comments (MattR) Bspec: 66622 Signed-off-by: Matt Roper Signed-off-by: Matt Atwood Reviewed-by: Rodrigo Vivi Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230105234408.277750-1-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +- .../gpu/drm/i915/gt/intel_execlists_submission.c | 6 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 11 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 115 +++++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +- drivers/gpu/drm/i915/intel_device_info.c | 6 ++ 8 files changed, 123 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 99c4b866addd..922f1bb22dc6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1494,10 +1494,12 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); /* - * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is + * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_GRAPHICS_VER(engine->i915, 11, 12)) + if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 2daffa7c7dfd..18ffe55282e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2989,10 +2989,12 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); /* - * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_GRAPHICS_VER(engine->i915, 11, 12)) + if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 41a237509dcf..4127830c33ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -164,8 +164,15 @@ void intel_gt_mcr_init(struct intel_gt *gt) if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) { gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - fuse = REG_FIELD_GET(GT_L3_EXC_MASK, - intel_uncore_read(gt->uncore, XEHP_FUSE4)); + /* Wa_14016747170 */ + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, + MTL_GT_ACTIVITY_FACTOR)); + else + fuse = REG_FIELD_GET(GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, XEHP_FUSE4)); /* * Despite the register field being named "exclude mask" the diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f8eb807b56f9..d3357853be12 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -414,6 +414,7 @@ #define TBIMR_FAST_CLIP REG_BIT(5) #define VFLSKPD MCR_REG(0x62a8) +#define VF_PREFETCH_TLB_DIS REG_BIT(5) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1535,6 +1536,9 @@ #define MTL_MEDIA_MC6 _MMIO(0x138048) +#define MTL_GT_ACTIVITY_FACTOR _MMIO(0x138010) +#define MTL_GT_L3_EXC_MASK REG_GENMASK(5, 3) + #define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c) #define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7 diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index bf84efb3f15f..002ba7c2b1ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -786,6 +786,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } +static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + /* Wa_14014947963 */ + wa_masked_field_set(wal, VF_PREEMPTION, + PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_16013271637 */ + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_18019627453 */ + wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS); + + /* Wa_18018764978 */ + wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + } + + /* Wa_18019271663 */ + wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); +} + static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -872,7 +898,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_PONTEVECCHIO(i915)) + if (IS_METEORLAKE(i915)) + mtl_ctx_workarounds_init(engine, wal); + else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) dg2_ctx_workarounds_init(engine, wal); @@ -1628,7 +1656,10 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - /* FIXME: Actual workarounds will be added in future patch(es) */ + /* Wa_14014830051 */ + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); /* * Unlike older platforms, we no longer setup implicit steering here; @@ -2168,7 +2199,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_PONTEVECCHIO(i915)) + if (IS_METEORLAKE(i915)) + ; /* noop; none at this time */ + else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) dg2_whitelist_build(engine); @@ -2278,6 +2311,34 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + /* Wa_22014600077 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + ENABLE_EU_COUNT_FOR_TDL_FLUSH); + } + + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + /* Wa_1509727124 */ + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + + /* Wa_22013037850 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); + } + + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915) || + IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + /* Wa_22012856258 */ + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); + } + if (IS_DG2(i915)) { /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | @@ -2289,13 +2350,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { - /* Wa_1509727124:dg2 */ - wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, - SC_DISABLE_POWER_OPTIMIZATION_EBB); - } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ @@ -2327,14 +2381,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { - /* Wa_22013037850:dg2 */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, - DISABLE_128B_EVICTION_COMMAND_UDW); - - /* Wa_22012856258:dg2 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, - GEN12_DISABLE_READ_SUPPRESSION); - /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2944,6 +2990,27 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li add_render_compute_tuning_settings(i915, wal); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + IS_PONTEVECCHIO(i915) || + IS_DG2(i915)) { + /* Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_22014226127 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + } + + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + IS_DG2(i915)) { + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + } + if (IS_PONTEVECCHIO(i915)) { /* Wa_16016694945 */ wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); @@ -2985,17 +3052,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_14015227452:dg2,pvc */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - /* Wa_22014226127:dg2,pvc */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); - /* Wa_16015675438:dg2,pvc */ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - - /* Wa_18018781329:dg2,pvc */ - wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2(i915)) { @@ -3004,9 +3062,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_22015475538:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - - /* Wa_18017747507:dg2 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index c0b5aa6fde26..1bccc175f9e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -274,8 +274,9 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959:dg2 */ - if (IS_DG2(gt->i915)) + /* Wa_14014475959 */ + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* @@ -289,7 +290,9 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_GRAPHICS_VER(gt->i915, 11, 12)) + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + (GRAPHICS_VER(gt->i915) >= 11 && + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) flags |= GUC_WA_PRE_PARSER; /* Wa_16011777198:dg2 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fe06c93cf6e3..b436dd7f12e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1621,7 +1621,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); /* - * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ intel_engine_wait_for_pending_mi_fw(engine); @@ -4203,8 +4203,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_TIMESLICES; /* Wa_14014475959:dg2 */ - if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) - engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + if (engine->class == COMPUTE_CLASS) + if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + IS_DG2(engine->i915)) + engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; /* * TODO: GuC supports timeslicing and semaphores as well, but they're diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 849baf6c3b3c..05e90d09b208 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -343,6 +343,12 @@ static void intel_ipver_early_init(struct drm_i915_private *i915) ip_ver_read(i915, i915_mmio_reg_offset(GMD_ID_GRAPHICS), &runtime->graphics.ip); + /* Wa_22012778468 */ + if (runtime->graphics.ip.ver == 0x0 && + INTEL_INFO(i915)->platform == INTEL_METEORLAKE) { + RUNTIME_INFO(i915)->graphics.ip.ver = 12; + RUNTIME_INFO(i915)->graphics.ip.rel = 70; + } ip_ver_read(i915, i915_mmio_reg_offset(GMD_ID_DISPLAY), &runtime->display.ip); ip_ver_read(i915, i915_mmio_reg_offset(GMD_ID_MEDIA), -- cgit From f350c74fed66a41a6d5a8f4305e735f9c8b266ca Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 13 Dec 2022 13:19:50 +0100 Subject: drm/i915: use proper helper in igt_vma_move_to_active_unlocked There is no need to use _i915_vma_move_to_active. No functional changes. Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221213121951.1515023-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 1379fbc14431..71a3ca8a8865 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -38,7 +38,7 @@ igt_vma_move_to_active_unlocked(struct i915_vma *vma, struct i915_request *rq, int err; i915_vma_lock(vma); - err = _i915_vma_move_to_active(vma, rq, &rq->fence, flags); + err = i915_vma_move_to_active(vma, rq, flags); i915_vma_unlock(vma); return err; } -- cgit From 4f16749f89596c4eb65c0801c86e4a74ad6c62a9 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 13 Dec 2022 13:19:51 +0100 Subject: drm/i915/selftest: use igt_vma_move_to_active_unlocked if possible Helper replaces common sequence of calls. Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221213121951.1515023-2-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 8 ++------ drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 8 ++------ drivers/gpu/drm/i915/gt/selftest_execlists.c | 12 +++--------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 4 +--- drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 +--- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 12 +++--------- 6 files changed, 12 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 414ee2cb70fc..a81fa6a20f5a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1551,9 +1551,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_unpin; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); if (err) goto skip_request; @@ -1686,9 +1684,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_unpin; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); if (err) goto skip_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index c147038230c4..20a232a140b0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -130,15 +130,11 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto err_batch; } - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); + err = igt_vma_move_to_active_unlocked(batch, rq, 0); if (err) goto skip_request; - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); if (err) goto skip_request; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index a619057543fd..736b89a8ecf5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2763,13 +2763,11 @@ static int create_gang(struct intel_engine_cs *engine, rq->batch = i915_vma_get(vma); i915_request_get(rq); - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, 0); - i915_vma_unlock(vma); i915_request_add(rq); if (err) goto err_rq; @@ -3177,9 +3175,7 @@ create_gpr_client(struct intel_engine_cs *engine, goto out_batch; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); i915_vma_lock(batch); if (!err) @@ -3514,13 +3510,11 @@ static int smoke_submit(struct preempt_smoke *smoke, } if (vma) { - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = igt_vma_move_to_active_unlocked(vma, rq, 0); if (!err) err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, 0); - i915_vma_unlock(vma); } i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index a61ae9d7e0a2..a78a3d2c2e16 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -599,9 +599,7 @@ __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) *cs++ = 0; } - i915_vma_lock(scratch); - err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(scratch); + err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE); i915_request_get(rq); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index f27cc28608d4..ca009a6a13bd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -228,9 +228,7 @@ static int check_mocs_engine(struct live_mocs *arg, if (IS_ERR(rq)) return PTR_ERR(rq); - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); /* Read the mocs tables back using SRM */ offset = i915_ggtt_offset(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9c5449709161..14a8b25b6204 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -138,9 +138,7 @@ read_nonprivs(struct intel_context *ce) goto err_pin; } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); if (err) goto err_req; @@ -853,9 +851,7 @@ static int read_whitelisted_registers(struct intel_context *ce, if (IS_ERR(rq)) return PTR_ERR(rq); - i915_vma_lock(results); - err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(results); + err = igt_vma_move_to_active_unlocked(results, rq, EXEC_OBJECT_WRITE); if (err) goto err_req; @@ -935,9 +931,7 @@ static int scrub_whitelisted_registers(struct intel_context *ce) goto err_request; } - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); + err = igt_vma_move_to_active_unlocked(batch, rq, 0); if (err) goto err_request; -- cgit From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 12 Dec 2022 17:13:38 +0100 Subject: drm/i915/gt: Reset twice After applying an engine reset, on some platforms like Jasperlake, we occasionally detect that the engine state is not cleared until shortly after the resume. As we try to resume the engine with volatile internal state, the first request fails with a spurious CS event (it looks like it reports a lite-restore to the hung context, instead of the expected idle->active context switch). Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Cc: Mika Kuoppala Signed-off-by: Andi Shyti Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ffde89c5835a..0bb9094fdacd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -268,6 +268,7 @@ out: static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; + int loops = 2; int err; /* @@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) * for fifo space for the write or forcewake the chip for * the read */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + do { + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); + /* + * Wait for the device to ack the reset requests. + * + * On some platforms, e.g. Jasperlake, we see that the + * engine register state is not cleared until shortly after + * GDRST reports completion, causing a failure as we try + * to immediately resume while the internal state is still + * in flux. If we immediately repeat the reset, the second + * reset appears to serialise with the first, and since + * it is a no-op, the registers should retain their reset + * value. However, there is still a concern that upon + * leaving the second reset, the internal engine state + * is still in flux and not ready for resuming. + */ + err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, + hw_domain_mask, 0, + 2000, 0, + NULL); + } while (err == 0 && --loops); if (err) GT_TRACE(gt, "Wait for 0x%08x engines reset failed\n", hw_domain_mask); + /* + * As we have observed that the engine state is still volatile + * after GDRST is acked, impose a small delay to let everything settle. + */ + udelay(50); + return err; } -- cgit From 10903b0a0f4d4964b352fa3df12d3d2ef5fb7a3b Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 5 Jan 2023 10:37:01 -0300 Subject: drm/i915/gt: Cover rest of SVG unit MCR registers CHICKEN_RASTER_{1,2} got overlooked with the move done in commit a9e69428b1b4 ("drm/i915: Define MCR registers explicitly"). Registers from the SVG unit became multicast as of Xe_HP graphics. BSpec: 66534 Fixes: a9e69428b1b4 ("drm/i915: Define MCR registers explicitly") Signed-off-by: Gustavo Sousa Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230105133701.19556-1-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d3357853be12..4f5c06d60bcd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -407,10 +407,10 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) -#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define CHICKEN_RASTER_1 MCR_REG(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) -#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) #define VFLSKPD MCR_REG(0x62a8) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 002ba7c2b1ed..6dacd0dc5c2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -645,7 +645,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); + wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_mcr_add(wal, @@ -780,7 +780,7 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_15010599737:dg2 */ - wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); -- cgit From 3e95263dfb52d5e5b60fbdda51094c788c446f07 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Jan 2023 11:35:33 +0000 Subject: drm/i915: Do not cover all future platforms in TLB invalidation Revert to the original explicit approach and document the reasoning behind it. v2: * DG2 needs to be covered too. (Matt) v3: * Full version check for Gen12 to avoid catching all future platforms. (Matt) v4: * Be totally explicit on the Gen12 branch. (Andrzej) Signed-off-by: Tvrtko Ursulin Cc: Matt Roper Cc: Balasubramani Vivekanandan Cc: Andrzej Hajda Reviewed-by: Andrzej Hajda # v1 Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230110113533.744436-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7eeee5a7cb33..5721bf85d119 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1070,10 +1070,23 @@ static void mmio_invalidate_full(struct intel_gt *gt) unsigned int num = 0; unsigned long flags; - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + /* + * New platforms should not be added with catch-all-newer (>=) + * condition so that any later platform added triggers the below warning + * and in turn mandates a human cross-check of whether the invalidation + * flows have compatible semantics. + * + * For instance with the 11.00 -> 12.00 transition three out of five + * respective engine registers were moved to masked type. Then after the + * 12.00 -> 12.50 transition multi cast handling is required too. + */ + + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { regs = NULL; num = ARRAY_SIZE(xehp_regs); - } else if (GRAPHICS_VER(i915) == 12) { + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) { regs = gen12_regs; num = ARRAY_SIZE(gen12_regs); } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { -- cgit From f7a79bdc4e672092f111efc917668706c93b535f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 10 Jan 2023 10:44:53 -0600 Subject: drm/i915/guc: Replace zero-length arrays with flexible-array members Zero-length arrays are deprecated[1] and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length arrays in a couple of structures (three, actually) with flex-array members. This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [2]. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [2] Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/Y72WBTUmh9r1lvKN@work --- drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h index 3624abfd22d1..9d589c28f40f 100644 --- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -73,7 +73,7 @@ struct guc_debug_capture_list_header { struct guc_debug_capture_list { struct guc_debug_capture_list_header header; - struct guc_mmio_reg regs[0]; + struct guc_mmio_reg regs[]; } __packed; /** @@ -125,7 +125,7 @@ struct guc_state_capture_header_t { struct guc_state_capture_t { struct guc_state_capture_header_t header; - struct guc_mmio_reg mmio_entries[0]; + struct guc_mmio_reg mmio_entries[]; } __packed; enum guc_capture_group_types { @@ -145,7 +145,7 @@ struct guc_state_capture_group_header_t { /* this is the top level structure where an error-capture dump starts */ struct guc_state_capture_group_t { struct guc_state_capture_group_header_t grp_header; - struct guc_state_capture_t capture_entries[0]; + struct guc_state_capture_t capture_entries[]; } __packed; /** -- cgit From b762787bf767e8bd09c525a178e2259a47f71342 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Wed, 21 Dec 2022 09:49:01 -0800 Subject: drm/i915/pxp: Use drm_dbg if arb session failed due to fw version If PXP arb-session is being attempted on older hardware SKUs or on hardware with older, unsupported, firmware versions, then don't report the failure with a drm_error. Instead, look specifically for the API-version error reply and drm_dbg that reply. In this case, the user-space will eventually get a -ENODEV for the protected context creation which is the correct behavior and we don't create unnecessary drm_error's in our dmesg (for what is unsupported platforms). Changes from prio revs: v2 : - remove unnecessary newline. (Jani) v1 : - print incorrect version from input packet, not output. Signed-off-by: Alan Previn Reviewed-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20221221174901.2703954-1-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h | 1 + drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h index c2f23394f9b8..aaa8187a0afb 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h @@ -17,6 +17,7 @@ */ enum pxp_status { PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_ERROR_API_VERSION = 0x1002, PXP_STATUS_OP_NOT_PERMITTED = 0x4013 }; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index d50354bfb993..73aa8015f828 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -298,6 +298,10 @@ int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp, if (ret) drm_err(&i915->drm, "Failed to send tee msg ret=[%d]\n", ret); + else if (msg_out.header.status == PXP_STATUS_ERROR_API_VERSION) + drm_dbg(&i915->drm, "PXP firmware version unsupported, requested: " + "CMD-ID-[0x%08x] on API-Ver-[0x%08x]\n", + msg_in.header.command_id, msg_in.header.api_version); else if (msg_out.header.status != 0x0) drm_warn(&i915->drm, "PXP firmware failed arb session init request ret=[0x%08x]\n", msg_out.header.status); -- cgit From 0c8a6e9ea232c221976a0670256bd861408d9917 Mon Sep 17 00:00:00 2001 From: Sasa Dragic Date: Mon, 19 Dec 2022 18:29:27 +0100 Subject: drm/i915: re-disable RC6p on Sandy Bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RC6p on Sandy Bridge got re-enabled over time, causing visual glitches and GPU hangs. Disabled originally in commit 1c8ecf80fdee ("drm/i915: do not enable RC6p on Sandy Bridge"). Signed-off-by: Sasa Dragic Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221219172927.9603-2-sasa.dragic@gmail.com Fixes: fb6db0f5bf1d ("drm/i915: Remove unsafe i915.enable_rc6") Fixes: 13c5a577b342 ("drm/i915/gt: Select the deepest available parking mode for rc6") Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 46acbf390195..be700413199e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -423,7 +423,8 @@ static const struct intel_device_info ilk_m_info = { .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ - .has_rc6p = 1, \ + /* snb does support rc6p, but enabling it causes various issues */ \ + .has_rc6p = 0, \ .has_rps = true, \ .dma_mask_size = 40, \ .__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, \ -- cgit From 67804e48b4945e109b54b4bf5a75f5cad3e2dca0 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 11 Jan 2023 12:04:29 -0800 Subject: drm/i915/gt: Start adding module oriented dmesg output When trying to analyse bug reports from CI, customers, etc. it can be difficult to work out exactly what is happening on which GT in a multi-GT system. So add GT oriented debug/error message wrappers. If used instead of the drm_ equivalents, you get the same output but with a GT# prefix on it. v2: Go back to using lower case names (combined review feedback). Convert intel_gt.c as a first step. v3: Add gt_err_ratelimited() as well, undo one conversation that might not have a GT pointer in some scenarios (review feedback from Michal W). Split definitions into separate header (review feedback from Jani). Convert all intel_gt*.c files. v4: Re-order some macro definitions (Andi S), update (c) date (Tvrtko) Signed-off-by: John Harrison Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230111200429.2139084-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 96 ++++++++++++-------------- drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 8 +-- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 9 ++- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 9 ++- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 9 ++- drivers/gpu/drm/i915/gt/intel_gt_print.h | 51 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 34 +++------ drivers/gpu/drm/i915/gt/intel_gtt.c | 7 +- 9 files changed, 129 insertions(+), 98 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_print.h diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 5721bf85d119..f0dbfc434e07 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -22,6 +22,7 @@ #include "intel_gt_debugfs.h" #include "intel_gt_mcr.h" #include "intel_gt_pm.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_gt_requests.h" #include "intel_migrate.h" @@ -89,9 +90,8 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) if (err == -ENODEV) return 0; - drm_err(&i915->drm, - "Failed to setup region(%d) type=%d\n", - err, INTEL_MEMORY_LOCAL); + gt_err(gt, "Failed to setup region(%d) type=%d\n", + err, INTEL_MEMORY_LOCAL); return err; } @@ -200,14 +200,14 @@ int intel_gt_init_hw(struct intel_gt *gt) ret = i915_ppgtt_init_hw(gt); if (ret) { - drm_err(&i915->drm, "Enabling PPGTT failed (%d)\n", ret); + gt_err(gt, "Enabling PPGTT failed (%d)\n", ret); goto out; } /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(>->uc); if (ret) { - i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); + gt_probe_error(gt, "Enabling uc failed (%d)\n", ret); goto out; } @@ -257,7 +257,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, * some errors might have become stuck, * mask them. */ - drm_dbg(>->i915->drm, "EIR stuck: 0x%08x, masking\n", eir); + gt_dbg(gt, "EIR stuck: 0x%08x, masking\n", eir); intel_uncore_rmw(uncore, EMR, 0, eir); intel_uncore_write(uncore, GEN2_IIR, I915_MASTER_ERROR_INTERRUPT); @@ -291,16 +291,16 @@ static void gen6_check_faults(struct intel_gt *gt) for_each_engine(engine, gt, id) { fault = GEN6_RING_FAULT_REG_READ(engine); if (fault & RING_FAULT_VALID) { - drm_dbg(&engine->i915->drm, "Unexpected fault\n" - "\tAddr: 0x%08lx\n" - "\tAddress space: %s\n" - "\tSource ID: %d\n" - "\tType: %d\n", - fault & PAGE_MASK, - fault & RING_FAULT_GTTSEL_MASK ? - "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + gt_dbg(gt, "Unexpected fault\n" + "\tAddr: 0x%08lx\n" + "\tAddress space: %s\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? + "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); } } } @@ -327,17 +327,17 @@ static void xehp_check_faults(struct intel_gt *gt) fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | ((u64)fault_data0 << 12); - drm_dbg(>->i915->drm, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + gt_dbg(gt, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); } } @@ -368,17 +368,17 @@ static void gen8_check_faults(struct intel_gt *gt) fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | ((u64)fault_data0 << 12); - drm_dbg(&uncore->i915->drm, "Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + gt_dbg(gt, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); } } @@ -472,7 +472,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) { - drm_err(&i915->drm, "Failed to allocate scratch page\n"); + gt_err(gt, "Failed to allocate scratch page\n"); return PTR_ERR(obj); } @@ -727,8 +727,7 @@ int intel_gt_init(struct intel_gt *gt) err = intel_gt_init_hwconfig(gt); if (err) - drm_err(>->i915->drm, "Failed to retrieve hwconfig table: %pe\n", - ERR_PTR(err)); + gt_err(gt, "Failed to retrieve hwconfig table: %pe\n", ERR_PTR(err)); err = __engines_record_defaults(gt); if (err) @@ -885,7 +884,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) gt->name = "Primary GT"; gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; - drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + gt_dbg(gt, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); if (ret) return ret; @@ -910,7 +909,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) gt->info.engine_mask = gtdef->engine_mask; gt->info.id = i; - drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + gt_dbg(gt, "Setting up %s\n", gt->name); if (GEM_WARN_ON(range_overflows_t(resource_size_t, gtdef->mapping_base, SZ_16M, @@ -998,8 +997,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, const unsigned int class = engine->class; struct reg_and_bit rb = { }; - if (drm_WARN_ON_ONCE(&engine->i915->drm, - class >= num || !regs[class].reg)) + if (gt_WARN_ON_ONCE(engine->gt, class >= num || !regs[class].reg)) return rb; rb.reg = regs[class]; @@ -1096,8 +1094,7 @@ static void mmio_invalidate_full(struct intel_gt *gt) return; } - if (drm_WARN_ONCE(&i915->drm, !num, - "Platform does not implement TLB invalidation!")) + if (gt_WARN_ONCE(gt, !num, "Platform does not implement TLB invalidation!")) return; intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); @@ -1162,9 +1159,8 @@ static void mmio_invalidate_full(struct intel_gt *gt) } if (wait_for_invalidate(gt, rb)) - drm_err_ratelimited(>->i915->drm, - "%s TLB invalidation did not complete in %ums!\n", - engine->name, TLB_INVAL_TIMEOUT_MS); + gt_err_ratelimited(gt, "%s TLB invalidation did not complete in %ums!\n", + engine->name, TLB_INVAL_TIMEOUT_MS); } /* diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 2a6a4ca7fdad..7c9be4fd1c8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -7,6 +7,7 @@ #include "i915_reg.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" static u32 read_reference_ts_freq(struct intel_uncore *uncore) @@ -193,10 +194,9 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt) void intel_gt_check_clock_frequency(const struct intel_gt *gt) { if (gt->clock_frequency != read_clock_frequency(gt->uncore)) { - dev_err(gt->i915->drm.dev, - "GT clock frequency changed, was %uHz, now %uHz!\n", - gt->clock_frequency, - read_clock_frequency(gt->uncore)); + gt_err(gt, "GT clock frequency changed, was %uHz, now %uHz!\n", + gt->clock_frequency, + read_clock_frequency(gt->uncore)); } } #endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 8fac2660e16b..1b25a6039152 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -10,6 +10,7 @@ #include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_irq.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_uncore.h" #include "intel_rps.h" @@ -47,9 +48,8 @@ gen11_gt_engine_identity(struct intel_gt *gt, !time_after32(local_clock() >> 10, timeout_ts)); if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { - drm_err(>->i915->drm, - "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", - bank, bit, ident); + gt_err(gt, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); return 0; } @@ -378,8 +378,7 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | GT_CS_MASTER_ERROR_INTERRUPT)) - drm_dbg(>->i915->drm, "Command parser error, gt_iir 0x%08x\n", - gt_iir); + gt_dbg(gt, "Command parser error, gt_iir 0x%08x\n", gt_iir); if (gt_iir & GT_PARITY_ERROR(gt->i915)) gen7_parity_error_irq_handler(gt, gt_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 4127830c33ca..169393a7ad88 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_gt_mcr.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" /** @@ -158,7 +159,7 @@ void intel_gt_mcr_init(struct intel_gt *gt) GEN12_MEML3_EN_MASK); if (!gt->info.mslice_mask) /* should be impossible! */ - drm_warn(&i915->drm, "mslice mask all zero!\n"); + gt_warn(gt, "mslice mask all zero!\n"); } if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) { @@ -205,7 +206,7 @@ void intel_gt_mcr_init(struct intel_gt *gt) ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN10_L3BANK_MASK; if (!gt->info.l3bank_mask) /* should be impossible! */ - drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); + gt_warn(gt, "L3 bank mask is all zero!\n"); } else if (GRAPHICS_VER(i915) >= 11) { /* * We expect all modern platforms to have at least some @@ -394,9 +395,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * releasing it properly. */ if (err == -ETIMEDOUT) { - drm_err_ratelimited(>->i915->drm, - "GT%u hardware MCR steering semaphore timed out", - gt->info.id); + gt_err_ratelimited(gt, "hardware MCR steering semaphore timed out"); add_taint_for_CI(gt->i915, TAINT_WARN); /* CI is now unreliable */ } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index c065950d0bad..cef3d6f5c34e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -14,6 +14,7 @@ #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" +#include "intel_gt_print.h" #include "intel_gt_requests.h" #include "intel_llc.h" #include "intel_pm.h" @@ -275,8 +276,7 @@ int intel_gt_resume(struct intel_gt *gt) /* Only when the HW is re-initialised, can we replay the requests */ err = intel_gt_init_hw(gt); if (err) { - i915_probe_error(gt->i915, - "Failed to initialize GPU, declaring it wedged!\n"); + gt_probe_error(gt, "Failed to initialize GPU, declaring it wedged!\n"); goto err_wedged; } @@ -293,9 +293,8 @@ int intel_gt_resume(struct intel_gt *gt) intel_engine_pm_put(engine); if (err) { - drm_err(>->i915->drm, - "Failed to restart %s (%d)\n", - engine->name, err); + gt_err(gt, "Failed to restart %s (%d)\n", + engine->name, err); goto err_wedged; } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_print.h b/drivers/gpu/drm/i915/gt/intel_gt_print.h new file mode 100644 index 000000000000..5d9da355ce24 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_print.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_GT_PRINT__ +#define __INTEL_GT_PRINT__ + +#include +#include "intel_gt_types.h" +#include "i915_utils.h" + +#define gt_err(_gt, _fmt, ...) \ + drm_err(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_warn(_gt, _fmt, ...) \ + drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_notice(_gt, _fmt, ...) \ + drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_info(_gt, _fmt, ...) \ + drm_info(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_dbg(_gt, _fmt, ...) \ + drm_dbg(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_err_ratelimited(_gt, _fmt, ...) \ + drm_err_ratelimited(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_probe_error(_gt, _fmt, ...) \ + do { \ + if (i915_error_injected()) \ + gt_dbg(_gt, _fmt, ##__VA_ARGS__); \ + else \ + gt_err(_gt, _fmt, ##__VA_ARGS__); \ + } while (0) + +#define gt_WARN(_gt, _condition, _fmt, ...) \ + drm_WARN(&(_gt)->i915->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ + drm_WARN_ONCE(&(_gt)->i915->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define gt_WARN_ON(_gt, _condition) \ + gt_WARN(_gt, _condition, "%s", "gt_WARN_ON(" __stringify(_condition) ")") + +#define gt_WARN_ON_ONCE(_gt, _condition) \ + gt_WARN_ONCE(_gt, _condition, "%s", "gt_WARN_ONCE(" __stringify(_condition) ")") + +#endif /* __INTEL_GT_PRINT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 9486dd3bed99..6629e4c72b6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -12,6 +12,7 @@ #include "i915_drv.h" #include "i915_sysfs.h" #include "intel_gt.h" +#include "intel_gt_print.h" #include "intel_gt_sysfs.h" #include "intel_gt_sysfs_pm.h" #include "intel_gt_types.h" @@ -105,8 +106,7 @@ void intel_gt_sysfs_register(struct intel_gt *gt) exit_fail: kobject_put(>->sysfs_gt); - drm_warn(>->i915->drm, - "failed to initialize gt%d sysfs root\n", gt->info.id); + gt_warn(gt, "failed to initialize sysfs root\n"); } void intel_gt_sysfs_unregister(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index cf71305ad586..e9369aa945eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -11,6 +11,7 @@ #include "i915_reg.h" #include "i915_sysfs.h" #include "intel_gt.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_gt_sysfs.h" #include "intel_gt_sysfs_pm.h" @@ -305,9 +306,7 @@ static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj) ret = __intel_gt_sysfs_create_group(kobj, rc6_attr_group); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u RC6 sysfs files (%pe)\n", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create RC6 sysfs files (%pe)\n", ERR_PTR(ret)); /* * cannot use the is_visible() attribute because @@ -316,17 +315,13 @@ static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj) if (HAS_RC6p(gt->i915)) { ret = __intel_gt_sysfs_create_group(kobj, rc6p_attr_group); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u RC6p sysfs files (%pe)\n", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create RC6p sysfs files (%pe)\n", ERR_PTR(ret)); } if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) { ret = __intel_gt_sysfs_create_group(kobj, media_rc6_attr_group); if (ret) - drm_warn(>->i915->drm, - "failed to create media %u RC6 sysfs files (%pe)\n", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create media RC6 sysfs files (%pe)\n", ERR_PTR(ret)); } } #else @@ -745,9 +740,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) ret = intel_sysfs_rps_init(gt, kobj); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u RPS sysfs files (%pe)", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create RPS sysfs files (%pe)", ERR_PTR(ret)); /* end of the legacy interfaces */ if (!is_object_gt(kobj)) @@ -755,29 +748,22 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create punit_req_freq_mhz sysfs (%pe)", ERR_PTR(ret)); if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u throttle sysfs files (%pe)", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create throttle sysfs files (%pe)", ERR_PTR(ret)); } if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(>->uc)) { ret = sysfs_create_files(kobj, media_perf_power_attrs); if (ret) - drm_warn(>->i915->drm, - "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to create media_perf_power_attrs sysfs (%pe)\n", + ERR_PTR(ret)); } ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs); if (ret) - drm_warn(>->i915->drm, - "failed to add gt%u rps defaults (%pe)\n", - gt->info.id, ERR_PTR(ret)); + gt_warn(gt, "failed to add rps defaults (%pe)\n", ERR_PTR(ret)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e37164a60d37..4f436ba7a3c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -17,6 +17,7 @@ #include "i915_utils.h" #include "intel_gt.h" #include "intel_gt_mcr.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_gtt.h" @@ -461,9 +462,9 @@ void gtt_write_workarounds(struct intel_gt *gt) intel_uncore_write(uncore, HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); - drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache && - intel_uncore_read(uncore, - HSW_GTT_CACHE_EN) == 0); + gt_WARN_ON_ONCE(gt, can_use_gtt_cache && + intel_uncore_read(uncore, + HSW_GTT_CACHE_EN) == 0); } } -- cgit From 14ec40a88210151296fff3e981c1a7196ad9bf55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Jan 2023 13:32:34 +0100 Subject: drm/i915/selftests: Unwind hugepages to drop wakeref on error Make sure that upon error after we have acquired the wakeref we do release it again. v2: add another missing "goto out_wf"(Andi). Fixes: 027c38b4121e ("drm/i915/selftests: Grab the runtime pm in shrink_thp") Cc: Andi Shyti Reviewed-by: Matthew Auld Reviewed-by: Andrzej Hajda Signed-off-by: Chris Wilson Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Nirmoy Das Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230117123234.26487-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c281b0ec9e05..defece0bcb81 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1855,7 +1855,7 @@ static int igt_shrink_thp(void *arg) I915_SHRINK_ACTIVE); i915_vma_unpin(vma); if (err) - goto out_put; + goto out_wf; /* * Now that the pages are *unpinned* shrinking should invoke @@ -1871,19 +1871,19 @@ static int igt_shrink_thp(void *arg) pr_err("unexpected pages mismatch, should_swap=%s\n", str_yes_no(should_swap)); err = -EINVAL; - goto out_put; + goto out_wf; } if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) { pr_err("unexpected residual page-size bits, should_swap=%s\n", str_yes_no(should_swap)); err = -EINVAL; - goto out_put; + goto out_wf; } err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_put; + goto out_wf; while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); -- cgit From 378e04f7cb24aad124a8e55e7a36b689fb63ac17 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 17 Jan 2023 14:38:56 +0200 Subject: drm/i915: remove a couple of superfluous i915_drm.h includes Remove a couple of unnecessary includes. Signed-off-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230117123856.2271720-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c | 1 - drivers/gpu/drm/i915/pxp/intel_pxp_huc.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c index 4e2163a1aa46..0e3630103693 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c @@ -6,7 +6,6 @@ #include "intel_ggtt_gmch.h" #include -#include #include diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c index 812c8bc7005e..64609d1b1c0f 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c @@ -3,8 +3,6 @@ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved. */ -#include - #include "i915_drv.h" #include "gem/i915_gem_region.h" -- cgit