diff options
author | YiPeng Chai <YiPeng.Chai@amd.com> | 2023-01-05 14:53:59 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-01-17 16:11:51 -0500 |
commit | ae6f2db4d59e9f8c90cb3c2d2a954832898d0f2b (patch) | |
tree | 52979fe532fff5df4e608bedb7f427c72b15f465 | |
parent | 79d949a2d688b09b1ff840a2366a8cfe1b7a2651 (diff) | |
download | linux-ae6f2db4d59e9f8c90cb3c2d2a954832898d0f2b.tar.gz |
drm/amdgpu: Add gfx ras poison consumption irq handling on gfx v11_0_3
Add gfx ras poison consumption irq handling on gfx v11_0_3.
V2:
Move ras poison consumption irq handling code of gfx
v11_0_3 to gfx_v11_0_3.c.
V5:
Create dedicated irq handler for RLC_GC_FED_INTERRUPT.
V6:
Remove invalid function call.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 24 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 50 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h | 2 |
4 files changed, 79 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6b26597217ed..0b39fe3cd624 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -210,6 +210,9 @@ struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); + int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); }; struct amdgpu_gfx_funcs { @@ -323,6 +326,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src priv_inst_irq; struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; + struct amdgpu_irq_src rlc_gc_fed_irq; struct sq_work sq_work; /* gfx status */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 37002f8cf2da..9ddc91e70bf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1301,6 +1301,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* FED error */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, + &adev->gfx.rlc_gc_fed_irq); + if (r) + return r; + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; if (adev->gfx.imu.funcs) { @@ -5992,6 +5999,16 @@ static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) + return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); + + return 0; +} + #if 0 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, @@ -6222,6 +6239,10 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .process = gfx_v11_0_priv_inst_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { + .process = gfx_v11_0_rlc_gc_fed_irq, +}; + static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -6232,6 +6253,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; + + adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ + adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; } static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 5966d984a30a..a18e09de31dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -22,6 +22,54 @@ */ #include "amdgpu.h" +#include "soc21.h" +#include "gc/gc_11_0_3_offset.h" +#include "gc/gc_11_0_3_sh_mask.h" +#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" +#include "soc15.h" +#include "soc15d.h" +#include "gfx_v11_0.h" -struct amdgpu_gfx_ras gfx_v11_0_3_ras; +static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t rlc_status0 = 0, rlc_status1 = 0; + struct ras_common_if *ras_if = NULL; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0)); + rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1)); + + if (!rlc_status0 && !rlc_status1) { + dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n"); + return 0; + } + + /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */ + if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) || + REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) + ras_if = adev->sdma.ras_if; + else + ras_if = adev->gfx.ras_if; + + if (!ras_if) { + dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n", + rlc_status0); + return -EINVAL; + } + + ih_data.head = *ras_if; + + dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + + return 0; +} + +struct amdgpu_gfx_ras gfx_v11_0_3_ras = { + .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq, +}; diff --git a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h index 9e8ed9f4bb15..3a4670bc4449 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h @@ -49,6 +49,8 @@ #define GFX_11_0_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT 65 // 0x41 GPF(Sem incomplete timeout) #define GFX_11_0_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT 66 // 0x42 Semaphore wait fail timeout +#define GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT 128 // 0x80 FED Interrupt (for data poisoning) + #define GFX_11_0_0__SRCID__CP_GENERIC_INT 177 // 0xB1 CP_GENERIC int #define GFX_11_0_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR 180 // 0xB4 PM4 Pkt Rsvd Bits Error #define GFX_11_0_0__SRCID__CP_EOP_INTERRUPT 181 // 0xB5 End-of-Pipe Interrupt |