From f464c5dd4d918d4dd84eda7e68d4a0b6d41fe37f Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 20 Mar 2023 18:21:14 +0800 Subject: [PATCH] drm/amdgpu: add check for RAS instance mask The mask is only needed to be set when RAS block instance number is more than 1 and invalid bits should be also masked out. We only check valid bits for GFX and SDMA block for now, and will add check for other RAS blocks in the future. v2: move the check under injection operation since the mask is only used by RAS error inject. v3: add valid bits handling for SDMA. v4: print message if the mask is adjusted. Signed-off-by: Tao Zhou Hawking Zhang Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b7d8250..6bb4386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -333,6 +333,42 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } +static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev, + struct ras_debug_if *data) +{ + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; + uint32_t mask, inst_mask = data->inject.instance_mask; + + /* no need to set instance mask if there is only one instance */ + if (num_xcc <= 1 && inst_mask) { + data->inject.instance_mask = 0; + dev_dbg(adev->dev, + "RAS inject mask(0x%x) isn't supported and force it to 0.\n", + inst_mask); + + return; + } + + switch (data->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + mask = GENMASK(num_xcc - 1, 0); + break; + case AMDGPU_RAS_BLOCK__SDMA: + mask = GENMASK(adev->sdma.num_instances - 1, 0); + break; + default: + mask = 0; + break; + } + + /* remove invalid bits in instance mask */ + data->inject.instance_mask &= mask; + if (inst_mask != data->inject.instance_mask) + dev_dbg(adev->dev, + "Adjust RAS inject mask 0x%x to 0x%x\n", + inst_mask, data->inject.instance_mask); +} + /** * DOC: AMDGPU RAS debugfs control interface * @@ -468,6 +504,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, break; } + amdgpu_ras_instance_mask_check(adev, &data); + /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); break; -- 2.7.4