drm/amdgpu: initialize ras caps per platform config
author Hawking Zhang <Hawking.Zhang@amd.com>
Mon, 8 Mar 2021 08:40:07 +0000 (16:40 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Apr 2021 20:50:48 +0000 (16:50 -0400)
The driver manages only GFX/SDMA/MMHUB RAS on platforms
where the GPU node is connected to the CPU through XGMI;
otherwise, it queries the VBIOS for RAS capabilities.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 2645894..1708045 100644 (file)
@@ -1936,6 +1936,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
        return adev->asic_type == CHIP_VEGA10 ||
                adev->asic_type == CHIP_VEGA20 ||
                adev->asic_type == CHIP_ARCTURUS ||
+               adev->asic_type == CHIP_ALDEBARAN ||
                adev->asic_type == CHIP_SIENNA_CICHLID;
 }
 
@@ -1958,19 +1959,29 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
            !amdgpu_ras_asic_supported(adev))
                return;
 
-       if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
-               dev_info(adev->dev, "MEM ECC is active.\n");
-               *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
-                               1 << AMDGPU_RAS_BLOCK__DF);
-       } else
-               dev_info(adev->dev, "MEM ECC is not presented.\n");
+       if (!adev->gmc.xgmi.connected_to_cpu) {
+               if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+                       dev_info(adev->dev, "MEM ECC is active.\n");
+                       *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
+                                       1 << AMDGPU_RAS_BLOCK__DF);
+               } else {
+                       dev_info(adev->dev, "MEM ECC is not presented.\n");
+               }
 
-       if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
-               dev_info(adev->dev, "SRAM ECC is active.\n");
-               *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
-                               1 << AMDGPU_RAS_BLOCK__DF);
-       } else
-               dev_info(adev->dev, "SRAM ECC is not presented.\n");
+               if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+                       dev_info(adev->dev, "SRAM ECC is active.\n");
+                       *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+                                       1 << AMDGPU_RAS_BLOCK__DF);
+               } else {
+                       dev_info(adev->dev, "SRAM ECC is not presented.\n");
+               }
+       } else {
+               /* driver only manages a few IP blocks RAS feature
+                * when GPU is connected cpu through XGMI */
+               *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
+                               1 << AMDGPU_RAS_BLOCK__SDMA |
+                               1 << AMDGPU_RAS_BLOCK__MMHUB);
+       }
 
        /* hw_supported needs to be aligned with RAS block mask. */
        *hw_supported &= AMDGPU_RAS_BLOCK_MASK;