drm/amdgpu: create mmhub ras framework
author Tao Zhou <tao.zhou1@amd.com>
Wed, 7 Aug 2019 04:21:22 +0000 (12:21 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 12 Aug 2019 17:47:48 +0000 (12:47 -0500)
Enable the mmhub RAS feature and create the sysfs/debugfs nodes for mmhub.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 071145a..cac2ef8 100644 (file)
@@ -177,7 +177,8 @@ struct amdgpu_gmc {
 
        struct amdgpu_xgmi xgmi;
        struct amdgpu_irq_src   ecc_irq;
-       struct ras_common_if    *ras_if;
+       struct ras_common_if    *umc_ras_if;
+       struct ras_common_if    *mmhub_ras_if;
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
index 7ca75d2..7206e55 100644 (file)
@@ -266,7 +266,7 @@ static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
                struct amdgpu_irq_src *source,
                struct amdgpu_iv_entry *entry)
 {
-       struct ras_common_if *ras_if = adev->gmc.ras_if;
+       struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
        struct ras_dispatch_if ih_data = {
                .entry = entry,
        };
@@ -740,27 +740,25 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
        return 0;
 }
 
-static int gmc_v9_0_ecc_late_init(void *handle)
+static int gmc_v9_0_ecc_ras_block_late_init(void *handle,
+                       struct ras_fs_if *fs_info, struct ras_common_if *ras_block)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct ras_common_if **ras_if = &adev->gmc.ras_if;
+       struct ras_common_if **ras_if = NULL;
        struct ras_ih_if ih_info = {
                .cb = gmc_v9_0_process_ras_data_cb,
        };
-       struct ras_fs_if fs_info = {
-               .sysfs_name = "umc_err_count",
-               .debugfs_name = "umc_err_inject",
-       };
-       struct ras_common_if ras_block = {
-               .block = AMDGPU_RAS_BLOCK__UMC,
-               .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-               .sub_block_index = 0,
-               .name = "umc",
-       };
        int r;
 
-       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
-               amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
+       if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
+               ras_if = &adev->gmc.umc_ras_if;
+       else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB)
+               ras_if = &adev->gmc.mmhub_ras_if;
+       else
+               BUG();
+
+       if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+               amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
                return 0;
        }
 
@@ -775,7 +773,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
                        if (r == -EAGAIN) {
                                /* request a gpu reset. will run again. */
                                amdgpu_ras_request_reset_on_boot(adev,
-                                               AMDGPU_RAS_BLOCK__UMC);
+                                               ras_block->block);
                                return 0;
                        }
                        /* fail to enable ras, cleanup all. */
@@ -789,41 +787,46 @@ static int gmc_v9_0_ecc_late_init(void *handle)
        if (!*ras_if)
                return -ENOMEM;
 
-       **ras_if = ras_block;
+       **ras_if = *ras_block;
 
        r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
        if (r) {
                if (r == -EAGAIN) {
                        amdgpu_ras_request_reset_on_boot(adev,
-                                       AMDGPU_RAS_BLOCK__UMC);
+                                       ras_block->block);
                        r = 0;
                }
                goto feature;
        }
 
        ih_info.head = **ras_if;
-       fs_info.head = **ras_if;
+       fs_info->head = **ras_if;
 
-       r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
-       if (r)
-               goto interrupt;
+       if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
+               r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
+               if (r)
+                       goto interrupt;
+       }
 
-       amdgpu_ras_debugfs_create(adev, &fs_info);
+       amdgpu_ras_debugfs_create(adev, fs_info);
 
-       r = amdgpu_ras_sysfs_create(adev, &fs_info);
+       r = amdgpu_ras_sysfs_create(adev, fs_info);
        if (r)
                goto sysfs;
 resume:
-       r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
-       if (r)
-               goto irq;
+       if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
+               r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+               if (r)
+                       goto irq;
+       }
 
        return 0;
 irq:
        amdgpu_ras_sysfs_remove(adev, *ras_if);
 sysfs:
        amdgpu_ras_debugfs_remove(adev, *ras_if);
-       amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
+       if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
+               amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
 interrupt:
        amdgpu_ras_feature_enable(adev, *ras_if, 0);
 feature:
@@ -832,6 +835,40 @@ feature:
        return r;
 }
 
+static int gmc_v9_0_ecc_late_init(void *handle)
+{
+       int r;
+
+       struct ras_fs_if umc_fs_info = {
+               .sysfs_name = "umc_err_count",
+               .debugfs_name = "umc_err_inject",
+       };
+       struct ras_common_if umc_ras_block = {
+               .block = AMDGPU_RAS_BLOCK__UMC,
+               .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+               .sub_block_index = 0,
+               .name = "umc",
+       };
+       struct ras_fs_if mmhub_fs_info = {
+               .sysfs_name = "mmhub_err_count",
+               .debugfs_name = "mmhub_err_inject",
+       };
+       struct ras_common_if mmhub_ras_block = {
+               .block = AMDGPU_RAS_BLOCK__MMHUB,
+               .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+               .sub_block_index = 0,
+               .name = "mmhub",
+       };
+
+       r = gmc_v9_0_ecc_ras_block_late_init(handle,
+                       &umc_fs_info, &umc_ras_block);
+       if (r)
+               return r;
+
+       r = gmc_v9_0_ecc_ras_block_late_init(handle,
+                       &mmhub_fs_info, &mmhub_ras_block);
+       return r;
+}
 
 static int gmc_v9_0_late_init(void *handle)
 {
@@ -1192,21 +1229,32 @@ static int gmc_v9_0_sw_fini(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
        if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
-                       adev->gmc.ras_if) {
-               struct ras_common_if *ras_if = adev->gmc.ras_if;
+                       adev->gmc.umc_ras_if) {
+               struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
                struct ras_ih_if ih_info = {
                        .head = *ras_if,
                };
 
-               /*remove fs first*/
+               /* remove fs first */
                amdgpu_ras_debugfs_remove(adev, ras_if);
                amdgpu_ras_sysfs_remove(adev, ras_if);
-               /*remove the IH*/
+               /* remove the IH */
                amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
                amdgpu_ras_feature_enable(adev, ras_if, 0);
                kfree(ras_if);
        }
 
+       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
+                       adev->gmc.mmhub_ras_if) {
+               struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if;
+
+               /* remove fs and disable ras feature */
+               amdgpu_ras_debugfs_remove(adev, ras_if);
+               amdgpu_ras_sysfs_remove(adev, ras_if);
+               amdgpu_ras_feature_enable(adev, ras_if, 0);
+               kfree(ras_if);
+       }
+
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);