drm/amdgpu: add sdma 4_x interrupts printing
author	Feifei Xu <Feifei.Xu@amd.com>
	Wed, 3 Mar 2021 10:42:40 +0000 (18:42 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Fri, 5 Mar 2021 20:12:23 +0000 (15:12 -0500)
Add printing of interrupt info for the VM_HOLE, DOORBELL_INVALID (byte enable),
POLL_TIMEOUT and SRBMWRITE error interrupts.

Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index e5b8fb8..f8fb755 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -64,6 +64,11 @@ struct amdgpu_sdma {
        struct amdgpu_irq_src   trap_irq;
        struct amdgpu_irq_src   illegal_inst_irq;
        struct amdgpu_irq_src   ecc_irq;
+       struct amdgpu_irq_src   vm_hole_irq;
+       struct amdgpu_irq_src   doorbell_invalid_irq;
+       struct amdgpu_irq_src   pool_timeout_irq;
+       struct amdgpu_irq_src   srbm_write_irq;
+
        int                     num_instances;
        uint32_t                    srbm_soft_reset;
        bool                    has_page_queue;
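The four new amdgpu_irq_src fields above follow the driver's usual pattern for process-only interrupt sources: a funcs table that provides only a .process callback, registration of the (IH client id, source id) pair in sw_init, and the funcs hook-up in set_irq_funcs. The sketch below condenses that pattern for the VM_HOLE source; it is illustrative only (the example_* names are not part of this patch, and SOC15_IH_CLIENTID_SDMA0 stands in for the per-instance client id returned by sdma_v4_0_seq_to_irq_id()).

	static int example_process_vm_hole(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   struct amdgpu_iv_entry *entry)
	{
		dev_err(adev->dev, "MC or SEM address in VM hole\n");
		return 0;
	}

	/* no .set callback: the source cannot be masked, only serviced */
	static const struct amdgpu_irq_src_funcs example_vm_hole_irq_funcs = {
		.process = example_process_vm_hole,
	};

	static int example_register_vm_hole_irq(struct amdgpu_device *adev)
	{
		/* sw_init: route (client id, source id) to the irq source ... */
		int r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
					  SDMA0_4_0__SRCID__SDMA_VM_HOLE,
					  &adev->sdma.vm_hole_irq);
		if (r)
			return r;

		/* ... set_irq_funcs: attach the process callback */
		adev->sdma.vm_hole_irq.funcs = &example_vm_hole_irq_funcs;
		return 0;
	}
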
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index fdc0c81..bfdd676 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1895,6 +1895,33 @@ static int sdma_v4_0_sw_init(void *handle)
                        return r;
        }
 
+       /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event */
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+                                     SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+                                     &adev->sdma.vm_hole_irq);
+               if (r)
+                       return r;
+
+               r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+                                     SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+                                     &adev->sdma.doorbell_invalid_irq);
+               if (r)
+                       return r;
+
+               r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+                                     SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+                                     &adev->sdma.pool_timeout_irq);
+               if (r)
+                       return r;
+
+               r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+                                     SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+                                     &adev->sdma.srbm_write_irq);
+               if (r)
+                       return r;
+       }
+
        for (i = 0; i < adev->sdma.num_instances; i++) {
                ring = &adev->sdma.instance[i].ring;
                ring->ring_obj = NULL;
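In the registration loop above, sdma_v4_0_seq_to_irq_id() resolves the IH client id for SDMA instance i. Conceptually it maps instance n to SOC15_IH_CLIENTID_SDMAn; the helper below is only an illustrative stand-in for the switch statement in sdma_v4_0.c, not the upstream code itself.

	static int example_seq_to_irq_id(int seq_num)
	{
		/* instance index -> SOC15 IH client id (illustrative table) */
		static const int client_id[] = {
			SOC15_IH_CLIENTID_SDMA0, SOC15_IH_CLIENTID_SDMA1,
			SOC15_IH_CLIENTID_SDMA2, SOC15_IH_CLIENTID_SDMA3,
			SOC15_IH_CLIENTID_SDMA4, SOC15_IH_CLIENTID_SDMA5,
			SOC15_IH_CLIENTID_SDMA6, SOC15_IH_CLIENTID_SDMA7,
		};

		if (seq_num < 0 || seq_num >= ARRAY_SIZE(client_id))
			return -EINVAL;
		return client_id[seq_num];
	}
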
@@ -2149,6 +2176,72 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
        return 0;
 }
 
+static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
+                                             struct amdgpu_iv_entry *entry)
+{
+       int instance;
+       struct amdgpu_task_info task_info;
+       u64 addr;
+
+       instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+       if (instance < 0 || instance >= adev->sdma.num_instances) {
+               dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+               return -EINVAL;
+       }
+
+       addr = (u64)entry->src_data[0] << 12;
+       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+       memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+       amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+       dev_info(adev->dev,
+                  "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
+                  "pasid:%u, for process %s pid %d thread %s pid %d\n",
+                  instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+                  entry->pasid, task_info.process_name, task_info.tgid,
+                  task_info.task_name, task_info.pid);
+       return 0;
+}
+
+static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
+                                             struct amdgpu_irq_src *source,
+                                             struct amdgpu_iv_entry *entry)
+{
+       dev_err(adev->dev, "MC or SEM address in VM hole\n");
+       sdma_v4_0_print_iv_entry(adev, entry);
+       return 0;
+}
+
+static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+                                             struct amdgpu_irq_src *source,
+                                             struct amdgpu_iv_entry *entry)
+{
+       dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable != 0xff\n");
+       sdma_v4_0_print_iv_entry(adev, entry);
+       return 0;
+}
+
+static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
+                                             struct amdgpu_irq_src *source,
+                                             struct amdgpu_iv_entry *entry)
+{
+       dev_err(adev->dev,
+               "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+       sdma_v4_0_print_iv_entry(adev, entry);
+       return 0;
+}
+
+static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
+                                             struct amdgpu_irq_src *source,
+                                             struct amdgpu_iv_entry *entry)
+{
+       dev_err(adev->dev,
+               "SDMA got a register write SRBM_WRITE command in a non-privileged command buffer\n");
+       sdma_v4_0_print_iv_entry(adev, entry);
+       return 0;
+}
+
 static void sdma_v4_0_update_medium_grain_clock_gating(
                struct amdgpu_device *adev,
                bool enable)
@@ -2454,7 +2547,21 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
        .process = amdgpu_sdma_process_ecc_irq,
 };
 
+static const struct amdgpu_irq_src_funcs sdma_v4_0_vm_hole_irq_funcs = {
+       .process = sdma_v4_0_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_doorbell_invalid_irq_funcs = {
+       .process = sdma_v4_0_process_doorbell_invalid_irq,
+};
 
+static const struct amdgpu_irq_src_funcs sdma_v4_0_pool_timeout_irq_funcs = {
+       .process = sdma_v4_0_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
+       .process = sdma_v4_0_process_srbm_write_irq,
+};
 
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
 {
@@ -2466,6 +2573,10 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
        case 8:
                adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
                adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+               adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+               adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+               adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+               adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
                break;
        case 2:
        default:
@@ -2476,6 +2587,10 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
        adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
        adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
        adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
+       adev->sdma.vm_hole_irq.funcs = &sdma_v4_0_vm_hole_irq_funcs;
+       adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_0_doorbell_invalid_irq_funcs;
+       adev->sdma.pool_timeout_irq.funcs = &sdma_v4_0_pool_timeout_irq_funcs;
+       adev->sdma.srbm_write_irq.funcs = &sdma_v4_0_srbm_write_irq_funcs;
 }
 
 /**
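For reference, sdma_v4_0_print_iv_entry() above rebuilds the 48-bit faulting VM address from the IV entry's payload: src_data[0] carries address bits 43:12 (the 4 KiB page frame) and the low nibble of src_data[1] carries bits 47:44. A minimal, self-contained restatement of that decode (hypothetical helper name, not part of the patch):

	#include <stdint.h>

	static uint64_t example_decode_fault_addr(uint32_t src_data0, uint32_t src_data1)
	{
		uint64_t addr;

		addr  = (uint64_t)src_data0 << 12;         /* bits 43:12 */
		addr |= ((uint64_t)src_data1 & 0xf) << 44; /* bits 47:44 */
		return addr;
	}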