drm/amdgpu: do gfxhub init for all XCDs
author Le Ma <le.ma@amd.com>
Wed, 24 Nov 2021 09:24:58 +0000 (17:24 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:33:47 +0000 (09:33 -0400)
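Each XCD needs to do its own gfxhub init, so program the gfxhub
registers on every XCD instance (0 .. adev->gfx.num_xcd - 1) instead
of on instance 0 only.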

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c

index c59c6c8..79af32b 100644 (file)
@@ -43,19 +43,25 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
                                         uint64_t page_table_base)
 {
        struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+       int i;
 
-       WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-                           hub->ctx_addr_distance * vmid,
-                           lower_32_bits(page_table_base));
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               WREG32_SOC15_OFFSET(GC, i,
+                                   regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+                                   hub->ctx_addr_distance * vmid,
+                                   lower_32_bits(page_table_base));
 
-       WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-                           hub->ctx_addr_distance * vmid,
-                           upper_32_bits(page_table_base));
+               WREG32_SOC15_OFFSET(GC, i,
+                                   regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+                                   hub->ctx_addr_distance * vmid,
+                                   upper_32_bits(page_table_base));
+       }
 }
 
 static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
        uint64_t pt_base;
+       int i;
 
        if (adev->gmc.pdb0_bo)
                pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
@@ -67,26 +73,36 @@ static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev)
        /* If use GART for FB translation, vmid0 page table covers both
         * vram and system memory (gart)
         */
-       if (adev->gmc.pdb0_bo) {
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-                               (u32)(adev->gmc.fb_start >> 12));
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-                               (u32)(adev->gmc.fb_start >> 44));
-
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-                               (u32)(adev->gmc.gart_end >> 12));
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-                               (u32)(adev->gmc.gart_end >> 44));
-       } else {
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-                               (u32)(adev->gmc.gart_start >> 12));
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-                               (u32)(adev->gmc.gart_start >> 44));
-
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-                               (u32)(adev->gmc.gart_end >> 12));
-               WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-                               (u32)(adev->gmc.gart_end >> 44));
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               if (adev->gmc.pdb0_bo) {
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+                                    (u32)(adev->gmc.fb_start >> 12));
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+                                    (u32)(adev->gmc.fb_start >> 44));
+
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+                                    (u32)(adev->gmc.gart_end >> 12));
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+                                    (u32)(adev->gmc.gart_end >> 44));
+               } else {
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+                                    (u32)(adev->gmc.gart_start >> 12));
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+                                    (u32)(adev->gmc.gart_start >> 44));
+
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+                                    (u32)(adev->gmc.gart_end >> 12));
+                       WREG32_SOC15(GC, i,
+                                    regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+                                    (u32)(adev->gmc.gart_end >> 44));
+               }
        }
 }
 
@@ -94,160 +110,183 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev)
 {
        uint64_t value;
        uint32_t tmp;
+       int i;
 
-       /* Program the AGP BAR */
-       WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BASE, 0);
-       WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
-       WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
-       if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
-               /* Program the system aperture low logical page number. */
-               WREG32_SOC15_RLC(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-                       min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
-               if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-                       /*
-                       * Raven2 has a HW issue that it is unable to use the
-                       * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
-                       * So here is the workaround that increase system
-                       * aperture high address (add 1) to get rid of the VM
-                       * fault and hardware hang.
-                       */
-                       WREG32_SOC15_RLC(GC, 0,
-                                        regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-                                        max((adev->gmc.fb_end >> 18) + 0x1,
-                                            adev->gmc.agp_end >> 18));
-               else
-                       WREG32_SOC15_RLC(GC, 0,
-                               regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-                               max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
-               /* Set default page address. */
-               value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
-               WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
-                            (u32)(value >> 12));
-               WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
-                            (u32)(value >> 44));
-
-               /* Program "protection fault". */
-               WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
-                            (u32)(adev->dummy_page_addr >> 12));
-               WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
-                            (u32)((u64)adev->dummy_page_addr >> 44));
-
-               tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2);
-               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
-                                   ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
-               WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
-       }
-
-       /* In the case squeezing vram into GART aperture, we don't use
-        * FB aperture and AGP aperture. Disable them.
-        */
-       if (adev->gmc.pdb0_bo) {
-               WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_TOP, 0);
-               WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
-               WREG32_SOC15(GC, 0, regMC_VM_AGP_TOP, 0);
-               WREG32_SOC15(GC, 0, regMC_VM_AGP_BOT, 0xFFFFFF);
-               WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
-               WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               /* Program the AGP BAR */
+               WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BASE, 0);
+               WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+               WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+               if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+                       /* Program the system aperture low logical page number. */
+                       WREG32_SOC15_RLC(GC, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+                               min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+                       if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+                               /*
+                               * Raven2 has a HW issue that it is unable to use the
+                               * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+                               * So here is the workaround that increase system
+                               * aperture high address (add 1) to get rid of the VM
+                               * fault and hardware hang.
+                               */
+                               WREG32_SOC15_RLC(GC, i,
+                                                regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+                                                max((adev->gmc.fb_end >> 18) + 0x1,
+                                                    adev->gmc.agp_end >> 18));
+                       else
+                               WREG32_SOC15_RLC(GC, i,
+                                       regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+                                       max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+                       /* Set default page address. */
+                       value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+                       WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+                                    (u32)(value >> 12));
+                       WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+                                    (u32)(value >> 44));
+
+                       /* Program "protection fault". */
+                       WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+                                    (u32)(adev->dummy_page_addr >> 12));
+                       WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+                                    (u32)((u64)adev->dummy_page_addr >> 44));
+
+                       tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL2);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+                                           ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+                       WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+               }
+
+               /* In the case squeezing vram into GART aperture, we don't use
+                * FB aperture and AGP aperture. Disable them.
+                */
+               if (adev->gmc.pdb0_bo) {
+                       WREG32_SOC15(GC, i, regMC_VM_FB_LOCATION_TOP, 0);
+                       WREG32_SOC15(GC, i, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+                       WREG32_SOC15(GC, i, regMC_VM_AGP_TOP, 0);
+                       WREG32_SOC15(GC, i, regMC_VM_AGP_BOT, 0xFFFFFF);
+                       WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+                       WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+               }
        }
 }
 
 static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev)
 {
        uint32_t tmp;
+       int i;
 
-       /* Setup TLB control */
-       tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL);
-
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
-                           ENABLE_ADVANCED_DRIVER_MODEL, 1);
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
-                           SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
-                           MTYPE, MTYPE_UC);/* XXX for emulation. */
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
-
-       WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               /* Setup TLB control */
+               tmp = RREG32_SOC15(GC, i, regMC_VM_MX_L1_TLB_CNTL);
+
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+                                   ENABLE_L1_TLB, 1);
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+                                   SYSTEM_ACCESS_MODE, 3);
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+                                   ENABLE_ADVANCED_DRIVER_MODEL, 1);
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+                                   SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+                                   MTYPE, MTYPE_UC);/* XXX for emulation. */
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+               WREG32_SOC15_RLC(GC, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+       }
 }
 
 static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev)
 {
        uint32_t tmp;
+       int i;
 
-       /* Setup L2 cache */
-       tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
-       /* XXX for emulation, Refer to closed source code.*/
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
-                           0);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
-       WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL, tmp);
-
-       tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL2);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
-       WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL2, tmp);
-
-       tmp = regVM_L2_CNTL3_DEFAULT;
-       if (adev->gmc.translate_further) {
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
-                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
-       } else {
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
-                                   L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
-       }
-       WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL3, tmp);
-
-       tmp = regVM_L2_CNTL4_DEFAULT;
-       if (adev->gmc.xgmi.connected_to_cpu) {
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
-       } else {
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
-               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               /* Setup L2 cache */
+               tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+               /* XXX for emulation, Refer to closed source code.*/
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+                                   0);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+               WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL, tmp);
+
+               tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL2);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+               WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL2, tmp);
+
+               tmp = regVM_L2_CNTL3_DEFAULT;
+               if (adev->gmc.translate_further) {
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+                                           L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+               } else {
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+                                           L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+               }
+               WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL3, tmp);
+
+               tmp = regVM_L2_CNTL4_DEFAULT;
+               if (adev->gmc.xgmi.connected_to_cpu) {
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+               } else {
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+               }
+               WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL4, tmp);
        }
-       WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL4, tmp);
 }
 
 static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev)
 {
        uint32_t tmp;
+       int i;
 
-       tmp = RREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
-                       adev->gmc.vmid0_page_table_depth);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
-                       adev->gmc.vmid0_page_table_block_size);
-       tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
-                           RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
-       WREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL, tmp);
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               tmp = RREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL);
+               tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+               tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+                               adev->gmc.vmid0_page_table_depth);
+               tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+                               adev->gmc.vmid0_page_table_block_size);
+               tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+                                   RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+               WREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL, tmp);
+       }
 }
 
 static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev)
 {
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
-                    0XFFFFFFFF);
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
-                    0x0000000F);
-
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
-                    0);
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
-                    0);
-
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
-       WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+       int i;
 
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+                            0XFFFFFFFF);
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+                            0x0000000F);
+
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+                            0);
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+                            0);
+
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+               WREG32_SOC15(GC, i,
+                            regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+       }
 }
 
 static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
@@ -255,7 +294,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
        struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
        unsigned num_level, block_size;
        uint32_t tmp;
-       int i;
+       int i, j;
 
        num_level = adev->vm_manager.num_level;
        block_size = adev->vm_manager.block_size;
@@ -264,81 +303,89 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
        else
                block_size -= 9;
 
-       for (i = 0; i <= 14; i++) {
-               tmp = RREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL, i);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
-                                   num_level);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
-                                   1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   PAGE_TABLE_BLOCK_SIZE,
-                                   block_size);
-               /* Send no-retry XNACK on fault to suppress VM fault storm.
-                * On Aldebaran, XNACK can be enabled in the SQ per-process.
-                * Retry faults need to be enabled for that to work.
-                */
-               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-                                   RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-                                   !adev->gmc.noretry ||
-                                   adev->asic_type == CHIP_ALDEBARAN);
-               WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL,
-                                   i * hub->ctx_distance, tmp);
-               WREG32_SOC15_OFFSET(GC, 0,
-                                   regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
-                                   i * hub->ctx_addr_distance, 0);
-               WREG32_SOC15_OFFSET(GC, 0,
-                                   regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
-                                   i * hub->ctx_addr_distance, 0);
-               WREG32_SOC15_OFFSET(GC, 0,
-                                   regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
-                                   i * hub->ctx_addr_distance,
-                                   lower_32_bits(adev->vm_manager.max_pfn - 1));
-               WREG32_SOC15_OFFSET(GC, 0,
-                                   regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
-                                   i * hub->ctx_addr_distance,
-                                   upper_32_bits(adev->vm_manager.max_pfn - 1));
+       for (j = 0; j < adev->gfx.num_xcd; j++) {
+               for (i = 0; i <= 14; i++) {
+                       tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, i);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+                                           num_level);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+                                           1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           PAGE_TABLE_BLOCK_SIZE,
+                                           block_size);
+                       /* Send no-retry XNACK on fault to suppress VM fault storm.
+                        * On Aldebaran, XNACK can be enabled in the SQ per-process.
+                        * Retry faults need to be enabled for that to work.
+                        */
+                       tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                           RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+                                           !adev->gmc.noretry ||
+                                           adev->asic_type == CHIP_ALDEBARAN);
+                       WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL,
+                                           i * hub->ctx_distance, tmp);
+                       WREG32_SOC15_OFFSET(GC, j,
+                                           regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+                                           i * hub->ctx_addr_distance, 0);
+                       WREG32_SOC15_OFFSET(GC, j,
+                                           regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+                                           i * hub->ctx_addr_distance, 0);
+                       WREG32_SOC15_OFFSET(GC, j,
+                                           regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+                                           i * hub->ctx_addr_distance,
+                                           lower_32_bits(adev->vm_manager.max_pfn - 1));
+                       WREG32_SOC15_OFFSET(GC, j,
+                                           regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+                                           i * hub->ctx_addr_distance,
+                                           upper_32_bits(adev->vm_manager.max_pfn - 1));
+               }
        }
 }
 
 static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev)
 {
        struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
-       unsigned i;
-
-       for (i = 0 ; i < 18; ++i) {
-               WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
-                                   i * hub->eng_addr_distance, 0xffffffff);
-               WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
-                                   i * hub->eng_addr_distance, 0x1f);
+       unsigned i, j;
+
+       for (j = 0; j < adev->gfx.num_xcd; j++) {
+               for (i = 0 ; i < 18; ++i) {
+                       WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+                                           i * hub->eng_addr_distance, 0xffffffff);
+                       WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+                                           i * hub->eng_addr_distance, 0x1f);
+               }
        }
 }
 
 static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
 {
-       if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
+       int i;
+
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               if (amdgpu_sriov_vf(adev)) {
                /*
                 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
                 * VF copy registers that vbios post doesn't program; the
                 * SRIOV driver needs to program them
                 */
-               WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_BASE,
-                            adev->gmc.vram_start >> 24);
-               WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_TOP,
-                            adev->gmc.vram_end >> 24);
+                       WREG32_SOC15_RLC(GC, i, regMC_VM_FB_LOCATION_BASE,
+                                    adev->gmc.vram_start >> 24);
+                       WREG32_SOC15_RLC(GC, i, regMC_VM_FB_LOCATION_TOP,
+                                    adev->gmc.vram_end >> 24);
+               }
        }
 
        /* GART Enable. */
@@ -361,27 +408,29 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
 {
        struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
        u32 tmp;
-       u32 i;
-
-       /* Disable all tables */
-       for (i = 0; i < 16; i++)
-               WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_CNTL,
-                                   i * hub->ctx_distance, 0);
-
-       /* Setup TLB control */
-       tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL);
-       tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
-       tmp = REG_SET_FIELD(tmp,
-                               MC_VM_MX_L1_TLB_CNTL,
-                               ENABLE_ADVANCED_DRIVER_MODEL,
-                               0);
-       WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
-
-       /* Setup L2 cache */
-       tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-       WREG32_SOC15(GC, 0, regVM_L2_CNTL, tmp);
-       WREG32_SOC15(GC, 0, regVM_L2_CNTL3, 0);
+       u32 i, j;
+
+       for (j = 0; j < adev->gfx.num_xcd; j++) {
+               /* Disable all tables */
+               for (i = 0; i < 16; i++)
+                       WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT0_CNTL,
+                                           i * hub->ctx_distance, 0);
+
+               /* Setup TLB control */
+               tmp = RREG32_SOC15(GC, j, regMC_VM_MX_L1_TLB_CNTL);
+               tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+               tmp = REG_SET_FIELD(tmp,
+                                       MC_VM_MX_L1_TLB_CNTL,
+                                       ENABLE_ADVANCED_DRIVER_MODEL,
+                                       0);
+               WREG32_SOC15_RLC(GC, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+               /* Setup L2 cache */
+               tmp = RREG32_SOC15(GC, j, regVM_L2_CNTL);
+               tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+               WREG32_SOC15(GC, j, regVM_L2_CNTL, tmp);
+               WREG32_SOC15(GC, j, regVM_L2_CNTL3, 0);
+       }
 }
 
 /**
@@ -394,38 +443,42 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev,
                                                 bool value)
 {
        u32 tmp;
-       tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp,
-                       VM_L2_PROTECTION_FAULT_CNTL,
-                       TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
-                       value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                       EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-       if (!value) {
+       int i;
+
+       for (i = 0; i < adev->gfx.num_xcd; i++) {
+               tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp,
+                               VM_L2_PROTECTION_FAULT_CNTL,
+                               TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+                               value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                               READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
                tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                               CRASH_ON_NO_RETRY_FAULT, 1);
+                               WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
                tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
-                               CRASH_ON_RETRY_FAULT, 1);
+                               EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+               if (!value) {
+                       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                                       CRASH_ON_NO_RETRY_FAULT, 1);
+                       tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+                                       CRASH_ON_RETRY_FAULT, 1);
+               }
+               WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
        }
-       WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
 }
 
 static void gfxhub_v1_2_init(struct amdgpu_device *adev)