drm/radeon: add support for MC/VM setup on CIK (v6)
authorAlex Deucher <alexander.deucher@amd.com>
Tue, 9 Apr 2013 16:45:26 +0000 (12:45 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 25 Jun 2013 21:50:25 +0000 (17:50 -0400)
The vm callbacks are the same as the SI ones right now
(same regs and bits). We could share the SI variants, and
I may yet do that, but I figured I would add CIK specific
ones for now in case we need to change anything.

V2: add documentation, minor fixes.
V3: integrate vram offset fixes for APUs
V4: enable 2 level VM PTs
V5: index SH_MEM_* regs properly
V6: add ib_parse()

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cikd.h
drivers/gpu/drm/radeon/si.c

index e448ae2..a4e1b95 100644 (file)
@@ -33,6 +33,7 @@
 
 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
+extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 
 /*
  * Core functions
@@ -1387,3 +1388,363 @@ int cik_asic_reset(struct radeon_device *rdev)
 
        return cik_gfx_gpu_soft_reset(rdev);
 }
+
+/* MC */
+/**
+ * cik_mc_program - program the GPU memory controller
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set the location of vram, gart, and AGP in the GPU's
+ * physical address space (CIK).
+ */
+static void cik_mc_program(struct radeon_device *rdev)
+{
+       struct evergreen_mc_save save;
+       u32 tmp;
+       int i, j;
+
+       /* Initialize HDP */
+       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+               WREG32((0x2c14 + j), 0x00000000);
+               WREG32((0x2c18 + j), 0x00000000);
+               WREG32((0x2c1c + j), 0x00000000);
+               WREG32((0x2c20 + j), 0x00000000);
+               WREG32((0x2c24 + j), 0x00000000);
+       }
+       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+       evergreen_mc_stop(rdev, &save);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+       }
+       /* Lockout access through VGA aperture*/
+       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+       /* Update configuration */
+       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+              rdev->mc.vram_start >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+              rdev->mc.vram_end >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+              rdev->vram_scratch.gpu_addr >> 12);
+       tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
+       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+       WREG32(MC_VM_FB_LOCATION, tmp);
+       /* XXX double check these! */
+       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+       WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+       WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+       WREG32(MC_VM_AGP_BASE, 0);
+       WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+       WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+       if (radeon_mc_wait_for_idle(rdev)) {
+               dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+       }
+       evergreen_mc_resume(rdev, &save);
+       /* we need to own VRAM, so turn off the VGA renderer here
+        * to stop it overwriting our objects */
+       rv515_vga_render_disable(rdev);
+}
+
+/**
+ * cik_mc_init - initialize the memory controller driver params
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space (CIK).
+ * Returns 0 for success.
+ */
+static int cik_mc_init(struct radeon_device *rdev)
+{
+       u32 tmp;
+       int chansize, numchan;
+
+       /* Get VRAM informations */
+       rdev->mc.vram_is_ddr = true;
+       tmp = RREG32(MC_ARB_RAMCFG);
+       if (tmp & CHANSIZE_MASK) {
+               chansize = 64;
+       } else {
+               chansize = 32;
+       }
+       tmp = RREG32(MC_SHARED_CHMAP);
+       switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+       case 0:
+       default:
+               numchan = 1;
+               break;
+       case 1:
+               numchan = 2;
+               break;
+       case 2:
+               numchan = 4;
+               break;
+       case 3:
+               numchan = 8;
+               break;
+       case 4:
+               numchan = 3;
+               break;
+       case 5:
+               numchan = 6;
+               break;
+       case 6:
+               numchan = 10;
+               break;
+       case 7:
+               numchan = 12;
+               break;
+       case 8:
+               numchan = 16;
+               break;
+       }
+       rdev->mc.vram_width = numchan * chansize;
+       /* Could aper size report 0 ? */
+       rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+       rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
+       /* size in MB on si */
+       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.visible_vram_size = rdev->mc.aper_size;
+       si_vram_gtt_location(rdev, &rdev->mc);
+       radeon_update_bandwidth_info(rdev);
+
+       return 0;
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the radeon vm/hsa code.
+ */
+/**
+ * cik_pcie_gart_tlb_flush - gart tlb flush callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Flush the TLB for the VMID 0 page table (CIK).
+ */
+void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+       /* flush hdp cache */
+       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+
+       /* bits 0-15 are the VM contexts0-15 */
+       WREG32(VM_INVALIDATE_REQUEST, 0x1);
+}
+
+/**
+ * cik_pcie_gart_enable - gart enable
+ *
+ * @rdev: radeon_device pointer
+ *
+ * This sets up the TLBs, programs the page tables for VMID0,
+ * sets up the hw for VMIDs 1-15 which are allocated on
+ * demand, and sets up the global locations for the LDS, GDS,
+ * and GPUVM for FSA64 clients (CIK).
+ * Returns 0 for success, errors for failure.
+ */
+static int cik_pcie_gart_enable(struct radeon_device *rdev)
+{
+       int r, i;
+
+       if (rdev->gart.robj == NULL) {
+               dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
+               return -EINVAL;
+       }
+       r = radeon_gart_table_vram_pin(rdev);
+       if (r)
+               return r;
+       radeon_gart_restore(rdev);
+       /* Setup TLB control */
+       WREG32(MC_VM_MX_L1_TLB_CNTL,
+              (0xA << 7) |
+              ENABLE_L1_TLB |
+              SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              ENABLE_ADVANCED_DRIVER_MODEL |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       /* setup context0 */
+       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+                       (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT0_CNTL2, 0);
+       WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+
+       WREG32(0x15D4, 0);
+       WREG32(0x15D8, 0);
+       WREG32(0x15DC, 0);
+
+       /* empty context1-15 */
+       /* FIXME start with 4G, once using 2 level pt switch to full
+        * vm size space
+        */
+       /* set vm size, must be a multiple of 4 */
+       WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+       for (i = 1; i < 16; i++) {
+               if (i < 8)
+                       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
+                              rdev->gart.table_addr >> 12);
+               else
+                       WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
+                              rdev->gart.table_addr >> 12);
+       }
+
+       /* enable context1-15 */
+       WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+              (u32)(rdev->dummy_page.addr >> 12));
+       WREG32(VM_CONTEXT1_CNTL2, 0);
+       WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
+                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+
+       /* TC cache setup ??? */
+       WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L1_STORE_POLICY, 0);
+
+       WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
+       WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY0, 0);
+       WREG32(TC_CFG_L2_STORE_POLICY1, 0);
+       WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
+
+       WREG32(TC_CFG_L1_VOLATILE, 0);
+       WREG32(TC_CFG_L2_VOLATILE, 0);
+
+       if (rdev->family == CHIP_KAVERI) {
+               u32 tmp = RREG32(CHUB_CONTROL);
+               tmp &= ~BYPASS_VM;
+               WREG32(CHUB_CONTROL, tmp);
+       }
+
+       /* XXX SH_MEM regs */
+       /* where to put LDS, scratch, GPUVM in FSA64 space */
+       for (i = 0; i < 16; i++) {
+               WREG32(SRBM_GFX_CNTL, VMID(i));
+               WREG32(SH_MEM_CONFIG, 0);
+               WREG32(SH_MEM_APE1_BASE, 1);
+               WREG32(SH_MEM_APE1_LIMIT, 0);
+               WREG32(SH_MEM_BASES, 0);
+       }
+       WREG32(SRBM_GFX_CNTL, 0);
+
+       cik_pcie_gart_tlb_flush(rdev);
+       DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+                (unsigned)(rdev->mc.gtt_size >> 20),
+                (unsigned long long)rdev->gart.table_addr);
+       rdev->gart.ready = true;
+       return 0;
+}
+
+/**
+ * cik_pcie_gart_disable - gart disable
+ *
+ * @rdev: radeon_device pointer
+ *
+ * This disables all VM page table (CIK).
+ */
+static void cik_pcie_gart_disable(struct radeon_device *rdev)
+{
+       /* Disable all tables */
+       WREG32(VM_CONTEXT0_CNTL, 0);
+       WREG32(VM_CONTEXT1_CNTL, 0);
+       /* Setup TLB control */
+       WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+              SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL,
+              ENABLE_L2_FRAGMENT_PROCESSING |
+              ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+              ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+              EFFECTIVE_L2_QUEUE_SIZE(7) |
+              CONTEXT1_IDENTITY_ACCESS_MODE(1));
+       WREG32(VM_L2_CNTL2, 0);
+       WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+              L2_CACHE_BIGK_FRAGMENT_SIZE(6));
+       radeon_gart_table_vram_unpin(rdev);
+}
+
+/**
+ * cik_pcie_gart_fini - vm fini callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void cik_pcie_gart_fini(struct radeon_device *rdev)
+{
+       cik_pcie_gart_disable(rdev);
+       radeon_gart_table_vram_free(rdev);
+       radeon_gart_fini(rdev);
+}
+
+/* vm parser */
+/**
+ * cik_ib_parse - vm ib_parse callback
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer pointer
+ *
+ * CIK uses hw IB checking so this is a nop (CIK).
+ */
+int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       return 0;
+}
+
+/*
+ * vm
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the radeon vm/hsa code.
+ */
+/**
+ * cik_vm_init - cik vm init callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Inits cik specific vm parameters (number of VMs, base of vram for
+ * VMIDs 1-15) (CIK).
+ * Returns 0 for success.
+ */
+int cik_vm_init(struct radeon_device *rdev)
+{
+       /* number of VMs */
+       rdev->vm_manager.nvm = 16;
+       /* base offset of vram pages */
+       if (rdev->flags & RADEON_IS_IGP) {
+               u64 tmp = RREG32(MC_VM_FB_OFFSET);
+               tmp <<= 22;
+               rdev->vm_manager.vram_base_offset = tmp;
+       } else
+               rdev->vm_manager.vram_base_offset = 0;
+
+       return 0;
+}
+
+/**
+ * cik_vm_fini - cik vm fini callback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down any asic specific VM setup (CIK).
+ */
+void cik_vm_fini(struct radeon_device *rdev)
+{
+}
+
index 41b2316..071a781 100644 (file)
 
 #define CIK_RB_BITMAP_WIDTH_PER_SH  2
 
+#define VGA_HDP_CONTROL                                0x328
+#define                VGA_MEMORY_DISABLE                              (1 << 4)
+
 #define DMIF_ADDR_CALC                                 0xC00
 
+#define        SRBM_GFX_CNTL                                   0xE44
+#define                PIPEID(x)                                       ((x) << 0)
+#define                MEID(x)                                         ((x) << 2)
+#define                VMID(x)                                         ((x) << 4)
+#define                QUEUEID(x)                                      ((x) << 8)
+
 #define        SRBM_STATUS2                                    0xE4C
 #define        SRBM_STATUS                                     0xE50
 
+#define VM_L2_CNTL                                     0x1400
+#define                ENABLE_L2_CACHE                                 (1 << 0)
+#define                ENABLE_L2_FRAGMENT_PROCESSING                   (1 << 1)
+#define                L2_CACHE_PTE_ENDIAN_SWAP_MODE(x)                ((x) << 2)
+#define                L2_CACHE_PDE_ENDIAN_SWAP_MODE(x)                ((x) << 4)
+#define                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE         (1 << 9)
+#define                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE        (1 << 10)
+#define                EFFECTIVE_L2_QUEUE_SIZE(x)                      (((x) & 7) << 15)
+#define                CONTEXT1_IDENTITY_ACCESS_MODE(x)                (((x) & 3) << 19)
+#define VM_L2_CNTL2                                    0x1404
+#define                INVALIDATE_ALL_L1_TLBS                          (1 << 0)
+#define                INVALIDATE_L2_CACHE                             (1 << 1)
+#define                INVALIDATE_CACHE_MODE(x)                        ((x) << 26)
+#define                        INVALIDATE_PTE_AND_PDE_CACHES           0
+#define                        INVALIDATE_ONLY_PTE_CACHES              1
+#define                        INVALIDATE_ONLY_PDE_CACHES              2
+#define VM_L2_CNTL3                                    0x1408
+#define                BANK_SELECT(x)                                  ((x) << 0)
+#define                L2_CACHE_UPDATE_MODE(x)                         ((x) << 6)
+#define                L2_CACHE_BIGK_FRAGMENT_SIZE(x)                  ((x) << 15)
+#define                L2_CACHE_BIGK_ASSOCIATIVITY                     (1 << 20)
+#define        VM_L2_STATUS                                    0x140C
+#define                L2_BUSY                                         (1 << 0)
+#define VM_CONTEXT0_CNTL                               0x1410
+#define                ENABLE_CONTEXT                                  (1 << 0)
+#define                PAGE_TABLE_DEPTH(x)                             (((x) & 3) << 1)
+#define                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT           (1 << 4)
+#define VM_CONTEXT1_CNTL                               0x1414
+#define VM_CONTEXT0_CNTL2                              0x1430
+#define VM_CONTEXT1_CNTL2                              0x1434
+#define        VM_CONTEXT8_PAGE_TABLE_BASE_ADDR                0x1438
+#define        VM_CONTEXT9_PAGE_TABLE_BASE_ADDR                0x143c
+#define        VM_CONTEXT10_PAGE_TABLE_BASE_ADDR               0x1440
+#define        VM_CONTEXT11_PAGE_TABLE_BASE_ADDR               0x1444
+#define        VM_CONTEXT12_PAGE_TABLE_BASE_ADDR               0x1448
+#define        VM_CONTEXT13_PAGE_TABLE_BASE_ADDR               0x144c
+#define        VM_CONTEXT14_PAGE_TABLE_BASE_ADDR               0x1450
+#define        VM_CONTEXT15_PAGE_TABLE_BASE_ADDR               0x1454
+
+#define VM_INVALIDATE_REQUEST                          0x1478
+#define VM_INVALIDATE_RESPONSE                         0x147c
+
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR      0x1518
+#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR      0x151c
+
+#define        VM_CONTEXT0_PAGE_TABLE_BASE_ADDR                0x153c
+#define        VM_CONTEXT1_PAGE_TABLE_BASE_ADDR                0x1540
+#define        VM_CONTEXT2_PAGE_TABLE_BASE_ADDR                0x1544
+#define        VM_CONTEXT3_PAGE_TABLE_BASE_ADDR                0x1548
+#define        VM_CONTEXT4_PAGE_TABLE_BASE_ADDR                0x154c
+#define        VM_CONTEXT5_PAGE_TABLE_BASE_ADDR                0x1550
+#define        VM_CONTEXT6_PAGE_TABLE_BASE_ADDR                0x1554
+#define        VM_CONTEXT7_PAGE_TABLE_BASE_ADDR                0x1558
+#define        VM_CONTEXT0_PAGE_TABLE_START_ADDR               0x155c
+#define        VM_CONTEXT1_PAGE_TABLE_START_ADDR               0x1560
+
+#define        VM_CONTEXT0_PAGE_TABLE_END_ADDR                 0x157C
+#define        VM_CONTEXT1_PAGE_TABLE_END_ADDR                 0x1580
+
 #define MC_SHARED_CHMAP                                                0x2004
 #define                NOOFCHAN_SHIFT                                  12
 #define                NOOFCHAN_MASK                                   0x0000f000
 #define MC_SHARED_CHREMAP                                      0x2008
 
+#define CHUB_CONTROL                                   0x1864
+#define                BYPASS_VM                                       (1 << 0)
+
+#define        MC_VM_FB_LOCATION                               0x2024
+#define        MC_VM_AGP_TOP                                   0x2028
+#define        MC_VM_AGP_BOT                                   0x202C
+#define        MC_VM_AGP_BASE                                  0x2030
+#define        MC_VM_SYSTEM_APERTURE_LOW_ADDR                  0x2034
+#define        MC_VM_SYSTEM_APERTURE_HIGH_ADDR                 0x2038
+#define        MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR              0x203C
+
+#define        MC_VM_MX_L1_TLB_CNTL                            0x2064
+#define                ENABLE_L1_TLB                                   (1 << 0)
+#define                ENABLE_L1_FRAGMENT_PROCESSING                   (1 << 1)
+#define                SYSTEM_ACCESS_MODE_PA_ONLY                      (0 << 3)
+#define                SYSTEM_ACCESS_MODE_USE_SYS_MAP                  (1 << 3)
+#define                SYSTEM_ACCESS_MODE_IN_SYS                       (2 << 3)
+#define                SYSTEM_ACCESS_MODE_NOT_IN_SYS                   (3 << 3)
+#define                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU       (0 << 5)
+#define                ENABLE_ADVANCED_DRIVER_MODEL                    (1 << 6)
+#define        MC_VM_FB_OFFSET                                 0x2068
+
 #define        MC_ARB_RAMCFG                                   0x2760
 #define                NOOFBANK_SHIFT                                  0
 #define                NOOFBANK_MASK                                   0x00000003
 #define HDP_MISC_CNTL                                  0x2F4C
 #define        HDP_FLUSH_INVALIDATE_CACHE                      (1 << 0)
 
+#define        CONFIG_MEMSIZE                                  0x5428
+
+#define HDP_MEM_COHERENCY_FLUSH_CNTL                   0x5480
+
 #define        BIF_FB_EN                                               0x5490
 #define                FB_READ_EN                                      (1 << 0)
 #define                FB_WRITE_EN                                     (1 << 1)
 
+#define HDP_REG_COHERENCY_FLUSH_CNTL                   0x54A0
+
 #define        GRBM_CNTL                                       0x8000
 #define                GRBM_READ_TIMEOUT(x)                            ((x) << 0)
 
 
 #define        SQ_CONFIG                                       0x8C00
 
+#define        SH_MEM_BASES                                    0x8C28
+/* if PTR32, these are the bases for scratch and lds */
+#define                PRIVATE_BASE(x)                                 ((x) << 0) /* scratch */
+#define                SHARED_BASE(x)                                  ((x) << 16) /* LDS */
+#define        SH_MEM_APE1_BASE                                0x8C2C
+/* if PTR32, this is the base location of GPUVM */
+#define        SH_MEM_APE1_LIMIT                               0x8C30
+/* if PTR32, this is the upper limit of GPUVM */
+#define        SH_MEM_CONFIG                                   0x8C34
+#define                PTR32                                           (1 << 0)
+#define                ALIGNMENT_MODE(x)                               ((x) << 2)
+#define                        SH_MEM_ALIGNMENT_MODE_DWORD                     0
+#define                        SH_MEM_ALIGNMENT_MODE_DWORD_STRICT              1
+#define                        SH_MEM_ALIGNMENT_MODE_STRICT                    2
+#define                        SH_MEM_ALIGNMENT_MODE_UNALIGNED                 3
+#define                DEFAULT_MTYPE(x)                                ((x) << 4)
+#define                APE1_MTYPE(x)                                   ((x) << 7)
+
 #define        SX_DEBUG_1                                      0x9060
 
 #define        SPI_CONFIG_CNTL                                 0x9100
 #define        TCP_CHAN_STEER_LO                               0xac0c
 #define        TCP_CHAN_STEER_HI                               0xac10
 
+#define        TC_CFG_L1_LOAD_POLICY0                          0xAC68
+#define        TC_CFG_L1_LOAD_POLICY1                          0xAC6C
+#define        TC_CFG_L1_STORE_POLICY                          0xAC70
+#define        TC_CFG_L2_LOAD_POLICY0                          0xAC74
+#define        TC_CFG_L2_LOAD_POLICY1                          0xAC78
+#define        TC_CFG_L2_STORE_POLICY0                         0xAC7C
+#define        TC_CFG_L2_STORE_POLICY1                         0xAC80
+#define        TC_CFG_L2_ATOMIC_POLICY                         0xAC84
+#define        TC_CFG_L1_VOLATILE                              0xAC88
+#define        TC_CFG_L2_VOLATILE                              0xAC8C
+
 #define PA_SC_RASTER_CONFIG                             0x28350
 #       define RASTER_CONFIG_RB_MAP_0                   0
 #       define RASTER_CONFIG_RB_MAP_1                   1
index a1b0da6..813a8a9 100644 (file)
@@ -3535,8 +3535,8 @@ static void si_mc_program(struct radeon_device *rdev)
        }
 }
 
-static void si_vram_gtt_location(struct radeon_device *rdev,
-                                struct radeon_mc *mc)
+void si_vram_gtt_location(struct radeon_device *rdev,
+                         struct radeon_mc *mc)
 {
        if (mc->mc_vram_size > 0xFFC0000000ULL) {
                /* leave room for at least 1024M GTT */