drm/radeon/kms: Add initial support for async DMA on cayman/TN
author	Alex Deucher <alexander.deucher@amd.com>
Tue, 4 Dec 2012 20:27:33 +0000 (15:27 -0500)
committer	Alex Deucher <alexander.deucher@amd.com>
Mon, 10 Dec 2012 21:53:34 +0000 (16:53 -0500)
There are two async DMA engines on cayman, one at 0xd000 and
one at 0xd800.  The programming interface is the same as on
evergreen; however, there are some changes to the commands
for using VMIDs.
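
As an illustration (not part of this patch), the second engine is the
first one's register block repeated at +0x800, so engine-relative
accesses reduce to a fixed offset added to the DMA0 address:

    u32 cntl0 = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); /* 0xd000 */
    u32 cntl1 = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); /* 0xd800 */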

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/nid.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h

index 68206df..c66251e 100644
@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
                                         CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
                cayman_cp_int_cntl_setup(rdev, 1, 0);
                cayman_cp_int_cntl_setup(rdev, 2, 0);
+               tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+               WREG32(CAYMAN_DMA1_CNTL, tmp);
        } else
                WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
        u32 grbm_int_cntl = 0;
        u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
        u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
-       u32 dma_cntl;
+       u32 dma_cntl, dma_cntl1 = 0;
 
        if (!rdev->irq.installed) {
                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev)
                dma_cntl |= TRAP_ENABLE;
        }
 
+       if (rdev->family >= CHIP_CAYMAN) {
+               dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+               if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+                       DRM_DEBUG("evergreen_irq_set: sw int dma1\n");
+                       dma_cntl1 |= TRAP_ENABLE;
+               }
+       }
+
        if (rdev->irq.crtc_vblank_int[0] ||
            atomic_read(&rdev->irq.pflip[0])) {
                DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev)
 
        WREG32(DMA_CNTL, dma_cntl);
 
+       if (rdev->family >= CHIP_CAYMAN)
+               WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
        WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
        WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3147,6 +3160,12 @@ restart_ih:
                case 233: /* GUI IDLE */
                        DRM_DEBUG("IH: GUI idle\n");
                        break;
+               case 244: /* DMA trap event */
+                       if (rdev->family >= CHIP_CAYMAN) {
+                               DRM_DEBUG("IH: DMA1 trap\n");
+                               radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+                       }
+                       break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
index 92d1f45..7b4a650 100644
 #       define CTXEMPTY_INT_ENABLE                        (1 << 28)
 #define DMA_TILING_CONFIG                                0xD0B8
 
+#define CAYMAN_DMA1_CNTL                                  0xd82c
+
 /* async DMA packets */
 #define DMA_PACKET(cmd, t, s, n)       ((((cmd) & 0xF) << 28) |        \
                                         (((t) & 0x1) << 23) |          \
index 30c18a6..b81aca4 100644
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+       WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+       WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
 
        tmp = gb_addr_config & NUM_PIPES_MASK;
        tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
                WREG32(SCRATCH_UMSK, 0);
+               rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
        }
 }
 
@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
        return 0;
 }
 
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has its own packet format that is
+ * different from the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+                               struct radeon_ib *ib)
+{
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+       if (rdev->wb.enabled) {
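+               /* Compute the ring position just past the IB packet emitted
+                * below: skip this 4-dword write packet, round up to the
+                * (wptr & 7) == 5 slot where the IB packet must start, then
+                * step over its 3 dwords.
+                */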
+               u32 next_rptr = ring->wptr + 4;
+               while ((next_rptr & 7) != 5)
+                       next_rptr++;
+               next_rptr += 3;
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+               radeon_ring_write(ring, next_rptr);
+       }
+
+       /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+        * Pad as necessary with NOPs.
+        */
+       while ((ring->wptr & 7) != 5)
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
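+       /* 3-dword IB packet: header carries the VM id, then the 32-byte
+        * aligned base address, then the IB size in dwords packed above
+        * the high address bits.
+        */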
+       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+       radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+       radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+       u32 rb_cntl;
+
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+       /* dma0 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+       /* dma1 */
+       rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+       rb_cntl &= ~DMA_RB_ENABLE;
+       WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+       rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+       rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring;
+       u32 rb_cntl, dma_cntl;
+       u32 rb_bufsz;
+       u32 reg_offset, wb_offset;
+       int i, r;
+
+       /* Reset dma */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+       RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+
+       for (i = 0; i < 2; i++) {
+               if (i == 0) {
+                       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+                       reg_offset = DMA0_REGISTER_OFFSET;
+                       wb_offset = R600_WB_DMA_RPTR_OFFSET;
+               } else {
+                       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+                       reg_offset = DMA1_REGISTER_OFFSET;
+                       wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+               }
+
+               WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+               WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+               /* Set ring buffer size in dwords */
+               rb_bufsz = drm_order(ring->ring_size / 4);
+               rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+               rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32(DMA_RB_RPTR + reg_offset, 0);
+               WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+               /* set the wb address whether it's enabled or not */
+               WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+                      upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+               WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+                      ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+               if (rdev->wb.enabled)
+                       rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+               WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+               /* enable DMA IBs */
+               WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+               dma_cntl = RREG32(DMA_CNTL + reg_offset);
+               dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+               WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+               ring->wptr = 0;
+               WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+               ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+               WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+               ring->ready = true;
+
+               r = radeon_ring_test(rdev, ring->idx, ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+       }
+
+       radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+       return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+       cayman_dma_stop(rdev);
+       radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
 {
        struct evergreen_mc_save save;
@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
        return cayman_gpu_soft_reset(rdev);
 }
 
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+       u32 dma_status_reg;
+
+       if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+               dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+       else
+               dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+       if (dma_status_reg & DMA_IDLE) {
+               radeon_ring_lockup_update(ring);
+               return false;
+       }
+       /* force ring activities */
+       radeon_ring_force_activity(rdev, ring);
+       return radeon_ring_test_lockup(rdev, ring);
+}
+
 static int cayman_startup(struct radeon_device *rdev)
 {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+       if (r) {
+               dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+               return r;
+       }
+
        /* Enable IRQ */
        r = r600_irq_init(rdev);
        if (r) {
@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
                             0, 0xfffff, RADEON_CP_PACKET2);
        if (r)
                return r;
+
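+       /* trailing radeon_ring_init() args: rptr/wptr shift (2) and mask
+        * (0x3fffc) for the byte-based ring pointers, and the DMA NOP
+        * packet used for ring padding.
+        */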
+       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+                            DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+                            DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       if (r)
+               return r;
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+       r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+                            DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+                            DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+       if (r)
+               return r;
+
        r = cayman_cp_load_microcode(rdev);
        if (r)
                return r;
@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
        if (r)
                return r;
 
+       r = cayman_dma_resume(rdev);
+       if (r)
+               return r;
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
 {
        r600_audio_fini(rdev);
        cayman_cp_enable(rdev, false);
-       rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+       cayman_dma_stop(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        cayman_pcie_gart_disable(rdev);
@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 1024 * 1024);
 
+       ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 64 * 1024);
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+       ring->ring_obj = NULL;
+       r600_ring_init(rdev, ring, 64 * 1024);
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
        if (r) {
                dev_err(rdev->dev, "disabling GPU acceleration\n");
                cayman_cp_fini(rdev);
+               cayman_dma_fini(rdev);
                r600_irq_fini(rdev);
                if (rdev->flags & RADEON_IS_IGP)
                        si_rlc_fini(rdev);
@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
 {
        r600_blit_fini(rdev);
        cayman_cp_fini(rdev);
+       cayman_dma_fini(rdev);
        r600_irq_fini(rdev);
        if (rdev->flags & RADEON_IS_IGP)
                si_rlc_fini(rdev);
@@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
 }
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+       struct radeon_ring *ring = &rdev->ring[ridx];
+
+       if (vm == NULL)
+               return;
+
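+       /* SRBM_WRITE dword1 packs (0xf << 16) above the register dword
+        * offset; the 0xf is assumed to be a byte-enable mask for the
+        * 32-bit write (not spelled out in this patch).
+        */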
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+       radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+       /* flush hdp cache */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+       radeon_ring_write(ring, 1);
+
+       /* bits 0-7 are the VM contexts0-7 */
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+       radeon_ring_write(ring, 1 << vm->id);
+}
index f5e54a7..b93186b 100644
 #define                VMID(x)                                         (((x) & 0x7) << 0)
 #define        SRBM_STATUS                                     0x0E50
 
+#define        SRBM_SOFT_RESET                                 0x0E60
+#define                SOFT_RESET_BIF                          (1 << 1)
+#define                SOFT_RESET_CG                           (1 << 2)
+#define                SOFT_RESET_DC                           (1 << 5)
+#define                SOFT_RESET_DMA1                         (1 << 6)
+#define                SOFT_RESET_GRBM                         (1 << 8)
+#define                SOFT_RESET_HDP                          (1 << 9)
+#define                SOFT_RESET_IH                           (1 << 10)
+#define                SOFT_RESET_MC                           (1 << 11)
+#define                SOFT_RESET_RLC                          (1 << 13)
+#define                SOFT_RESET_ROM                          (1 << 14)
+#define                SOFT_RESET_SEM                          (1 << 15)
+#define                SOFT_RESET_VMC                          (1 << 17)
+#define                SOFT_RESET_DMA                          (1 << 20)
+#define                SOFT_RESET_TST                          (1 << 21)
+#define                SOFT_RESET_REGBB                        (1 << 22)
+#define                SOFT_RESET_ORB                          (1 << 23)
+
 #define VM_CONTEXT0_REQUEST_RESPONSE                   0x1470
 #define                REQUEST_TYPE(x)                                 (((x) & 0xf) << 0)
 #define                RESPONSE_TYPE_MASK                              0x000000F0
 #define        PACKET3_SET_APPEND_CNT                          0x75
 #define        PACKET3_ME_WRITE                                0x7A
 
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET                              0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET                              0x800 /* not a register */
+
+#define DMA_RB_CNTL                                       0xd000
+#       define DMA_RB_ENABLE                              (1 << 0)
+#       define DMA_RB_SIZE(x)                             ((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE                         (1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE                  (1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE             (1 << 13)  /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)                ((x) << 16) /* log2 */
+#define DMA_RB_BASE                                       0xd004
+#define DMA_RB_RPTR                                       0xd008
+#define DMA_RB_WPTR                                       0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI                               0xd01c
+#define DMA_RB_RPTR_ADDR_LO                               0xd020
+
+#define DMA_IB_CNTL                                       0xd024
+#       define DMA_IB_ENABLE                              (1 << 0)
+#       define DMA_IB_SWAP_ENABLE                         (1 << 4)
+#       define CMD_VMID_FORCE                             (1 << 31)
+#define DMA_IB_RPTR                                       0xd028
+#define DMA_CNTL                                          0xd02c
+#       define TRAP_ENABLE                                (1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE                  (1 << 1)
+#       define SEM_WAIT_INT_ENABLE                        (1 << 2)
+#       define DATA_SWAP_ENABLE                           (1 << 3)
+#       define FENCE_SWAP_ENABLE                          (1 << 4)
+#       define CTXEMPTY_INT_ENABLE                        (1 << 28)
+#define DMA_STATUS_REG                                    0xd034
+#       define DMA_IDLE                                   (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL                     0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL                      0xd048
+#define DMA_TILING_CONFIG                                0xd0b8
+#define DMA_MODE                                          0xd0bc
+
+#define DMA_PACKET(cmd, t, s, n)       ((((cmd) & 0xF) << 28) |        \
+                                        (((t) & 0x1) << 23) |          \
+                                        (((s) & 0x1) << 22) |          \
+                                        (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n)    ((((cmd) & 0xF) << 28) |        \
+                                        (((vmid) & 0xF) << 20) |       \
+                                        (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define        DMA_PACKET_WRITE                                  0x2
+#define        DMA_PACKET_COPY                                   0x3
+#define        DMA_PACKET_INDIRECT_BUFFER                        0x4
+#define        DMA_PACKET_SEMAPHORE                              0x5
+#define        DMA_PACKET_FENCE                                  0x6
+#define        DMA_PACKET_TRAP                                   0x7
+#define        DMA_PACKET_SRBM_WRITE                             0x9
+#define        DMA_PACKET_CONSTANT_FILL                          0xd
+#define        DMA_PACKET_NOP                                    0xf
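+
+/* Example encoding (illustrative, not used by this patch):
+ * DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) == 0x20000001
+ * (cmd 0x2 in bits 31:28, no tiling/swap, count of one data dword)
+ */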
+
 #endif
 
index 461bf53..38b6fa3 100644
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
 #define RADEON_BIOS_NUM_SCRATCH                        8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS                       4
+#define RADEON_NUM_RINGS                       5
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ              0LL
@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout;
 
 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX               3
+/* cayman adds a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX            4
 
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET                    (1 << 20)
@@ -893,6 +895,7 @@ struct radeon_wb {
 #define RADEON_WB_CP2_RPTR_OFFSET 1536
 #define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
 #define R600_WB_EVENT_OFFSET     3072
 
 /**
index 1dd8d92..8cf8ae8 100644
@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = {
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
                        .vm_flush = &cayman_vm_flush,
+               },
+               [R600_RING_TYPE_DMA_INDEX] = {
+                       .ib_execute = &cayman_dma_ring_ib_execute,
+                       .emit_fence = &evergreen_dma_fence_ring_emit,
+                       .emit_semaphore = &r600_dma_semaphore_ring_emit,
+                       .cs_parse = NULL,
+                       .ring_test = &r600_dma_ring_test,
+                       .ib_test = &r600_dma_ib_test,
+                       .is_lockup = &cayman_dma_is_lockup,
+                       .vm_flush = &cayman_dma_vm_flush,
+               },
+               [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+                       .ib_execute = &cayman_dma_ring_ib_execute,
+                       .emit_fence = &evergreen_dma_fence_ring_emit,
+                       .emit_semaphore = &r600_dma_semaphore_ring_emit,
+                       .cs_parse = NULL,
+                       .ring_test = &r600_dma_ring_test,
+                       .ib_test = &r600_dma_ib_test,
+                       .is_lockup = &cayman_dma_is_lockup,
+                       .vm_flush = &cayman_dma_vm_flush,
                }
        },
        .irq = {
@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = {
        .copy = {
                .blit = &r600_copy_blit,
                .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-               .dma = NULL,
-               .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+               .dma = &evergreen_copy_dma,
+               .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
                .copy = &r600_copy_blit,
                .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
        },
@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = {
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
                        .vm_flush = &cayman_vm_flush,
+               },
+               [R600_RING_TYPE_DMA_INDEX] = {
+                       .ib_execute = &cayman_dma_ring_ib_execute,
+                       .emit_fence = &evergreen_dma_fence_ring_emit,
+                       .emit_semaphore = &r600_dma_semaphore_ring_emit,
+                       .cs_parse = NULL,
+                       .ring_test = &r600_dma_ring_test,
+                       .ib_test = &r600_dma_ib_test,
+                       .is_lockup = &cayman_dma_is_lockup,
+                       .vm_flush = &cayman_dma_vm_flush,
+               },
+               [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+                       .ib_execute = &cayman_dma_ring_ib_execute,
+                       .emit_fence = &evergreen_dma_fence_ring_emit,
+                       .emit_semaphore = &r600_dma_semaphore_ring_emit,
+                       .cs_parse = NULL,
+                       .ring_test = &r600_dma_ring_test,
+                       .ib_test = &r600_dma_ib_test,
+                       .is_lockup = &cayman_dma_is_lockup,
+                       .vm_flush = &cayman_dma_vm_flush,
                }
        },
        .irq = {
@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = {
        .copy = {
                .blit = &r600_copy_blit,
                .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-               .dma = NULL,
-               .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+               .dma = &evergreen_copy_dma,
+               .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
                .copy = &r600_copy_blit,
                .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
        },
index 7a2705d..c2988f7 100644
@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                        uint64_t addr, unsigned count,
                        uint32_t incr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+                               struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);