radeon/kms: cleanup async dma packet checking
author: Jerome Glisse <jglisse@redhat.com>
Mon, 14 Jan 2013 16:32:27 +0000 (11:32 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 31 Jan 2013 21:24:47 +0000 (16:24 -0500)
This simplifies and cleans up the async DMA packet checking.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreen_cs.c
drivers/gpu/drm/radeon/evergreend.h

index a2d478e..a1e12bf 100644 (file)
@@ -3280,14 +3280,14 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
        /* write the fence */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
        radeon_ring_write(ring, fence->seq);
        /* generate an interrupt */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
        /* flush HDP */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);
 }
@@ -3310,7 +3310,7 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
@@ -3320,8 +3320,8 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
@@ -3380,7 +3380,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
                if (cur_size_in_dw > 0xFFFFF)
                        cur_size_in_dw = 0xFFFFF;
                size_in_dw -= cur_size_in_dw;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
                radeon_ring_write(ring, dst_offset & 0xfffffffc);
                radeon_ring_write(ring, src_offset & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
@@ -3488,7 +3488,7 @@ static int evergreen_startup(struct radeon_device *rdev)
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR, DMA_RB_WPTR,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
        if (r)
                return r;
 
index 4a9760a..d8f5d5f 100644 (file)
@@ -2670,16 +2670,6 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
        return 0;
 }
 
-/*
- *  DMA
- */
-
-#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
-#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
-#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
-#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
-#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
-
 /**
  * evergreen_dma_cs_parse() - parse the DMA IB
  * @p:         parser structure holding parsing context.
@@ -2693,9 +2683,9 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 {
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
-       u32 header, cmd, count, tiled, new_cmd, misc;
+       u32 header, cmd, count, sub_cmd;
        volatile u32 *ib = p->ib.ptr;
-       u32 idx, idx_value;
+       u32 idx;
        u64 src_offset, dst_offset, dst2_offset;
        int r;
 
@@ -2709,9 +2699,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                header = radeon_get_ib_value(p, idx);
                cmd = GET_DMA_CMD(header);
                count = GET_DMA_COUNT(header);
-               tiled = GET_DMA_T(header);
-               new_cmd = GET_DMA_NEW(header);
-               misc = GET_DMA_MISC(header);
+               sub_cmd = GET_DMA_SUB_CMD(header);
 
                switch (cmd) {
                case DMA_PACKET_WRITE:
@@ -2720,19 +2708,27 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
                                return -EINVAL;
                        }
-                       if (tiled) {
+                       switch (sub_cmd) {
+                       /* tiled */
+                       case 8:
                                dst_offset = ib[idx+1];
                                dst_offset <<= 8;
 
                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
                                p->idx += count + 7;
-                       } else {
+                               break;
+                       /* linear */
+                       case 0:
                                dst_offset = ib[idx+1];
                                dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
 
                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
                                ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                p->idx += count + 3;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib[idx+0]);
+                               return -EINVAL;
                        }
                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
                                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
@@ -2751,338 +2747,330 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                                DRM_ERROR("bad DMA_PACKET_COPY\n");
                                return -EINVAL;
                        }
-                       if (tiled) {
-                               idx_value = radeon_get_ib_value(p, idx + 2);
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2T, frame to fields */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
-                                                                dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       case 1:
-                                               /* L2T, T2L partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear dst */
-                                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled dst */
-                                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               p->idx += 12;
-                                               break;
-                                       case 3:
-                                               /* L2T, broadcast */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       case 4:
-                                               /* L2T, T2L */
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear dst */
-                                                       src_offset = ib[idx+1];
-                                                       src_offset <<= 8;
-                                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       dst_offset = ib[idx+7];
-                                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled dst */
-                                                       src_offset = ib[idx+7];
-                                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       dst_offset = ib[idx+1];
-                                                       dst_offset <<= 8;
-                                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               p->idx += 9;
-                                               break;
-                                       case 5:
-                                               /* T2T partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2T, T2L Partial is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               p->idx += 13;
-                                               break;
-                                       case 7:
-                                               /* L2T, broadcast */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                       switch (sub_cmd) {
+                       /* Copy L2L, DW aligned */
+                       case 0x00:
+                               /* L2L, dw */
+                               src_offset = ib[idx+2];
+                               src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 5;
+                               break;
+                       /* Copy L2T/T2L */
+                       case 0x08:
+                               /* detile bit */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       src_offset = ib[idx+1];
+                                       src_offset <<= 8;
+                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       dst_offset = radeon_get_ib_value(p, idx + 7);
+                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                } else {
-                                       switch (misc) {
-                                       case 0:
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear dst */
-                                                       src_offset = ib[idx+1];
-                                                       src_offset <<= 8;
-                                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       dst_offset = ib[idx+7];
-                                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled dst */
-                                                       src_offset = ib[idx+7];
-                                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       dst_offset = ib[idx+1];
-                                                       dst_offset <<= 8;
-                                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               p->idx += 9;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                                       /* linear src, tiled dst */
+                                       src_offset = ib[idx+7];
+                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+                                       dst_offset = ib[idx+1];
+                                       dst_offset <<= 8;
+                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
                                }
-                       } else {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2L, byte */
-                                               src_offset = ib[idx+2];
-                                               src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-                                               dst_offset = ib[idx+1];
-                                               dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-                                               if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
-                                                                src_offset + count, radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + count, radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 5;
-                                               break;
-                                       case 1:
-                                               /* L2L, partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2L Partial is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-
-                                               p->idx += 9;
-                                               break;
-                                       case 4:
-                                               /* L2L, dw, broadcast */
-                                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
-                                               src_offset = ib[idx+3];
-                                               src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 7;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               p->idx += 9;
+                               break;
+                       /* Copy L2L, byte aligned */
+                       case 0x40:
+                               /* L2L, byte */
+                               src_offset = ib[idx+2];
+                               src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+                               if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+                                                       src_offset + count, radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + count, radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+                               ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+                               ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 5;
+                               break;
+                       /* Copy L2L, partial */
+                       case 0x41:
+                               /* L2L, partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2L Partial is cayman only !\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+                               ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+                               ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+                               p->idx += 9;
+                               break;
+                       /* Copy L2L, DW aligned, broadcast */
+                       case 0x44:
+                               /* L2L, dw, broadcast */
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+                               src_offset = ib[idx+3];
+                               src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 7;
+                               break;
+                       /* Copy L2T Frame to Field */
+                       case 0x48:
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       /* Copy L2T/T2L, partial */
+                       case 0x49:
+                               /* L2T, T2L partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+                                       return -EINVAL;
+                               }
+                               /* detile bit */
+                               if (ib[idx + 2 ] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               } else {
+                                       /* linear src, tiled dst */
+                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               }
+                               p->idx += 12;
+                               break;
+                       /* Copy L2T broadcast */
+                       case 0x4b:
+                               /* L2T, broadcast */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       /* Copy L2T/T2L (tile units) */
+                       case 0x4c:
+                               /* L2T, T2L */
+                               /* detile bit */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       src_offset = ib[idx+1];
+                                       src_offset <<= 8;
+                                       ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       dst_offset = ib[idx+7];
+                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+                                       ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                } else {
-                                       /* L2L, dw */
-                                       src_offset = ib[idx+2];
-                                       src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                                       /* linear src, tiled dst */
+                                       src_offset = ib[idx+7];
+                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+                                       ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
                                        dst_offset = ib[idx+1];
-                                       dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
-                                       if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
-                                               dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
-                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
-                                               return -EINVAL;
-                                       }
-                                       if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
-                                               dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
-                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
-                                               return -EINVAL;
-                                       }
-                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                       ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                       ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                       ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                       p->idx += 5;
+                                       dst_offset <<= 8;
+                                       ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
                                }
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               p->idx += 9;
+                               break;
+                       /* Copy T2T, partial (tile units) */
+                       case 0x4d:
+                               /* T2T partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               p->idx += 13;
+                               break;
+                       /* Copy L2T broadcast (tile units) */
+                       case 0x4f:
+                               /* L2T, broadcast */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                               ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib[idx+0]);
+                               return -EINVAL;
                        }
                        break;
                case DMA_PACKET_CONSTANT_FILL:
@@ -3435,88 +3423,79 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        u32 idx = 0;
-       u32 header, cmd, count, tiled, new_cmd, misc;
+       u32 header, cmd, count, sub_cmd;
 
        do {
                header = ib->ptr[idx];
                cmd = GET_DMA_CMD(header);
                count = GET_DMA_COUNT(header);
-               tiled = GET_DMA_T(header);
-               new_cmd = GET_DMA_NEW(header);
-               misc = GET_DMA_MISC(header);
+               sub_cmd = GET_DMA_SUB_CMD(header);
 
                switch (cmd) {
                case DMA_PACKET_WRITE:
-                       if (tiled)
+                       switch (sub_cmd) {
+                       /* tiled */
+                       case 8:
                                idx += count + 7;
-                       else
+                               break;
+                       /* linear */
+                       case 0:
                                idx += count + 3;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
+                               return -EINVAL;
+                       }
                        break;
                case DMA_PACKET_COPY:
-                       if (tiled) {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2T, frame to fields */
-                                               idx += 10;
-                                               break;
-                                       case 1:
-                                               /* L2T, T2L partial */
-                                               idx += 12;
-                                               break;
-                                       case 3:
-                                               /* L2T, broadcast */
-                                               idx += 10;
-                                               break;
-                                       case 4:
-                                               /* L2T, T2L */
-                                               idx += 9;
-                                               break;
-                                       case 5:
-                                               /* T2T partial */
-                                               idx += 13;
-                                               break;
-                                       case 7:
-                                               /* L2T, broadcast */
-                                               idx += 10;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       switch (misc) {
-                                       case 0:
-                                               idx += 9;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               }
-                       } else {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2L, byte */
-                                               idx += 5;
-                                               break;
-                                       case 1:
-                                               /* L2L, partial */
-                                               idx += 9;
-                                               break;
-                                       case 4:
-                                               /* L2L, dw, broadcast */
-                                               idx += 7;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       /* L2L, dw */
-                                       idx += 5;
-                               }
+                       switch (sub_cmd) {
+                       /* Copy L2L, DW aligned */
+                       case 0x00:
+                               idx += 5;
+                               break;
+                       /* Copy L2T/T2L */
+                       case 0x08:
+                               idx += 9;
+                               break;
+                       /* Copy L2L, byte aligned */
+                       case 0x40:
+                               idx += 5;
+                               break;
+                       /* Copy L2L, partial */
+                       case 0x41:
+                               idx += 9;
+                               break;
+                       /* Copy L2L, DW aligned, broadcast */
+                       case 0x44:
+                               idx += 7;
+                               break;
+                       /* Copy L2T Frame to Field */
+                       case 0x48:
+                               idx += 10;
+                               break;
+                       /* Copy L2T/T2L, partial */
+                       case 0x49:
+                               idx += 12;
+                               break;
+                       /* Copy L2T broadcast */
+                       case 0x4b:
+                               idx += 10;
+                               break;
+                       /* Copy L2T/T2L (tile units) */
+                       case 0x4c:
+                               idx += 9;
+                               break;
+                       /* Copy T2T, partial (tile units) */
+                       case 0x4d:
+                               idx += 13;
+                               break;
+                       /* Copy L2T broadcast (tile units) */
+                       case 0x4f:
+                               idx += 10;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
+                               return -EINVAL;
                        }
                        break;
                case DMA_PACKET_CONSTANT_FILL:
index 70388fe..5ad29f3 100644 (file)
 #define CAYMAN_DMA1_CNTL                                  0xd82c
 
 /* async DMA packets */
-#define DMA_PACKET(cmd, t, s, n)       ((((cmd) & 0xF) << 28) |        \
-                                        (((t) & 0x1) << 23) |          \
-                                        (((s) & 0x1) << 22) |          \
-                                        (((n) & 0xFFFFF) << 0))
+#define DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) |    \
+                                    (((sub_cmd) & 0xFF) << 20) |\
+                                    (((n) & 0xFFFFF) << 0)) /* build a header dword: cmd in bits 31:28, sub_cmd in bits 27:20, n in bits 19:0 */
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28) /* cmd field of header h (bits 31:28) */
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff) /* count field of header h (bits 19:0), i.e. the n given to DMA_PACKET */
+#define GET_DMA_SUB_CMD(h) (((h) & 0x0ff00000) >> 20) /* sub_cmd field of header h (bits 27:20) */
+
 /* async DMA Packet types */
-#define        DMA_PACKET_WRITE                                  0x2
-#define        DMA_PACKET_COPY                                   0x3
-#define        DMA_PACKET_INDIRECT_BUFFER                        0x4
-#define        DMA_PACKET_SEMAPHORE                              0x5
-#define        DMA_PACKET_FENCE                                  0x6
-#define        DMA_PACKET_TRAP                                   0x7
-#define        DMA_PACKET_SRBM_WRITE                             0x9
-#define        DMA_PACKET_CONSTANT_FILL                          0xd
-#define        DMA_PACKET_NOP                                    0xf
+#define        DMA_PACKET_WRITE                        0x2
+#define        DMA_PACKET_COPY                         0x3
+#define        DMA_PACKET_INDIRECT_BUFFER              0x4
+#define        DMA_PACKET_SEMAPHORE                    0x5
+#define        DMA_PACKET_FENCE                        0x6
+#define        DMA_PACKET_TRAP                         0x7
+#define        DMA_PACKET_SRBM_WRITE                   0x9
+#define        DMA_PACKET_CONSTANT_FILL                0xd
+#define        DMA_PACKET_NOP                          0xf
 
 /* PCIE link stuff */
 #define PCIE_LC_TRAINING_CNTL                             0xa1 /* PCIE_P */