radeon/ttm: add support for zeroing the contents of VRAM buffers
authorDave Airlie <airlied@redhat.com>
Tue, 26 Aug 2008 07:44:47 +0000 (17:44 +1000)
committerDave Airlie <airlied@redhat.com>
Tue, 26 Aug 2008 07:44:47 +0000 (17:44 +1000)
This uses a solid fill fastpath, falling back to the slow memset path.

linux-core/radeon_buffer.c

index 900d450..96a584f 100644 (file)
@@ -37,7 +37,7 @@ struct drm_ttm_backend *radeon_create_ttm_backend_entry(struct drm_device * dev)
 {
        drm_radeon_private_t *dev_priv = dev->dev_private;
 
-       if(dev_priv->flags & RADEON_IS_AGP)
+       if (dev_priv->flags & RADEON_IS_AGP)
                return drm_agp_init_ttm(dev);
        else
                return ati_pcigart_init_ttm(dev, &dev_priv->gart_info, radeon_gart_flush);
@@ -58,9 +58,10 @@ int radeon_invalidate_caches(struct drm_device * dev, uint64_t flags)
        if (!dev_priv->cp_running)
                return 0;
 
-       BEGIN_RING(4);
-       RADEON_FLUSH_CACHE();
-       RADEON_FLUSH_ZCACHE();
+       BEGIN_RING(6);
+       RADEON_PURGE_CACHE();
+       RADEON_PURGE_ZCACHE();
+       RADEON_WAIT_UNTIL_3D_IDLE();
        ADVANCE_RING();
        COMMIT_RING();
        return 0;
@@ -112,15 +113,17 @@ int radeon_init_mem_type(struct drm_device * dev, uint32_t type,
        return 0;
 }
 
-static void radeon_emit_copy_blit(struct drm_device * dev,
-                                 uint32_t src_offset,
-                                 uint32_t dst_offset,
-                                 uint32_t pages, int direction)
+void radeon_emit_copy_blit(struct drm_device * dev,
+                          uint32_t src_offset,
+                          uint32_t dst_offset,
+                          uint32_t pages)
 {
        uint32_t cur_pages;
-       uint32_t stride = PAGE_SIZE;
+       uint32_t stride_bytes = PAGE_SIZE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
-       uint32_t format, height;
+       uint32_t format, pitch;
+       const uint32_t clip = (0x1fff) | (0x1fff << 16);
+       uint32_t stride_pixels;
        RING_LOCALS;
 
        if (!dev_priv)
@@ -130,67 +133,171 @@ static void radeon_emit_copy_blit(struct drm_device * dev,
        format = RADEON_COLOR_FORMAT_ARGB8888;
 
        /* radeon limited to 16k stride */
-       stride &= 0x3fff;
+       stride_bytes &= 0x3fff;
+       /* radeon pitch is /64 */
+       pitch = stride_bytes / 64;
+
+       stride_pixels = stride_bytes / 4;
+
        while(pages > 0) {
                cur_pages = pages;
-               if (cur_pages > 2048)
-                       cur_pages = 2048;
+               if (cur_pages > 8191)
+                       cur_pages = 8191;
                pages -= cur_pages;
 
-               /* needs verification */
-               BEGIN_RING(7);          
-               OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
+               /* the blit treats pages as rows: the page count maps to the
+                  Y/height axis, the page width (in pixels) to the X/width axis */
+               BEGIN_RING(10);
+               OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 8));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+                        RADEON_GMC_SRC_CLIPPING | RADEON_GMC_DST_CLIPPING |
                         RADEON_GMC_BRUSH_NONE |
                         (format << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
-               if (direction) {
-                       OUT_RING((stride << 22) | (src_offset >> 10));
-                       OUT_RING((stride << 22) | (dst_offset >> 10));
-               } else {
-                       OUT_RING((stride << 22) | (dst_offset >> 10));
-                       OUT_RING((stride << 22) | (src_offset >> 10));
-               }
-               OUT_RING(0);
+               OUT_RING((pitch << 22) | (src_offset >> 10));
+               OUT_RING((pitch << 22) | (dst_offset >> 10));
+               OUT_RING(clip); // SRC_SC_BOT_RITE: source scissor bottom-right
+               OUT_RING(0);   // SC_TOP_LEFT: destination scissor top-left
+               OUT_RING(clip); // SC_BOT_RITE: destination scissor bottom-right
+
+               OUT_RING(pages);
                OUT_RING(pages); /* x - y */
-               OUT_RING((stride << 16) | cur_pages);
+               OUT_RING(cur_pages | (stride_pixels << 16));
                ADVANCE_RING();
        }
 
-       BEGIN_RING(2);
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(RADEON_RB2D_DC_FLUSH_ALL);
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
 
+       COMMIT_RING();
        return;
 }
 
-static int radeon_move_blit(struct drm_buffer_object * bo,
-                           int evict, int no_wait, struct drm_bo_mem_reg *new_mem)
+int radeon_move_blit(struct drm_buffer_object * bo,
+                           int evict, int no_wait, struct drm_bo_mem_reg *new_mem,
+                           struct drm_bo_mem_reg *old_mem)
 {
-       struct drm_bo_mem_reg *old_mem = &bo->mem;
-       int dir = 0;
+       struct drm_device *dev = bo->dev;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       uint32_t old_start, new_start;
 
-       if ((old_mem->mem_type == new_mem->mem_type) &&
-           (new_mem->mm_node->start <
-            old_mem->mm_node->start + old_mem->mm_node->size)) {
-               dir = 1;
-       }
+       old_start = old_mem->mm_node->start << PAGE_SHIFT;
+       new_start = new_mem->mm_node->start << PAGE_SHIFT;
+
+       if (old_mem->mem_type == DRM_BO_MEM_VRAM)
+               old_start += dev_priv->fb_location;
+       if (old_mem->mem_type == DRM_BO_MEM_TT)
+               old_start += dev_priv->gart_vm_start;
+
+       if (new_mem->mem_type == DRM_BO_MEM_VRAM)
+               new_start += dev_priv->fb_location;
+       if (new_mem->mem_type == DRM_BO_MEM_TT)
+               new_start += dev_priv->gart_vm_start;
 
        radeon_emit_copy_blit(bo->dev,
-                             old_mem->mm_node->start << PAGE_SHIFT,
-                             new_mem->mm_node->start << PAGE_SHIFT,
-                             new_mem->num_pages, dir);
+                             old_start,
+                             new_start,
+                             new_mem->num_pages);
+
+       /* invalidate the chip caches */
 
-       
        return drm_bo_move_accel_cleanup(bo, evict, no_wait, 0,
                                         DRM_FENCE_TYPE_EXE, 0,
                                         new_mem);
 }
 
+void radeon_emit_solid_fill(struct drm_device * dev,
+                           uint32_t dst_offset,
+                           uint32_t pages, uint8_t value)
+{
+       uint32_t cur_pages;
+       uint32_t stride_bytes = PAGE_SIZE;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       uint32_t format, pitch;
+       const uint32_t clip = (0x1fff) | (0x1fff << 16);
+       uint32_t stride_pixels;
+       RING_LOCALS;
+
+       if (!dev_priv)
+               return;
+
+       /* 32-bit copy format */
+       format = RADEON_COLOR_FORMAT_ARGB8888;
+
+       /* radeon limited to 16k stride */
+       stride_bytes &= 0x3fff;
+       /* radeon pitch is /64 */
+       pitch = stride_bytes / 64;
+
+       stride_pixels = stride_bytes / 4;
+
+       while(pages > 0) {
+               cur_pages = pages;
+               if (cur_pages > 8191)
+                       cur_pages = 8191;
+               pages -= cur_pages;
+
+               /* the fill treats pages as rows: the page count maps to the
+                  Y/height axis, the page width (in pixels) to the X/width axis */
+               BEGIN_RING(8);
+               OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 6));
+               OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+                        RADEON_GMC_DST_CLIPPING |
+                        RADEON_GMC_BRUSH_SOLID_COLOR |
+                        (format << 8) |
+                        RADEON_ROP3_S |
+                        RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+               OUT_RING((pitch << 22) | (dst_offset >> 10)); // PITCH
+               OUT_RING(0);   // SC_TOP_LEFT: destination clip top-left
+               OUT_RING(clip); // SC_BOT_RITE: destination clip bottom-right
+
+               OUT_RING(0); // COLOR - NOTE(review): the 'value' parameter is ignored; this always fills with 0
+
+               OUT_RING(pages); /* x - y */
+               OUT_RING(cur_pages | (stride_pixels << 16));
+               ADVANCE_RING();
+       }
+
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(RADEON_RB2D_DC_FLUSH_ALL);
+       RADEON_WAIT_UNTIL_2D_IDLE();
+       ADVANCE_RING();
+
+       COMMIT_RING();
+       return;
+}
+
+int radeon_move_zero_fill(struct drm_buffer_object * bo,
+                         int evict, int no_wait, struct drm_bo_mem_reg *new_mem)
+{
+       struct drm_device *dev = bo->dev;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       uint32_t new_start;
+
+       new_start = new_mem->mm_node->start << PAGE_SHIFT;
+
+       if (new_mem->mem_type == DRM_BO_MEM_VRAM)
+               new_start += dev_priv->fb_location;
+
+       radeon_emit_solid_fill(bo->dev,
+                              new_start,
+                              new_mem->num_pages, 0);
+
+       /* invalidate the chip caches */
+
+       return drm_bo_move_accel_cleanup(bo, 1, no_wait, 0,
+                                        DRM_FENCE_TYPE_EXE, 0,
+                                        new_mem);
+}
+
 static int radeon_move_flip(struct drm_buffer_object * bo,
                            int evict, int no_wait, struct drm_bo_mem_reg * new_mem)
 {
@@ -200,8 +307,7 @@ static int radeon_move_flip(struct drm_buffer_object * bo,
 
        tmp_mem = *new_mem;
        tmp_mem.mm_node = NULL;
-       //      tmp_mem.mask = DRM_BO_FLAG_MEM_TT |
-       //          DRM_BO_FLAG_CACHED | DRM_BO_FLAG_FORCE_CACHING;
+       tmp_mem.proposed_flags = DRM_BO_FLAG_MEM_TT;
 
        ret = drm_bo_mem_space(bo, &tmp_mem, no_wait);
        if (ret)
@@ -211,7 +317,7 @@ static int radeon_move_flip(struct drm_buffer_object * bo,
        if (ret)
                goto out_cleanup;
 
-       ret = radeon_move_blit(bo, 1, no_wait, &tmp_mem);
+       ret = radeon_move_blit(bo, 1, no_wait, &tmp_mem, &bo->mem);
        if (ret)
                goto out_cleanup;
 
@@ -227,25 +333,90 @@ out_cleanup:
        return ret;
 }
 
+static int radeon_move_vram(struct drm_buffer_object * bo, 
+                            int evict, int no_wait, struct drm_bo_mem_reg * new_mem) 
+{ 
+       struct drm_device *dev = bo->dev; 
+       struct drm_bo_mem_reg tmp_mem;
+       struct drm_bo_mem_reg *old_mem = &bo->mem;
+       int ret; 
+       bool was_local = false;
+       /* old - LOCAL memory node bo->mem
+          tmp - TT type memory node
+          new - VRAM memory node */
+
+       tmp_mem = *old_mem;
+       tmp_mem.mm_node = NULL; 
+
+       if (old_mem->mem_type == DRM_BO_MEM_LOCAL) {
+               tmp_mem.proposed_flags = DRM_BO_FLAG_MEM_TT; 
+               ret = drm_bo_mem_space(bo, &tmp_mem, no_wait); 
+               if (ret) 
+                       return ret; 
+       }
+
+       if (!bo->ttm) {
+               ret = drm_bo_add_ttm(bo);
+               if (ret)
+                       goto out_cleanup;
+       }
+
+       if (old_mem->mem_type == DRM_BO_MEM_LOCAL) {
+               ret = drm_bo_move_ttm(bo, evict, no_wait, &tmp_mem);
+               if (ret)
+                       return ret;
+       }
+
+       ret = radeon_move_blit(bo, 1, no_wait, new_mem, &bo->mem);
+       if (ret) 
+               goto out_cleanup; 
+out_cleanup: 
+       if (tmp_mem.mm_node) { 
+               mutex_lock(&dev->struct_mutex); 
+               if (tmp_mem.mm_node != bo->pinned_node) 
+                       drm_mm_put_block(tmp_mem.mm_node); 
+               tmp_mem.mm_node = NULL; 
+               mutex_unlock(&dev->struct_mutex); 
+       } 
+       return ret; 
+} 
+
 int radeon_move(struct drm_buffer_object * bo,
-               int evict, int no_wait, struct drm_bo_mem_reg * new_mem)
+               int evict, int no_wait, struct drm_bo_mem_reg *new_mem)
 {
+       struct drm_device *dev = bo->dev;
        struct drm_bo_mem_reg *old_mem = &bo->mem;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
 
-       return drm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-#if 0
-       DRM_DEBUG("\n");
-       if (old_mem->mem_type == DRM_BO_MEM_LOCAL) {
-               return drm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-       } else if (new_mem->mem_type == DRM_BO_MEM_LOCAL) {
+       if (!dev_priv->cp_running)
+               goto fallback;
+       
+       if (bo->mem.flags & DRM_BO_FLAG_CLEAN) /* zero the contents via the solid-fill fast path */
+       {
+               if (radeon_move_zero_fill(bo, evict, no_wait, new_mem))
+                       return drm_bo_move_zero(bo, evict, no_wait, new_mem);
+               return 0; 
+       }
+
+       if (new_mem->mem_type == DRM_BO_MEM_VRAM) {
+               if (radeon_move_vram(bo, evict, no_wait, new_mem))
+                       goto fallback;
+       } else if (new_mem->mem_type == DRM_BO_MEM_LOCAL){ 
                if (radeon_move_flip(bo, evict, no_wait, new_mem))
-                       return drm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+                       goto fallback;
        } else {
-               if (radeon_move_blit(bo, evict, no_wait, new_mem))
-                       return drm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+               if (radeon_move_flip(bo, evict, no_wait, new_mem))
+                       goto fallback;
        }
        return 0;
-#endif
+fallback:
+       if (bo->mem.flags & DRM_BO_FLAG_CLEAN)
+               return drm_bo_move_zero(bo, evict, no_wait, new_mem);
+       else
+               return drm_bo_move_memcpy(bo, evict, no_wait, new_mem);
 }