drm/vmwgfx: Use the cpu blit utility for framebuffer to screen target blits
author: Thomas Hellstrom <thellstrom@vmware.com>
Tue, 16 Jan 2018 10:07:30 +0000 (11:07 +0100)
committer: Thomas Hellstrom <thellstrom@vmware.com>
Thu, 22 Mar 2018 11:08:23 +0000 (12:08 +0100)
This blit was previously performed using two large vmaps, one of which
was torn down and remapped on each blit. Use the more resource-
conserving TTM cpu blit instead.

The blit is used in bounding-box computing mode, which makes it possible
to minimize the bounding box used in host operations.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c

index 7177eec..ddf71be 100644 (file)
@@ -185,6 +185,22 @@ static const struct ttm_place evictable_placement_flags[] = {
        }
 };
 
+static const struct ttm_place nonfixed_placement_flags[] = {
+       {
+               .fpfn = 0,
+               .lpfn = 0,
+               .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+       }, {
+               .fpfn = 0,
+               .lpfn = 0,
+               .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+       }, {
+               .fpfn = 0,
+               .lpfn = 0,
+               .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+       }
+};
+
 struct ttm_placement vmw_evictable_placement = {
        .num_placement = 4,
        .placement = evictable_placement_flags,
@@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placement = {
        .busy_placement = &mob_ne_placement_flags
 };
 
+struct ttm_placement vmw_nonfixed_placement = {
+       .num_placement = 3,
+       .placement = nonfixed_placement_flags,
+       .num_busy_placement = 1,
+       .busy_placement = &sys_placement_flags
+};
+
 struct vmw_ttm_tt {
        struct ttm_dma_tt dma_ttm;
        struct vmw_private *dev_priv;
index 053418a..714c794 100644 (file)
@@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictable_placement;
 extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_placement vmw_mob_placement;
 extern struct ttm_placement vmw_mob_ne_placement;
+extern struct ttm_placement vmw_nonfixed_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
index 0f9c9cd..6315967 100644 (file)
@@ -682,9 +682,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
                return NULL;
 
        vps->pinned = 0;
-
-       /* Mapping is managed by prepare_fb/cleanup_fb */
-       memset(&vps->host_map, 0, sizeof(vps->host_map));
        vps->cpp = 0;
 
        /* Each ref counted resource needs to be acquired again */
@@ -746,11 +743,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
        /* Should have been freed by cleanup_fb */
-       if (vps->host_map.virtual) {
-               DRM_ERROR("Host mapping not freed\n");
-               ttm_bo_kunmap(&vps->host_map);
-       }
-
        if (vps->surf)
                vmw_surface_unreference(&vps->surf);
 
@@ -1129,12 +1121,14 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
 };
 
 /**
- * Pin the dmabuffer to the start of vram.
+ * Pin the dmabuffer in a location suitable for access by the
+ * display system.
  */
 static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 {
        struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
        struct vmw_dma_buffer *buf;
+       struct ttm_placement *placement;
        int ret;
 
        buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
@@ -1151,12 +1145,24 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
                break;
        case vmw_du_screen_object:
        case vmw_du_screen_target:
-               if (vfb->dmabuf)
-                       return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf,
-                                                            false);
+               if (vfb->dmabuf) {
+                       if (dev_priv->capabilities & SVGA_CAP_3D) {
+                               /*
+                                * Use surface DMA to get content to
+                                * screen target surface.
+                                */
+                               placement = &vmw_vram_gmr_placement;
+                       } else {
+                               /* Use CPU blit. */
+                               placement = &vmw_sys_placement;
+                       }
+               } else {
+                       /* Use surface / image update */
+                       placement = &vmw_mob_placement;
+               }
 
-               return vmw_dmabuf_pin_in_placement(dev_priv, buf,
-                                                  &vmw_mob_placement, false);
+               return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
+                                                  false);
        default:
                return -EINVAL;
        }
@@ -2419,14 +2425,21 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
                                  struct vmw_dma_buffer *buf,
                                  bool interruptible,
-                                 bool validate_as_mob)
+                                 bool validate_as_mob,
+                                 bool for_cpu_blit)
 {
+       struct ttm_operation_ctx ctx = {
+               .interruptible = interruptible,
+               .no_wait_gpu = false};
        struct ttm_buffer_object *bo = &buf->base;
        int ret;
 
        ttm_bo_reserve(bo, false, false, NULL);
-       ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
-                                        validate_as_mob);
+       if (for_cpu_blit)
+               ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx);
+       else
+               ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
+                                                validate_as_mob);
        if (ret)
                ttm_bo_unreserve(bo);
 
@@ -2538,7 +2551,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
        if (res->backup) {
                ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup,
                                                    interruptible,
-                                                   res->dev_priv->has_mob);
+                                                   res->dev_priv->has_mob,
+                                                   false);
                if (ret)
                        goto out_unreserve;
        }
index 42b0f15..4e8749a 100644 (file)
@@ -177,7 +177,6 @@ struct vmw_plane_state {
        int pinned;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
@@ -289,7 +288,8 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
                                  struct vmw_dma_buffer *buf,
                                  bool interruptible,
-                                 bool validate_as_mob);
+                                 bool validate_as_mob,
+                                 bool for_cpu_blit);
 void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
 void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
                                  struct drm_file *file_priv,
index 9f6dece..3b7bf7c 100644 (file)
@@ -1032,7 +1032,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
        int ret;
 
        ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-                                           false);
+                                           false, false);
        if (ret)
                return ret;
 
@@ -1130,7 +1130,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
        struct vmw_kms_dirty dirty;
        int ret;
 
-       ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false);
+       ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false,
+                                           false);
        if (ret)
                return ret;
 
index 6de2874..8eec889 100644 (file)
@@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit {
        bool defined;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
@@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
                container_of(dirty->unit, typeof(*stdu), base);
        s32 width, height;
        s32 src_pitch, dst_pitch;
-       u8 *src, *dst;
-       bool not_used;
-       struct ttm_bo_kmap_obj guest_map;
-       int ret;
+       struct ttm_buffer_object *src_bo, *dst_bo;
+       u32 src_offset, dst_offset;
+       struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
 
        if (!dirty->num_hits)
                return;
@@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        if (width == 0 || height == 0)
                return;
 
-       ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
-                         &guest_map);
-       if (ret) {
-               DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
-                         ret);
-               goto out_cleanup;
-       }
-
-       /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
-       src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
-       src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
-       src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
-
-       dst_pitch = ddirty->pitch;
-       dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
-       dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
+       /* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
+       dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
+       dst_bo = &stdu->display_srf->res.backup->base;
+       dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
 
+       src_pitch = ddirty->pitch;
+       src_bo = &ddirty->buf->base;
+       src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
 
-       /* Figure out the real direction */
-       if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
-               u8 *tmp;
-               s32 tmp_pitch;
-
-               tmp = src;
-               tmp_pitch = src_pitch;
-
-               src = dst;
-               src_pitch = dst_pitch;
-
-               dst = tmp;
-               dst_pitch = tmp_pitch;
+       /* Swap src and dst if the assumption was wrong. */
+       if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
+               swap(dst_pitch, src_pitch);
+               swap(dst_bo, src_bo);
+               swap(src_offset, dst_offset);
        }
 
-       /* CPU Blit */
-       while (height-- > 0) {
-               memcpy(dst, src, width * stdu->cpp);
-               dst += dst_pitch;
-               src += src_pitch;
-       }
+       (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
+                              src_bo, src_offset, src_pitch,
+                              width * stdu->cpp, height, &diff);
 
-       if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
+       if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
+           drm_rect_visible(&diff.rect)) {
                struct vmw_private *dev_priv;
                struct vmw_stdu_update *cmd;
                struct drm_clip_rect region;
                int ret;
 
                /* We are updating the actual surface, not a proxy */
-               region.x1 = ddirty->left;
-               region.x2 = ddirty->right;
-               region.y1 = ddirty->top;
-               region.y2 = ddirty->bottom;
+               region.x1 = diff.rect.x1;
+               region.x2 = diff.rect.x2;
+               region.y1 = diff.rect.y1;
+               region.y2 = diff.rect.y2;
                ret = vmw_kms_update_proxy(
                        (struct vmw_resource *) &stdu->display_srf->res,
                        (const struct drm_clip_rect *) &region, 1, 1);
@@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
                }
 
                vmw_stdu_populate_update(cmd, stdu->base.unit,
-                                        ddirty->left, ddirty->right,
-                                        ddirty->top, ddirty->bottom);
+                                        region.x1, region.x2,
+                                        region.y1, region.y2);
 
                vmw_fifo_commit(dev_priv, sizeof(*cmd));
        }
 
-       ttm_bo_kunmap(&guest_map);
 out_cleanup:
        ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
        ddirty->right = ddirty->bottom = S32_MIN;
@@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
                container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
        struct vmw_stdu_dirty ddirty;
        int ret;
+       bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);
 
+       /*
+        * VMs without 3D support don't have the surface DMA command and
+        * we'll be using a CPU blit, and the framebuffer should be moved out
+        * of VRAM.
+        */
        ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-                                           false);
+                                           false, cpu_blit);
        if (ret)
                return ret;
 
@@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
        if (to_surface)
                ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
 
-       /* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */
-       if (!(dev_priv->capabilities & SVGA_CAP_3D)) {
+
+       if (cpu_blit) {
                ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
                ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
                ddirty.base.fifo_reserve_size = 0;
@@ -1071,9 +1055,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-       if (vps->host_map.virtual)
-               ttm_bo_kunmap(&vps->host_map);
-
        if (vps->surf)
                WARN_ON(!vps->pinned);
 
@@ -1235,24 +1216,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
         * so cache these mappings
         */
        if (vps->content_fb_type == SEPARATE_DMA &&
-           !(dev_priv->capabilities & SVGA_CAP_3D)) {
-               ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
-                                 vps->surf->res.backup->base.num_pages,
-                                 &vps->host_map);
-               if (ret) {
-                       DRM_ERROR("Failed to map display buffer to CPU\n");
-                       goto out_srf_unpin;
-               }
-
+           !(dev_priv->capabilities & SVGA_CAP_3D))
                vps->cpp = new_fb->pitches[0] / new_fb->width;
-       }
 
        return 0;
 
-out_srf_unpin:
-       vmw_resource_unpin(&vps->surf->res);
-       vps->pinned--;
-
 out_srf_unref:
        vmw_surface_unreference(&vps->surf);
        return ret;
@@ -1296,7 +1264,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
                stdu->display_srf = vps->surf;
                stdu->content_fb_type = vps->content_fb_type;
                stdu->cpp = vps->cpp;
-               memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
                vclips.x = crtc->x;
                vclips.y = crtc->y;