r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT
author Michel Dänzer <michel.daenzer@amd.com>
Thu, 19 Jun 2014 01:40:38 +0000 (10:40 +0900)
committer Michel Dänzer <michel@daenzer.net>
Wed, 23 Jul 2014 09:55:50 +0000 (18:55 +0900)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
17 files changed:
src/gallium/drivers/r300/r300_query.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_screen_buffer.c
src/gallium/drivers/r300/r300_texture.c
src/gallium/drivers/radeon/r600_buffer_common.c
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeon/radeon_uvd.c
src/gallium/drivers/radeon/radeon_vce.c
src/gallium/drivers/radeon/radeon_video.c
src/gallium/drivers/radeon/radeon_video.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.h
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
src/gallium/winsys/radeon/drm/radeon_winsys.h

index 5305ebd..1679433 100644 (file)
@@ -59,7 +59,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
         q->num_pipes = r300screen->info.r300_num_gb_pipes;
 
     q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE,
-                                      RADEON_DOMAIN_GTT);
+                                      RADEON_DOMAIN_GTT, 0);
     if (!q->buf) {
         FREE(q);
         return NULL;
index 175b83a..6e5b381 100644 (file)
@@ -907,7 +907,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
         r300->vbo = rws->buffer_create(rws,
                                        MAX2(R300_MAX_DRAW_VBO_SIZE, size),
                                        R300_BUFFER_ALIGNMENT, TRUE,
-                                       RADEON_DOMAIN_GTT);
+                                       RADEON_DOMAIN_GTT, 0);
         if (!r300->vbo) {
             return FALSE;
         }
index 86e4478..de557b5 100644 (file)
@@ -103,7 +103,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
             /* Create a new one in the same pipe_resource. */
             new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0,
                                                R300_BUFFER_ALIGNMENT, TRUE,
-                                               rbuf->domain);
+                                               rbuf->domain, 0);
             if (new_buf) {
                 /* Discard the old buffer. */
                 pb_reference(&rbuf->buf, NULL);
@@ -185,7 +185,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
     rbuf->buf =
         r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0,
                                        R300_BUFFER_ALIGNMENT, TRUE,
-                                       rbuf->domain);
+                                       rbuf->domain, 0);
     if (!rbuf->buf) {
         FREE(rbuf);
         return NULL;
index 4ea69dc..ffe8c00 100644 (file)
@@ -1042,7 +1042,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
     /* Create the backing buffer if needed. */
     if (!tex->buf) {
         tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE,
-                                      tex->domain);
+                                      tex->domain, 0);
 
         if (!tex->buf) {
             goto fail;
index 0eaa817..4e6b897 100644 (file)
@@ -107,11 +107,14 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
 {
        struct r600_texture *rtex = (struct r600_texture*)res;
        struct pb_buffer *old_buf, *new_buf;
+       enum radeon_bo_flag flags = 0;
 
        switch (res->b.b.usage) {
-       case PIPE_USAGE_STAGING:
        case PIPE_USAGE_DYNAMIC:
        case PIPE_USAGE_STREAM:
+               flags = RADEON_FLAG_GTT_WC;
+               /* fall through */
+       case PIPE_USAGE_STAGING:
                /* Transfers are likely to occur more often with these resources. */
                res->domains = RADEON_DOMAIN_GTT;
                break;
@@ -120,6 +123,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
        default:
                /* Not listing GTT here improves performance in some apps. */
                res->domains = RADEON_DOMAIN_VRAM;
+               flags = RADEON_FLAG_GTT_WC;
                break;
        }
 
@@ -129,6 +133,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
            res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                              PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
                res->domains = RADEON_DOMAIN_GTT;
+               flags = 0;
        }
 
        /* Tiled textures are unmappable. Always put them in VRAM. */
@@ -140,7 +145,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
        /* Allocate a new resource. */
        new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
                                             use_reusable_pool,
-                                            res->domains);
+                                            res->domains, flags);
        if (!new_buf) {
                return false;
        }
index bfda69e..6dd84a4 100644 (file)
@@ -1027,6 +1027,8 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 
                r600_init_temp_resource_from_box(&resource, texture, box, level,
                                                 R600_RESOURCE_FLAG_TRANSFER);
+               resource.usage = (usage & PIPE_TRANSFER_READ) ?
+                       PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
 
                /* Create the temporary texture. */
                staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
index 137c69c..d77217c 100644 (file)
@@ -816,12 +816,14 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
        for (i = 0; i < NUM_BUFFERS; ++i) {
                unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
                STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
-               if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size, RADEON_DOMAIN_VRAM)) {
+               if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size,
+                                        RADEON_DOMAIN_VRAM, 0)) {
                        RVID_ERR("Can't allocated message buffers.\n");
                        goto error;
                }
 
-               if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size, RADEON_DOMAIN_GTT)) {
+               if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size,
+                                        RADEON_DOMAIN_GTT, 0)) {
                        RVID_ERR("Can't allocated bitstream buffers.\n");
                        goto error;
                }
@@ -830,7 +832,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
                rvid_clear_buffer(dec->ws, dec->cs, &dec->bs_buffers[i]);
        }
 
-       if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM)) {
+       if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM, 0)) {
                RVID_ERR("Can't allocated dpb.\n");
                goto error;
        }
index f5395b3..9174c97 100644 (file)
@@ -191,7 +191,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
        struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
        if (enc->stream_handle) {
                struct rvid_buffer fb;
-               rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT);
+               rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0);
                enc->fb = &fb;
                enc->session(enc);
                enc->feedback(enc);
@@ -233,7 +233,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
        if (!enc->stream_handle) {
                struct rvid_buffer fb;
                enc->stream_handle = rvid_alloc_stream_handle();
-               rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT);
+               rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0);
                enc->fb = &fb;
                enc->session(enc);
                enc->create(enc);
@@ -265,7 +265,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
        enc->bs_size = destination->width0;
 
        *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
-       if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT)) {
+       if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT, 0)) {
                RVID_ERR("Can't create feedback buffer.\n");
                return;
        }
@@ -390,7 +390,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
        cpb_size = cpb_size * 3 / 2;
        cpb_size = cpb_size * enc->cpb_num;
        tmp_buf->destroy(tmp_buf);
-       if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM)) {
+       if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM, 0)) {
                RVID_ERR("Can't create CPB buffer.\n");
                goto error;
        }
index eae533e..17e9a59 100644 (file)
@@ -61,11 +61,13 @@ unsigned rvid_alloc_stream_handle()
 
 /* create a buffer in the winsys */
 bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer,
-                       unsigned size, enum radeon_bo_domain domain)
+                       unsigned size, enum radeon_bo_domain domain,
+                       enum radeon_bo_flag flags)
 {
        buffer->domain = domain;
+       buffer->flags = flags;
 
-       buffer->buf = ws->buffer_create(ws, size, 4096, false, domain);
+       buffer->buf = ws->buffer_create(ws, size, 4096, false, domain, flags);
        if (!buffer->buf)
                return false;
 
@@ -91,7 +93,8 @@ bool rvid_resize_buffer(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
        struct rvid_buffer old_buf = *new_buf;
        void *src = NULL, *dst = NULL;
 
-       if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain))
+       if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain,
+                                new_buf->flags))
                goto error;
 
        src = ws->buffer_map(old_buf.cs_handle, cs, PIPE_TRANSFER_READ);
@@ -191,7 +194,7 @@ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
        /* TODO: 2D tiling workaround */
        alignment *= 2;
 
-       pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM);
+       pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0);
        if (!pb)
                return;
 
index 55d2ca4..42de5a9 100644 (file)
@@ -44,6 +44,7 @@
 struct rvid_buffer
 {
        enum radeon_bo_domain           domain;
+       enum radeon_bo_flag             flags;
        struct pb_buffer*               buf;
        struct radeon_winsys_cs_handle* cs_handle;
 };
@@ -53,7 +54,8 @@ unsigned rvid_alloc_stream_handle(void);
 
 /* create a buffer in the winsys */
 bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer,
-                       unsigned size, enum radeon_bo_domain domain);
+                       unsigned size, enum radeon_bo_domain domain,
+                       enum radeon_bo_flag flags);
 
 /* destroy a buffer */
 void rvid_destroy_buffer(struct rvid_buffer *buffer);
index fde06fc..3dec536 100644 (file)
@@ -2697,7 +2697,7 @@ static void si_set_border_colors(struct si_context *sctx, unsigned count,
 
                                sctx->border_color_table =
                                        si_resource_create_custom(&sctx->screen->b.b,
-                                                                 PIPE_USAGE_STAGING,
+                                                                 PIPE_USAGE_DYNAMIC,
                                                                  4096 * 4 * 4);
                        }
 
index d06bb34..73f8d38 100644 (file)
@@ -477,6 +477,10 @@ const struct pb_vtbl radeon_bo_vtbl = {
     radeon_bo_get_base_buffer,
 };
 
+#ifndef RADEON_GEM_GTT_WC
+#define RADEON_GEM_GTT_WC (1 << 2)
+#endif
+
 static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
                                                 pb_size size,
                                                 const struct pb_desc *desc)
@@ -497,6 +501,10 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     args.size = size;
     args.alignment = desc->alignment;
     args.initial_domain = rdesc->initial_domains;
+    args.flags = 0;
+
+    if (rdesc->flags & RADEON_FLAG_GTT_WC)
+        args.flags |= RADEON_GEM_GTT_WC;
 
     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                             &args, sizeof(args))) {
@@ -504,6 +512,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
         fprintf(stderr, "radeon:    size      : %d bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
         fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
+        fprintf(stderr, "radeon:    flags     : %d\n", args.flags);
         return NULL;
     }
 
@@ -784,7 +793,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
                         unsigned size,
                         unsigned alignment,
                         boolean use_reusable_pool,
-                        enum radeon_bo_domain domain)
+                        enum radeon_bo_domain domain,
+                        enum radeon_bo_flag flags)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
     struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
@@ -798,13 +808,20 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
     /* Additional criteria for the cache manager. */
     desc.base.usage = domain;
     desc.initial_domains = domain;
+    desc.flags = flags;
 
     /* Assign a buffer manager. */
     if (use_reusable_pool) {
-        if (domain == RADEON_DOMAIN_VRAM)
-            provider = ws->cman_vram;
-        else
+        if (domain == RADEON_DOMAIN_VRAM) {
+            if (flags & RADEON_FLAG_GTT_WC)
+                provider = ws->cman_vram_gtt_wc;
+            else
+                provider = ws->cman_vram;
+        } else if (flags & RADEON_FLAG_GTT_WC) {
+            provider = ws->cman_gtt_wc;
+        } else {
             provider = ws->cman_gtt;
+        }
     } else {
         provider = ws->kman;
     }
index f5b122f..1c00a13 100644 (file)
@@ -42,6 +42,7 @@ struct radeon_bo_desc {
     struct pb_desc base;
 
     unsigned initial_domains;
+    unsigned flags;
 };
 
 struct radeon_bo {
index 67375dc..3596f8d 100644 (file)
@@ -606,7 +606,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
 
     /* Create a fence, which is a dummy BO. */
     fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
-                                       RADEON_DOMAIN_GTT);
+                                       RADEON_DOMAIN_GTT, 0);
     /* Add the fence as a dummy relocation. */
     cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
index 9e60de3..910d06b 100644 (file)
@@ -424,7 +424,9 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
     pipe_mutex_destroy(ws->cs_stack_lock);
 
     ws->cman_vram->destroy(ws->cman_vram);
+    ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
     ws->cman_gtt->destroy(ws->cman_gtt);
+    ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
     ws->kman->destroy(ws->kman);
     if (ws->gen >= DRV_R600) {
         radeon_surface_manager_free(ws->surf_man);
@@ -642,9 +644,15 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
     ws->cman_vram = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
     if (!ws->cman_vram)
         goto fail;
+    ws->cman_vram_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
+    if (!ws->cman_vram_gtt_wc)
+        goto fail;
     ws->cman_gtt = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
     if (!ws->cman_gtt)
         goto fail;
+    ws->cman_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
+    if (!ws->cman_gtt_wc)
+        goto fail;
 
     if (ws->gen >= DRV_R600) {
         ws->surf_man = radeon_surface_manager_new(fd);
@@ -701,8 +709,12 @@ fail:
     pipe_mutex_unlock(fd_tab_mutex);
     if (ws->cman_gtt)
         ws->cman_gtt->destroy(ws->cman_gtt);
+    if (ws->cman_gtt_wc)
+        ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
     if (ws->cman_vram)
         ws->cman_vram->destroy(ws->cman_vram);
+    if (ws->cman_vram_gtt_wc)
+        ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
     if (ws->kman)
         ws->kman->destroy(ws->kman);
     if (ws->surf_man)
index fc6f53b..ea6f7f0 100644 (file)
@@ -58,7 +58,9 @@ struct radeon_drm_winsys {
 
     struct pb_manager *kman;
     struct pb_manager *cman_vram;
+    struct pb_manager *cman_vram_gtt_wc;
     struct pb_manager *cman_gtt;
+    struct pb_manager *cman_gtt_wc;
     struct radeon_surface_manager *surf_man;
 
     uint32_t num_cpus;      /* Number of CPUs. */
index a63a50b..9aea1e8 100644 (file)
@@ -65,6 +65,10 @@ enum radeon_bo_domain { /* bitfield */
     RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
+enum radeon_bo_flag { /* bitfield */
+   RADEON_FLAG_GTT_WC = (1 << 0)
+};
+
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
@@ -287,7 +291,8 @@ struct radeon_winsys {
                                        unsigned size,
                                        unsigned alignment,
                                        boolean use_reusable_pool,
-                                       enum radeon_bo_domain domain);
+                                       enum radeon_bo_domain domain,
+                                       enum radeon_bo_flag flags);
 
     struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
             struct pb_buffer *buf);