intel: Pack dynamic draws together
author: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 8 Feb 2011 20:01:10 +0000 (20:01 +0000)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Mon, 21 Feb 2011 12:59:34 +0000 (12:59 +0000)
Dynamic arrays have the tendency to be small and so allocating a bo for
each one is overkill and we can exploit many efficiency gains by packing
them together.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/intel/intel_batchbuffer.c
src/mesa/drivers/dri/intel/intel_buffer_objects.c
src/mesa/drivers/dri/intel/intel_buffer_objects.h
src/mesa/drivers/dri/intel/intel_context.h
src/mesa/drivers/dri/intel/intel_pixel_read.c
src/mesa/drivers/dri/intel/intel_tex_image.c

index 2cd2498..7f93f1d 100644 (file)
@@ -359,13 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
       if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(input->glarray->BufferObj);
+        GLuint offset;
 
         /* Named buffer object: Just reference its contents directly. */
         drm_intel_bo_unreference(input->bo);
-        input->bo = intel_bufferobj_buffer(intel, intel_buffer,
-                                           INTEL_READ);
+        input->bo = intel_bufferobj_source(intel, intel_buffer, &offset);
         drm_intel_bo_reference(input->bo);
-        input->offset = (unsigned long)input->glarray->Ptr;
+        input->offset = offset + (unsigned long)input->glarray->Ptr;
         input->stride = input->glarray->StrideB;
         input->count = input->glarray->_MaxElement;
 
@@ -633,16 +633,16 @@ static void brw_prepare_indices(struct brw_context *brw)
 
            ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
        } else {
-         bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
-                                     INTEL_READ);
-         drm_intel_bo_reference(bo);
-
          /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
           * the index buffer state when we're just moving the start index
           * of our drawing.
           */
          brw->ib.start_vertex_offset = offset / ib_type_size;
-         offset = 0;
+
+         bo = intel_bufferobj_source(intel, intel_buffer_object(bufferobj),
+                                     &offset);
+         drm_intel_bo_reference(bo);
+
          ib_size = bo->size;
        }
    }
index 67ce8a4..28bf42e 100644 (file)
@@ -175,6 +175,12 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    if (intel->vtbl.finish_batch)
       intel->vtbl.finish_batch(intel);
 
+   if (intel->upload.bo) {
+      drm_intel_bo_unreference(intel->upload.bo);
+      intel->upload.bo = NULL;
+      intel->upload.offset = 0;
+   }
+
    /* Check that we didn't just wrap our batchbuffer at a bad time. */
    assert(!intel->no_batch_wrap);
 
index 62e8d82..f54fc1a 100644 (file)
@@ -528,7 +528,8 @@ intel_bufferobj_unmap(struct gl_context * ctx,
 
 drm_intel_bo *
 intel_bufferobj_buffer(struct intel_context *intel,
-                       struct intel_buffer_object *intel_obj, GLuint flag)
+                       struct intel_buffer_object *intel_obj,
+                      GLuint flag)
 {
    if (intel_obj->region) {
       if (flag == INTEL_WRITE_PART)
@@ -539,19 +540,65 @@ intel_bufferobj_buffer(struct intel_context *intel,
       }
    }
 
+   if (intel_obj->source) {
+      drm_intel_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = NULL;
+      intel_obj->source = 0;
+   }
+
    if (intel_obj->buffer == NULL) {
-      /* XXX suballocate for DYNAMIC READ */
       intel_bufferobj_alloc_buffer(intel, intel_obj);
       drm_intel_bo_subdata(intel_obj->buffer,
                           0, intel_obj->Base.Size,
                           intel_obj->sys_buffer);
 
-      if (flag != INTEL_READ) {
-        free(intel_obj->sys_buffer);
-        intel_obj->sys_buffer = NULL;
+      free(intel_obj->sys_buffer);
+      intel_obj->sys_buffer = NULL;
+      intel_obj->offset = 0;
+   }
+
+   return intel_obj->buffer;
+}
+
+#define INTEL_UPLOAD_SIZE (64*1024)
+
+static void wrap_buffers(struct intel_context *intel, GLuint size)
+{
+   if (size < INTEL_UPLOAD_SIZE)
+      size = INTEL_UPLOAD_SIZE;
+
+   if (intel->upload.bo != NULL)
+      drm_intel_bo_unreference(intel->upload.bo);
+
+   intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0);
+   intel->upload.offset = 0;
+}
+
+drm_intel_bo *
+intel_bufferobj_source(struct intel_context *intel,
+                       struct intel_buffer_object *intel_obj,
+                      GLuint *offset)
+{
+   if (intel_obj->buffer == NULL) {
+      GLuint size = ALIGN(intel_obj->Base.Size, 64);
+
+      if (intel->upload.bo == NULL ||
+         intel->upload.offset + size > intel->upload.bo->size) {
+        wrap_buffers(intel, size);
       }
+
+      drm_intel_bo_reference(intel->upload.bo);
+      intel_obj->buffer = intel->upload.bo;
+      intel_obj->offset = intel->upload.offset;
+      intel_obj->source = 1;
+      intel->upload.offset += size;
+
+      drm_intel_bo_subdata(intel_obj->buffer,
+                          intel_obj->offset, intel_obj->Base.Size,
+                          intel_obj->sys_buffer);
    }
 
+   *offset = intel_obj->offset;
    return intel_obj->buffer;
 }
 
@@ -566,6 +613,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
    struct intel_buffer_object *intel_src = intel_buffer_object(src);
    struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
    drm_intel_bo *src_bo, *dst_bo;
+   GLuint src_offset;
 
    if (size == 0)
       return;
@@ -600,11 +648,11 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
    /* Otherwise, we have real BOs, so blit them. */
 
    dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
-   src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
+   src_bo = intel_bufferobj_source(intel, intel_src, &src_offset);
 
    intel_emit_linear_blit(intel,
                          dst_bo, write_offset,
-                         src_bo, read_offset, size);
+                         src_bo, read_offset + src_offset, size);
 
    /* Since we've emitted some blits to buffers that will (likely) be used
     * in rendering operations in other cache domains in this batch, emit a
index b15c192..c845b70 100644 (file)
@@ -42,6 +42,8 @@ struct intel_buffer_object
 {
    struct gl_buffer_object Base;
    drm_intel_bo *buffer;     /* the low-level buffer manager's buffer handle */
+   GLuint offset;            /* any offset into that buffer */
+
    /** System memory buffer data, if not using a BO to store the data. */
    void *sys_buffer;
 
@@ -55,6 +57,7 @@ struct intel_buffer_object
    GLsizei range_map_size;
 
    GLboolean mapped_gtt;
+   GLboolean source;
 };
 
 
@@ -63,6 +66,9 @@ struct intel_buffer_object
 drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel,
                                     struct intel_buffer_object *obj,
                                     GLuint flag);
+drm_intel_bo *intel_bufferobj_source(struct intel_context *intel,
+                                    struct intel_buffer_object *obj,
+                                    GLuint *offset);
 
 /* Hook the bufferobject implementation into mesa: 
  */
index 134e07e..a1ed462 100644 (file)
@@ -187,6 +187,11 @@ struct intel_context
       unsigned int count;      /**< Number of vertices in current primitive */
    } prim;
 
+   struct {
+      drm_intel_bo *bo;
+      GLuint offset;
+   } upload;
+
    GLuint stats_wm;
 
    /* Offsets of fields within the current vertex:
index 54da292..716b9ce 100644 (file)
@@ -78,6 +78,7 @@ do_blit_readpixels(struct gl_context * ctx,
    GLuint dst_offset;
    GLuint rowLength;
    drm_intel_bo *dst_buffer;
+   GLuint offset = 0; /* intel_bufferobj_buffer() reports no sub-offset; must not be read uninitialized */
    GLboolean all;
    GLint dst_x, dst_y;
 
@@ -138,8 +139,8 @@ do_blit_readpixels(struct gl_context * ctx,
    dst_y = 0;
 
    dst_buffer = intel_bufferobj_buffer(intel, dst,
-                                              all ? INTEL_WRITE_FULL :
-                                              INTEL_WRITE_PART);
+                                      all ? INTEL_WRITE_FULL :
+                                      INTEL_WRITE_PART);
 
    if (ctx->ReadBuffer->Name == 0)
       y = ctx->ReadBuffer->Height - (y + height);
@@ -147,7 +148,7 @@ do_blit_readpixels(struct gl_context * ctx,
    if (!intelEmitCopyBlit(intel,
                          src->cpp,
                          src->pitch, src->buffer, 0, src->tiling,
-                         rowLength, dst_buffer, dst_offset, GL_FALSE,
+                         rowLength, dst_buffer, dst_offset + offset, GL_FALSE,
                          x, y,
                          dst_x, dst_y,
                          width, height,
index cd8c4c2..ddcb748 100644 (file)
@@ -235,11 +235,13 @@ try_pbo_upload(struct intel_context *intel,
       intel_flush(&intel->ctx);
 
    {
-      drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
+      GLuint offset;
+      drm_intel_bo *src_buffer = intel_bufferobj_source(intel, pbo, &offset);
 
       if (!intelEmitCopyBlit(intel,
                             intelImage->mt->cpp,
-                            src_stride, src_buffer, src_offset, GL_FALSE,
+                            src_stride, src_buffer,
+                            src_offset + offset, GL_FALSE,
                             dst_stride, dst_buffer, 0,
                             intelImage->mt->region->tiling,
                             0, 0, dst_x, dst_y, width, height,