Added support __constant samplers
authorbsegovia <devnull@localhost>
Tue, 18 Oct 2011 00:59:41 +0000 (00:59 +0000)
committerKeith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:01 +0000 (16:15 -0700)
kernels/test_copy_image.cl
src/cl_command_queue_gen6.c
src/intel/intel_gpgpu.c
src/intel/intel_gpgpu.h
src/intel/intel_structs.h

index 3d6fb06..921b138 100644 (file)
@@ -2,13 +2,11 @@ __constant sampler_t s = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP |
                          CLK_FILTER_NEAREST;
 __kernel void
-test_copy_image(__read_only image2d_t src,
-                __global uchar4 *dst)
+test_copy_image(__read_only image2d_t src, __global uchar4 *dst)
 {
   const int x = (int) get_global_id(0);
   const int y = (int) get_global_id(1);
   const int id = x + y * get_image_width(src);
-  //const int id = x + y * 32;
   const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y)));
   dst[id] = from;
 }
index c5547e4..e811453 100644 (file)
@@ -143,11 +143,7 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue,
   /* Create the constant buffer */
   if (cst_sz > 0) {
     assert(ker->cst_buffer);
-    curbe = cl_kernel_create_cst_buffer(ker,
-                                        global_wk_off,
-                                        global_wk_sz,
-                                        local_wk_sz,
-                                        0, 0);
+    curbe = cl_kernel_create_cst_buffer(ker, global_wk_off, global_wk_sz, local_wk_sz, 0, 0);
   }
 
   /* Only if we want to monitor performance for this kernel */
@@ -166,6 +162,12 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue,
                                 &private_bo,
                                 &scratch_bo,
                                 header.local_mem_sz);
+
+  /* Upload the __constant samplers if any */
+  const void *samplers  = ker->dynamic_heap + ker->patch.sampler_state.offset;
+  const uint32_t sampler_n = ker->patch.sampler_state.count;
+  gpgpu_upload_samplers(gpgpu, samplers, sampler_n);
+
   gpgpu_states_setup(gpgpu, kernels, 16);
 
   /* Fill the constant buffer */
index 7a08de8..8bdb52a 100644 (file)
 #define MO_RETAIN_BIT         (1 << 28)
 #define SAMPLER_STATE_SIZE    (16)
 
-/* No dependency on Gen specific structures */
-struct opaque_sampler_state {
-  char opaque[SAMPLER_STATE_SIZE];
-};
-
 /* Stores both binding tables and surface states */
 typedef struct surface_heap {
   uint32_t binding_table[256];
@@ -75,9 +70,6 @@ struct intel_gpgpu
   struct { dri_bo *bo; } sampler_state_b;
   struct { dri_bo *bo; } perf_b;
 
-  /* we will just copy them into the bo */
-  struct opaque_sampler_state samplers[MAX_SAMPLERS];
-
   struct {
     uint32_t num_cs_entries;
     uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
@@ -86,9 +78,6 @@ struct intel_gpgpu
   uint32_t max_threads;      /* max threads requested by the user */
 };
 
-/* Be sure that the size is still valid */
-STATIC_ASSERT(sizeof(struct opaque_sampler_state) == 16);//sizeof(struct i965_sampler_state));
-
 LOCAL intel_gpgpu_t*
 intel_gpgpu_new(intel_driver_t *drv)
 {
@@ -258,51 +247,51 @@ static const uint32_t gpgpu_l3_config_reg2[] =
 
 enum INSTRUCTION_PIPELINE
 {
-    PIPE_COMMON       = 0x0,
-    PIPE_SINGLE_DWORD = 0x1,
-    PIPE_COMMON_CTG   = 0x1,
-    PIPE_MEDIA        = 0x2,
-    PIPE_3D           = 0x3
+  PIPE_COMMON       = 0x0,
+  PIPE_SINGLE_DWORD = 0x1,
+  PIPE_COMMON_CTG   = 0x1,
+  PIPE_MEDIA        = 0x2,
+  PIPE_3D           = 0x3
 };
 
 enum GFX_OPCODE
 {
-    GFXOP_PIPELINED     = 0x0,
-    GFXOP_NONPIPELINED  = 0x1,
-    GFXOP_3DPRIMITIVE   = 0x3
+  GFXOP_PIPELINED     = 0x0,
+  GFXOP_NONPIPELINED  = 0x1,
+  GFXOP_3DPRIMITIVE   = 0x3
 };
 
 enum INSTRUCTION_TYPE
 {
-    INSTRUCTION_MI      = 0x0,
-    INSTRUCTION_TRUSTED = 0x1,
-    INSTRUCTION_2D      = 0x2,
-    INSTRUCTION_GFX     = 0x3
+  INSTRUCTION_MI      = 0x0,
+  INSTRUCTION_TRUSTED = 0x1,
+  INSTRUCTION_2D      = 0x2,
+  INSTRUCTION_GFX     = 0x3
 };
 
 enum GFX3DCONTROL_SUBOPCODE
 {
-    GFX3DSUBOP_3DCONTROL    = 0x00
+  GFX3DSUBOP_3DCONTROL    = 0x00
 };
 
 enum GFX3D_OPCODE
 {
-    GFX3DOP_3DSTATE_PIPELINED       = 0x0,
-    GFX3DOP_3DSTATE_NONPIPELINED    = 0x1,
-    GFX3DOP_3DCONTROL               = 0x2,
-    GFX3DOP_3DPRIMITIVE             = 0x3
+  GFX3DOP_3DSTATE_PIPELINED       = 0x0,
+  GFX3DOP_3DSTATE_NONPIPELINED    = 0x1,
+  GFX3DOP_3DCONTROL               = 0x2,
+  GFX3DOP_3DPRIMITIVE             = 0x3
 };
 
 enum GFX3DSTATE_PIPELINED_SUBOPCODE
 {
-    GFX3DSUBOP_3DSTATE_PIPELINED_POINTERS       = 0x00,
-    GFX3DSUBOP_3DSTATE_BINDING_TABLE_POINTERS   = 0x01,
-    GFX3DSUBOP_3DSTATE_STATE_POINTER_INVALIDATE = 0x02,
-    GFX3DSUBOP_3DSTATE_VERTEX_BUFFERS           = 0x08,
-    GFX3DSUBOP_3DSTATE_VERTEX_ELEMENTS          = 0x09,
-    GFX3DSUBOP_3DSTATE_INDEX_BUFFER             = 0x0A,
-    GFX3DSUBOP_3DSTATE_VF_STATISTICS            = 0x0B,
-    GFX3DSUBOP_3DSTATE_CC_STATE_POINTERS        = 0x0E
+  GFX3DSUBOP_3DSTATE_PIPELINED_POINTERS       = 0x00,
+  GFX3DSUBOP_3DSTATE_BINDING_TABLE_POINTERS   = 0x01,
+  GFX3DSUBOP_3DSTATE_STATE_POINTER_INVALIDATE = 0x02,
+  GFX3DSUBOP_3DSTATE_VERTEX_BUFFERS           = 0x08,
+  GFX3DSUBOP_3DSTATE_VERTEX_ELEMENTS          = 0x09,
+  GFX3DSUBOP_3DSTATE_INDEX_BUFFER             = 0x0A,
+  GFX3DSUBOP_3DSTATE_VF_STATISTICS            = 0x0B,
+  GFX3DSUBOP_3DSTATE_CC_STATE_POINTERS        = 0x0E
 };
 
 static void
@@ -463,11 +452,12 @@ gpgpu_state_init(intel_gpgpu_t *state,
     dri_bo_unreference(state->sampler_state_b.bo);
   bo = dri_bo_alloc(state->drv->bufmgr, 
                     "sample states",
-                    MAX_SAMPLERS * sizeof(struct gen6_sampler_state),
+                    MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
                     32);
   assert(bo);
+  dri_bo_map(bo, 1);
+  memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * MAX_SAMPLERS);
   state->sampler_state_b.bo = bo;
-  memset(state->samplers, 0, sizeof(state->samplers));
 }
 
 static void
@@ -621,15 +611,6 @@ gpgpu_bind_image2D(intel_gpgpu_t *state,
 }
 
 static void
-gpgpu_build_sampler_table(intel_gpgpu_t *state)
-{
-  dri_bo_subdata(state->sampler_state_b.bo,
-                 0,
-                 sizeof(state->samplers),
-                 state->samplers);
-}
-
-static void
 gpgpu_build_idrt(intel_gpgpu_t *state,
                  genx_gpgpu_kernel_t *kernel,
                  uint32_t ker_n)
@@ -693,7 +674,7 @@ gpgpu_build_idrt(intel_gpgpu_t *state,
 }
 
 LOCAL void
-gpgpu_upload_constants(intel_gpgpu_t *state, void* data, uint32_t size)
+gpgpu_upload_constants(intel_gpgpu_t *state, const void* data, uint32_t size)
 {
   unsigned char *constant_buffer = NULL;
 
@@ -705,12 +686,22 @@ gpgpu_upload_constants(intel_gpgpu_t *state, void* data, uint32_t size)
 }
 
 LOCAL void
+gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n)
+{
+  if (n) {
+    /*sizeof(gen6_sampler_state_t) == sizeof(gen7_surface_state_t) */
+    const size_t sz = n * sizeof(gen6_sampler_state_t);
+    memcpy(state->sampler_state_b.bo->virtual, data, sz);
+  }
+}
+
+LOCAL void
 gpgpu_states_setup(intel_gpgpu_t *state, genx_gpgpu_kernel_t *kernel, uint32_t ker_n)
 {
   state->ker = kernel;
-  gpgpu_build_sampler_table(state);
   gpgpu_build_idrt(state, kernel, ker_n);
   dri_bo_unmap(state->surface_heap_b.bo);
+  dri_bo_unmap(state->sampler_state_b.bo);
 }
 
 LOCAL void 
index 2b7b26a..13c9d27 100644 (file)
@@ -91,7 +91,7 @@ extern void gpgpu_state_init(intel_gpgpu_t*, uint32_t max_threads, uint32_t size
 extern void gpgpu_set_perf_counters(intel_gpgpu_t*, struct _drm_intel_bo *perf);
 
 /* Fills current constant buffer with data */
-extern void gpgpu_upload_constants(intel_gpgpu_t*, void* data, uint32_t size);
+extern void gpgpu_upload_constants(intel_gpgpu_t*, const void* data, uint32_t size);
 
 /* Setup all indirect states */
 extern void gpgpu_states_setup(intel_gpgpu_t*, genx_gpgpu_kernel_t* kernel, uint32_t ker_n);
@@ -99,7 +99,10 @@ extern void gpgpu_states_setup(intel_gpgpu_t*, genx_gpgpu_kernel_t* kernel, uint
 /* Make HW threads use barrierID */
 extern void gpgpu_update_barrier(intel_gpgpu_t*, uint32_t barrierID, uint32_t thread_n);
 
-/* Set a sampler (TODO: add other sampler fields) */
+/* Upload the constant samplers as specified inside the OCL kernel */
+extern void gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n);
+
+/* Set a sampler */
 extern void gpgpu_set_sampler(intel_gpgpu_t*, uint32_t index, uint32_t non_normalized);
 
 /* Allocate the batch buffer and return the BO used for the batch buffer */
index 23bfc59..0301692 100644 (file)
@@ -404,6 +404,8 @@ typedef struct gen7_sampler_state
   } ss3;
 } gen7_sampler_state_t;
 
+STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t));
+
 #undef BITFIELD_BIT
 #undef BITFIELD_RANGE