First test now passes with images (format still hard-coded and constant patching...
authorbsegovia <devnull@localhost>
Fri, 14 Oct 2011 04:41:19 +0000 (04:41 +0000)
committerKeith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:14:58 +0000 (16:14 -0700)
kernels/test_copy_image.cl
src/CMakeLists.txt
src/cl_command_queue.c
src/cl_image.c [new file with mode: 0644]
src/cl_image.h [new file with mode: 0644]
src/cl_kernel.c
src/cl_mem.c
src/intel/intel_gpgpu.c
src/intel/intel_gpgpu.h
src/intel/intel_structs.h

index 1ccc581..9693e86 100644 (file)
@@ -5,10 +5,11 @@ __kernel void
 test_copy_image(__read_only image2d_t src,
                 __global uchar4 *dst)
 {
-    const int x = (int) get_global_id(0);
-    const int y = (int) get_global_id(1);
-    const int id = x + y * get_image_width(src);
-    const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y)));
-    dst[id] = from;
+  const int x = (int) get_global_id(0);
+  const int y = (int) get_global_id(1);
+  //const int id = x + y * get_image_width(src);
+  const int id = x + y * 32;
+  const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y)));
+  dst[id] = from;
 }
 
index 8a9b3f2..d1fff6c 100644 (file)
@@ -9,6 +9,7 @@ SET(OPENCL_SRC
     cl_kernel.c
     cl_program.c
     cl_event.c
+    cl_image.c
     cl_mem.c
     cl_platform_id.c
     cl_device_id.c
index 3f2a8e7..1faec2b 100644 (file)
@@ -118,8 +118,8 @@ cl_command_queue_bind_surface(cl_command_queue queue,
 
   /* Bind user defined surface */
   for (i = 0; i < k->arg_info_n; ++i) {
-    if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER)
-      continue;
+//    if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER)
+//      continue;
     assert(k->arg_info[i].offset % SURFACE_SZ == 0);
     index = k->arg_info[i].offset / SURFACE_SZ;
     mem = (cl_mem) k->args[k->arg_info[i].arg_index];
@@ -127,7 +127,18 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     CHECK_MEM(mem);
     bo = mem->bo;
     assert(bo);
-    gpgpu_bind_buf(gpgpu, index, bo, 0, bo->size, cc_llc_mlc);
+    if (mem->is_image)
+#define I965_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB 
+      gpgpu_bind_image2D(gpgpu,
+                         index,
+                         bo,
+                         I965_SURFACEFORMAT_R8G8B8A8_UINT,
+                         mem->w,
+                         mem->h,
+                         4,
+                         cc_llc_mlc);
+    else
+      gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_mlc);
   }
 
   /* Allocate the constant surface (if any) */
@@ -135,7 +146,6 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     assert(k->const_bo_index != MAX_SURFACES - 1);
     gpgpu_bind_buf(gpgpu, k->const_bo_index,
                    k->const_bo,
-                   0,
                    k->const_bo->size,
                    cc_llc_mlc);
   }
@@ -147,7 +157,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     index = k->patch.local_surf.offset / SURFACE_SZ;
     assert(index != MAX_SURFACES - 1);
     *local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *local, 0, sz, cc_llc_mlc);
+    gpgpu_bind_buf(gpgpu, index, *local, sz, cc_llc_mlc);
   }
   else if (local)
     *local = NULL;
@@ -162,7 +172,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     index = k->patch.private_surf.offset / SURFACE_SZ;
     assert(index != MAX_SURFACES - 1);
     *priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *priv, 0, sz, cc_llc_mlc);
+    gpgpu_bind_buf(gpgpu, index, *priv, sz, cc_llc_mlc);
   }
   else if(priv)
     *priv = NULL;
@@ -177,14 +187,14 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     assert(index != MAX_SURFACES - 1);
     index = k->patch.scratch.offset / SURFACE_SZ;
     *scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *scratch, 0, sz, cc_llc_mlc);
+    gpgpu_bind_buf(gpgpu, index, *scratch, sz, cc_llc_mlc);
   }
   else if (scratch)
     *scratch = NULL;
 
   /* Now bind a bo used for synchronization */
   sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64);
-  gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 0, 64, cc_llc_mlc);
+  gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 64, cc_llc_mlc);
   if (queue->last_batch != NULL)
     drm_intel_bo_unreference(queue->last_batch);
   queue->last_batch = sync_bo;
diff --git a/src/cl_image.c b/src/cl_image.c
new file mode 100644 (file)
index 0000000..7e6e700
--- /dev/null
@@ -0,0 +1,95 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "cl_image.h"
+#include "cl_utils.h"
+
+#include <assert.h>
+
+LOCAL cl_int
+cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
+{
+  assert(bpp);
+
+  const uint32_t type = fmt->image_channel_data_type;
+  const uint32_t order = fmt->image_channel_order;
+  switch (type) {
+#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE;
+    DECL_BPP(CL_SNORM_INT8, 1); break;
+    DECL_BPP(CL_SNORM_INT16, 2); break;
+    DECL_BPP(CL_UNORM_INT8, 1); break;
+    DECL_BPP(CL_UNORM_INT16, 2); break;
+    DECL_BPP(CL_UNORM_SHORT_565, 2);
+      if (order != CL_RGBx && order != CL_RGB)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+    DECL_BPP(CL_UNORM_SHORT_555, 2);
+      if (order != CL_RGBx && order != CL_RGB)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+    DECL_BPP(CL_UNORM_INT_101010, 4);
+      if (order != CL_RGBx && order != CL_RGB)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+    DECL_BPP(CL_SIGNED_INT8, 1); break;
+    DECL_BPP(CL_SIGNED_INT16, 2); break;
+    DECL_BPP(CL_SIGNED_INT32, 4); break;
+    DECL_BPP(CL_UNSIGNED_INT8, 1); break;
+    DECL_BPP(CL_UNSIGNED_INT16, 2); break;
+    DECL_BPP(CL_UNSIGNED_INT32, 4); break;
+    DECL_BPP(CL_HALF_FLOAT, 2); break;
+    DECL_BPP(CL_FLOAT, 4); break;
+#undef DECL_BPP
+    default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  };
+
+  switch (order) {
+    case CL_R: break;
+    case CL_A: break;
+    case CL_RA: *bpp *= 2; break;
+    case CL_RG: *bpp *= 2; break;
+    case CL_Rx: *bpp *= 2; break;
+    case CL_INTENSITY:
+    case CL_LUMINANCE:
+      if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 &&
+          type != CL_SNORM_INT8 && type != CL_SNORM_INT16 &&
+          type != CL_HALF_FLOAT && type != CL_FLOAT)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+    case CL_RGB:
+    case CL_RGBx:
+      if (type != CL_UNORM_SHORT_555 &&
+          type != CL_UNORM_SHORT_565 &&
+          type != CL_UNORM_INT_101010)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+    case CL_RGBA: *bpp *= 4; break;
+    case CL_ARGB:
+    case CL_BGRA:
+      if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 &&
+          type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8)
+        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+      *bpp *= 4;
+    break;
+    default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  };
+
+  return CL_SUCCESS;
+}
+
diff --git a/src/cl_image.h b/src/cl_image.h
new file mode 100644 (file)
index 0000000..923bf1d
--- /dev/null
@@ -0,0 +1,31 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#ifndef __CL_IMAGE_H__
+#define __CL_IMAGE_H__
+
+#include "cl_internals.h"
+#include "CL/cl.h"
+#include <stdint.h>
+
+/* Compute the number of bytes per pixel if the format is supported */
+extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp);
+
+#endif /* __CL_IMAGE_H__ */
+
index c199786..2d19b04 100644 (file)
@@ -451,54 +451,6 @@ error:
 }
 
 #undef ASSOC_ITEM
-typedef struct i965_sampler_state
-{
-  struct
-  {
-    uint32_t shadow_function:3; 
-    uint32_t lod_bias:11; 
-    uint32_t min_filter:3; 
-    uint32_t mag_filter:3; 
-    uint32_t mip_filter:2; 
-    uint32_t base_level:5; 
-    uint32_t min_mag_neq:1;
-    uint32_t lod_preclamp:1; 
-    uint32_t default_color_mode:1; 
-    uint32_t pad0:1;
-    uint32_t disable:1; 
-  } ss0;
-
-  struct
-  {
-    uint32_t r_wrap_mode:3; 
-    uint32_t t_wrap_mode:3; 
-    uint32_t s_wrap_mode:3; 
-    uint32_t cube_control_mode:1;
-    uint32_t pad:2;
-    uint32_t max_lod:10; 
-    uint32_t min_lod:10; 
-  } ss1;
-
-
-  struct
-  {
-    uint32_t pad:5;
-    uint32_t default_color_pointer:27; 
-  } ss2;
-
-  struct
-  {
-    uint32_t non_normalized_coord:1;
-    uint32_t pad:12;
-    uint32_t address_round:6;
-    uint32_t max_aniso:3; 
-    uint32_t chroma_key_mode:1; 
-    uint32_t chroma_key_index:2; 
-    uint32_t chroma_key_enable:1; 
-    uint32_t monochrome_filter_width:3; 
-    uint32_t monochrome_filter_height:3; 
-  } ss3;
-} i965_sampler_state_t;
 
 LOCAL int
 cl_kernel_setup(cl_kernel k, const char *ker)
index 7151df7..f2fad25 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include "cl_mem.h"
+#include "cl_image.h"
 #include "cl_context.h"
 #include "cl_utils.h"
 #include "cl_alloc.h"
@@ -123,77 +124,6 @@ error:
   goto exit;
 }
 
-static cl_int
-cl_mem_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
-{
-  assert(bpp);
-
-  const uint32_t type = fmt->image_channel_data_type;
-  const uint32_t order = fmt->image_channel_order;
-  switch (type) {
-#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE;
-    DECL_BPP(CL_SNORM_INT8, 1); break;
-    DECL_BPP(CL_SNORM_INT16, 2); break;
-    DECL_BPP(CL_UNORM_INT8, 1); break;
-    DECL_BPP(CL_UNORM_INT16, 2); break;
-    DECL_BPP(CL_UNORM_SHORT_565, 2);
-      if (order != CL_RGBx && order != CL_RGB)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-    break;
-    DECL_BPP(CL_UNORM_SHORT_555, 2);
-      if (order != CL_RGBx && order != CL_RGB)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-    break;
-    DECL_BPP(CL_UNORM_INT_101010, 4);
-      if (order != CL_RGBx && order != CL_RGB)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-    break;
-    DECL_BPP(CL_SIGNED_INT8, 1); break;
-    DECL_BPP(CL_SIGNED_INT16, 2); break;
-    DECL_BPP(CL_SIGNED_INT32, 4); break;
-    DECL_BPP(CL_UNSIGNED_INT8, 1); break;
-    DECL_BPP(CL_UNSIGNED_INT16, 2); break;
-    DECL_BPP(CL_UNSIGNED_INT32, 4); break;
-    DECL_BPP(CL_HALF_FLOAT, 2); break;
-    DECL_BPP(CL_FLOAT, 4); break;
-#undef DECL_BPP
-    default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-  };
-
-  switch (order) {
-    case CL_R: break;
-    case CL_A: break;
-    case CL_RA: *bpp *= 2; break;
-    case CL_RG: *bpp *= 2; break;
-    case CL_Rx: *bpp *= 2; break;
-    case CL_INTENSITY:
-    case CL_LUMINANCE:
-      if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 &&
-          type != CL_SNORM_INT8 && type != CL_SNORM_INT16 &&
-          type != CL_HALF_FLOAT && type != CL_FLOAT)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-    break;
-    case CL_RGB:
-    case CL_RGBx:
-      if (type != CL_UNORM_SHORT_555 &&
-          type != CL_UNORM_SHORT_565 &&
-          type != CL_UNORM_INT_101010)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-    break;
-    case CL_RGBA: *bpp *= 4; break;
-    case CL_ARGB:
-    case CL_BGRA:
-      if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 &&
-          type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8)
-        return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-      *bpp *= 4;
-    break;
-    default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
-  };
-
-  return CL_SUCCESS;
-}
-
 LOCAL cl_mem
 cl_mem_new_image2D(cl_context ctx,
                    cl_mem_flags flags,
@@ -216,7 +146,7 @@ cl_mem_new_image2D(cl_context ctx,
   }
 
   /* Get the size of each pixel */
-  if (UNLIKELY((err = cl_mem_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
+  if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
     goto error;
 
   /* See if the user parameters match */
index f9e7db1..9a8a63f 100644 (file)
@@ -82,10 +82,10 @@ struct intel_gpgpu
 
   struct {
     uint32_t num_cs_entries;
-    uint32_t size_cs_entry;     /* size of one entry in 512bit elements */
+    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
   } urb;
 
-  uint32_t max_threads;         /* max threads requested by the user */
+  uint32_t max_threads;      /* max threads requested by the user */
 };
 
 /* Be sure that the size is still valid */
@@ -467,68 +467,161 @@ gpgpu_state_init(intel_gpgpu_t *state,
     dri_bo_unreference(state->sampler_state_b.bo);
   bo = dri_bo_alloc(state->drv->bufmgr, 
                     "sample states",
-                    //MAX_SAMPLERS * sizeof(struct i965_sampler_state),
-                    MAX_SAMPLERS * 16,
+                    MAX_SAMPLERS * sizeof(struct gen6_sampler_state),
                     32);
   assert(bo);
   state->sampler_state_b.bo = bo;
   memset(state->samplers, 0, sizeof(state->samplers));
 }
 
+static void
+gpgpu_set_buf_reloc_gen6(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * sizeof(gen6_surface_state_t);
+  dri_bo_emit_reloc(state->surface_heap_b.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    0,
+                    heap->binding_table[index] +
+                    offsetof(gen6_surface_state_t, ss1),
+                    obj_bo);
+}
+
+static void
+gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * sizeof(gen7_surface_state_t);
+  dri_bo_emit_reloc(state->surface_heap_b.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    0,
+                    heap->binding_table[index] +
+                    offsetof(gen7_surface_state_t, ss1),
+                    obj_bo);
+}
+
+static void
+gpgpu_bind_buf_gen6(intel_gpgpu_t *state,
+                    int32_t index,
+                    dri_bo* obj_bo,
+                    uint32_t size,
+                    uint32_t cchint)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index];
+  const uint32_t size_ss = ((size+0xf) >> 4) - 1; /* ceil(size/16) - 1 */
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_FLOAT;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width = size_ss & 0x7f;           /* bits 6:0 of size_ss */
+  ss->ss2.height = (size_ss >> 7) & 0x1fff; /* bits 19:7 of size_ss */
+  ss->ss3.depth = size_ss >> 20;            /* bits 26:20 of size_ss */
+  ss->ss3.pitch = 0xf;                      /* sizeof(RGBA32) - 1 */;
+  ss->ss5.cache_control = cchint;
+  gpgpu_set_buf_reloc_gen6(state, index, obj_bo);
+}
+
+static void
+gpgpu_bind_buf_gen7(intel_gpgpu_t *state,
+                    int32_t index,
+                    dri_bo* obj_bo,
+                    uint32_t size,
+                    uint32_t cchint)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+  const uint32_t size_ss = size - 1;
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width  = size_ss & 0x7f;               /* bits 6:0 of size_ss */
+  ss->ss2.height = (size_ss & 0x1fff80) >> 7;    /* bits 20:7 of size_ss */
+  ss->ss3.depth  = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */
+  ss->ss5.surface_object_control_state = GEN7_CACHED_IN_LLC;
+  gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
+}
+
+static void
+gpgpu_bind_image2D_gen6(intel_gpgpu_t *state,
+                        int32_t index,
+                        dri_bo* obj_bo,
+                        uint32_t format,
+                        int32_t w,
+                        int32_t h,
+                        int bpp,
+                        uint32_t cchint)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index];
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.surface_type = I965_SURFACE_2D;
+  ss->ss0.surface_format = format;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.pitch = w*bpp - 1;
+  ss->ss5.cache_control = cchint;
+  gpgpu_set_buf_reloc_gen6(state, index, obj_bo);
+}
+
+static void
+gpgpu_bind_image2D_gen7(intel_gpgpu_t *state,
+                        int32_t index,
+                        dri_bo* obj_bo,
+                        uint32_t format,
+                        int32_t w,
+                        int32_t h,
+                        int bpp,
+                        uint32_t cchint)
+{
+  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
+  gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.surface_type = I965_SURFACE_2D;
+  ss->ss0.surface_format = format;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.pitch = w*bpp - 1;
+  //ss->ss5.cache_control = cchint;
+  gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
+}
+
 LOCAL void
 gpgpu_bind_buf(intel_gpgpu_t *state,
                int32_t index,
                dri_bo* obj_bo,
-               uint32_t offset,
                uint32_t size,
                uint32_t cchint)
 {
-  assert(offset < MAX_SURFACES);
-  surface_heap_t *heap = state->surface_heap_b.bo->virtual;
-
-  if(state->drv->gen_ver == 6) {
-    gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index];
-    const uint32_t size_ss = ((size+0xf) >> 4) - 1; /* ceil(size/16) - 1 */
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_BUFFER;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_FLOAT;
-    ss->ss1.base_addr = obj_bo->offset + offset;
-    ss->ss2.width = size_ss & 0x7f;           /* bits 6:0 of size_ss */
-    ss->ss2.height = (size_ss >> 7) & 0x1fff; /* bits 19:7 of size_ss */
-    ss->ss3.depth = size_ss >> 20;            /* bits 26:20 of size_ss */
-    ss->ss3.pitch = 0xf;                      /* sizeof(RGBA32) - 1 */;
-    ss->ss5.cache_control = cchint;
-    heap->binding_table[index] = offsetof(surface_heap_t, surface) +
-                                 index * sizeof(gen6_surface_state_t);
-    dri_bo_emit_reloc(state->surface_heap_b.bo,
-                      I915_GEM_DOMAIN_RENDER,
-                      I915_GEM_DOMAIN_RENDER,
-                      offset,
-                      heap->binding_table[index] +
-                      offsetof(gen6_surface_state_t, ss1),
-                      obj_bo);
-  } else if (state->drv->gen_ver == 7) {
-    gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
-    const uint32_t size_ss = size - 1;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_BUFFER;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_RAW;
-    ss->ss1.base_addr = obj_bo->offset + offset;
-    ss->ss2.width  = size_ss & 0x7f;               /* bits 6:0 of size_ss */
-    ss->ss2.height = (size_ss & 0x1fff80) >> 7;    /* bits 20:7 of size_ss */
-    ss->ss3.depth  = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */
-    ss->ss5.surface_object_control_state = GEN7_CACHED_IN_LLC;
-    heap->binding_table[index] = offsetof(surface_heap_t, surface) +
-                                 index * sizeof(gen7_surface_state_t);
-    dri_bo_emit_reloc(state->surface_heap_b.bo,
-                      I915_GEM_DOMAIN_RENDER,
-                      I915_GEM_DOMAIN_RENDER,
-                      offset,
-                      heap->binding_table[index] +
-                      offsetof(gen7_surface_state_t, ss1),
-                      obj_bo);
-  }
+  assert(index < MAX_SURFACES);
+  if(state->drv->gen_ver == 6)
+    gpgpu_bind_buf_gen6(state, index, obj_bo, size, cchint);
+  else if (state->drv->gen_ver == 7)
+    gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
+}
 
+LOCAL void
+gpgpu_bind_image2D(intel_gpgpu_t *state,
+                   int32_t index,
+                   dri_bo* obj_bo,
+                   uint32_t format,
+                   int32_t w,
+                   int32_t h,
+                   int bpp,
+                   uint32_t cchint)
+{
+  assert(index < MAX_SURFACES);
+  if(state->drv->gen_ver == 6)
+    gpgpu_bind_image2D_gen6(state, index, obj_bo, format, w, h, bpp, cchint);
+  else if (state->drv->gen_ver == 7)
+    gpgpu_bind_image2D_gen7(state, index, obj_bo, format, w, h, bpp, cchint);
 }
 
 static void
index 3ab7274..47f23a8 100644 (file)
@@ -25,6 +25,9 @@
 #include <stdlib.h>
 #include <stdint.h>
 
+#define MAX_SURFACES 128
+#define MAX_SAMPLERS 16
+
 enum gen6_cache_control {
   cc_gtt        = 0x0,    /* don't use L3, use GTT for LLC */
   cc_mlc        = 0x1,    /* IVB: use L3, use GTT for LLC; SNB: UC */
@@ -32,9 +35,6 @@ enum gen6_cache_control {
   cc_llc_mlc    = 0x3,
 };
 
-#define MAX_SURFACES   128
-#define MAX_SAMPLERS   16
-
 /* Use this structure to bind kernels in the gpgpu state */
 typedef struct genx_gpgpu_kernel {
   const char *name;        /* kernel name and bo name */
@@ -71,10 +71,19 @@ extern int32_t intel_gpgpu_version(intel_gpgpu_t*);
 extern void gpgpu_bind_buf(intel_gpgpu_t*,
                            int32_t index,
                            struct _drm_intel_bo* obj_bo,
-                           uint32_t offset,
                            uint32_t size,
                            uint32_t cchint);
 
+/* Set a 2d texture */
+extern void gpgpu_bind_image2D(intel_gpgpu_t *state,
+                               int32_t index,
+                               struct _drm_intel_bo* obj_bo,
+                               uint32_t format,
+                               int32_t w,
+                               int32_t h,
+                               int bpp,
+                               uint32_t cchint);
+
 /* Configure state, size in 512-bit units */
 extern void gpgpu_state_init(intel_gpgpu_t*, uint32_t max_threads, uint32_t size_cs_entry);
 
index 134b270..a455bdf 100644 (file)
@@ -17,8 +17,8 @@
  * Author: Benjamin Segovia <benjamin.segovia@intel.com>
  */
 
-#ifndef __GENX_STRUCTS_H__
-#define __GENX_STRUCTS_H__
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
 
 #include <stdint.h>
 
@@ -70,13 +70,8 @@ typedef struct gen6_interface_descriptor
     uint32_t barrier_return_grf_offset:8;
   } desc5;
 
-  struct {
-    uint32_t reserved_mbz;
-  } desc6;
-
-  struct {
-    uint32_t reserved_mbz;
-  } desc7;
+  uint32_t desc6;
+  uint32_t desc7;
 } gen6_interface_descriptor_t;
 
 typedef struct gen6_surface_state
@@ -206,7 +201,6 @@ typedef struct gen7_surface_state
 
   uint32_t ss6; /* unused */
   uint32_t ss7; /* unused */
-
 } gen7_surface_state_t;
 
 STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
@@ -275,8 +269,7 @@ typedef struct gen6_vfe_state_inline
 
 typedef struct gen6_pipe_control
 {
-  struct
-  {
+  struct {
     uint32_t length                              : BITFIELD_RANGE(0, 7);
     uint32_t reserved                            : BITFIELD_RANGE(8, 15);
     uint32_t instruction_subopcode               : BITFIELD_RANGE(16, 23);
@@ -285,8 +278,7 @@ typedef struct gen6_pipe_control
     uint32_t instruction_type                    : BITFIELD_RANGE(29, 31);
   } dw0;
 
-  struct
-  {
+  struct {
     uint32_t depth_cache_flush_enable            : BITFIELD_BIT(0);
     uint32_t stall_at_pixel_scoreboard           : BITFIELD_BIT(1);
     uint32_t state_cache_invalidation_enable     : BITFIELD_BIT(2);
@@ -312,23 +304,20 @@ typedef struct gen6_pipe_control
     uint32_t reserved                            : BITFIELD_RANGE(23, 31);
   } dw1;
 
-  struct
-  {
+  struct {
     uint32_t reserved                            : BITFIELD_RANGE(0, 1);
     uint32_t destination_address_type            : BITFIELD_BIT(2);
     uint32_t address                             : BITFIELD_RANGE(3, 31);
   } dw2;
 
-  struct
-  {
+  struct {
     uint64_t data;
   } qw0;
 } gen6_pipe_control_t;
-#if 0
+
 typedef struct gen6_sampler_state
 {
-  struct
-  {
+  struct {
     uint32_t shadow_function:3; 
     uint32_t lod_bias:11; 
     uint32_t min_filter:3; 
@@ -342,8 +331,7 @@ typedef struct gen6_sampler_state
     uint32_t disable:1; 
   } ss0;
 
-  struct
-  {
+  struct {
     uint32_t r_wrap_mode:3; 
     uint32_t t_wrap_mode:3; 
     uint32_t s_wrap_mode:3; 
@@ -353,15 +341,12 @@ typedef struct gen6_sampler_state
     uint32_t min_lod:10; 
   } ss1;
 
-
-  struct
-  {
+  struct {
     uint32_t pad:5;
     uint32_t default_color_pointer:27; 
   } ss2;
 
-  struct
-  {
+  struct {
     uint32_t non_normalized_coord:1;
     uint32_t pad:12;
     uint32_t address_round:6;
@@ -373,12 +358,10 @@ typedef struct gen6_sampler_state
     uint32_t monochrome_filter_height:3; 
   } ss3;
 } gen6_sampler_state_t;
-#endif
 
 typedef struct gen7_sampler_state
 {
-  struct
-  {
+  struct {
     uint32_t aniso_algorithm:1;
     uint32_t lod_bias:13;
     uint32_t min_filter:3;
@@ -392,8 +375,7 @@ typedef struct gen7_sampler_state
     uint32_t disable:1;
   } ss0;
 
-  struct
-  {
+  struct {
     uint32_t cube_control_mode:1;
     uint32_t shadow_function:3;
     uint32_t pad:4;
@@ -401,14 +383,12 @@ typedef struct gen7_sampler_state
     uint32_t min_lod:12;
   } ss1;
 
-  struct
-  {
+  struct {
     uint32_t pad:5;
     uint32_t default_color_pointer:27;
   } ss2;
 
-  struct
-  {
+  struct {
     uint32_t r_wrap_mode:3;
     uint32_t t_wrap_mode:3;
     uint32_t s_wrap_mode:3;
@@ -427,4 +407,5 @@ typedef struct gen7_sampler_state
 #undef BITFIELD_BIT
 #undef BITFIELD_RANGE
 
-#endif /* __GENX_STRUCTS_H__ */
+#endif /* __INTEL_STRUCTS_H__ */
+