Made the EOT code run on the simulator
author Benjamin Segovia <segovia.benjamin@gmail.com>
Wed, 4 Apr 2012 15:53:49 +0000 (15:53 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:16:02 +0000 (16:16 -0700)
src/cl_command_queue_gen7.c
src/cl_driver.h
src/cl_gen7_device.h
src/intel/intel_gpgpu.c
src/sim/sim_driver.c

index 23584b2..a1c7388 100644 (file)
@@ -118,7 +118,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
     cl_buffer_unreference(queue->last_batch);
   queue->last_batch = NULL;
   cl_command_queue_bind_surface(queue, ker, curbe, NULL, &private_bo, &scratch_bo, 0);
-  cl_gpgpu_states_setup(gpgpu, &kernel, 1);
+  cl_gpgpu_states_setup(gpgpu, &kernel);
 
   /* CURBE step 2. Give the localID and upload it to video memory */
   TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
index 586a3fd..305cb27 100644 (file)
@@ -131,7 +131,7 @@ typedef void (cl_gpgpu_upload_constants_cb)(cl_gpgpu, const void* data, uint32_t
 extern cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants;
 
 /* Setup all indirect states */
-typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n);
+typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel *kernel);
 extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
 
 /* Upload the constant samplers as specified inside the OCL kernel */
index 75c4e3f..5fb5827 100644 (file)
@@ -17,8 +17,7 @@
  * Author: Benjamin Segovia <benjamin.segovia@intel.com>
  */
 
-/* Common fields for both SNB devices (either GT1 or GT2)
- */
+/* Common fields for both IVB devices (either GT1 or GT2) */
 .max_parameter_size = 256, 
 .global_mem_cache_line_size = 128, /* XXX */
 .global_mem_cache_size = 8 << 10, /* XXX */
index 7974bbf..c3fb558 100644 (file)
@@ -648,10 +648,10 @@ intel_gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n)
 }
 
 static void
-intel_gpgpu_states_setup(intel_gpgpu_t *state, cl_gpgpu_kernel *kernel, uint32_t ker_n)
+intel_gpgpu_states_setup(intel_gpgpu_t *state, cl_gpgpu_kernel *kernel)
 {
   state->ker = kernel;
-  intel_gpgpu_build_idrt(state, kernel, ker_n);
+  intel_gpgpu_build_idrt(state, kernel, 1);
   dri_bo_unmap(state->surface_heap_b.bo);
   dri_bo_unmap(state->sampler_state_b.bo);
 }
index 61a4613..cec2e25 100644 (file)
 #include "cl_driver.h"
 
 /* Fake buffer manager that just counts allocations */
-typedef struct sim_bufmgr { volatile int buf_n; } sim_bufmgr_t;
+struct _sim_bufmgr { volatile int buf_n; };
+typedef struct _sim_bufmgr *sim_bufmgr;
 
-static sim_bufmgr_t*
+static sim_bufmgr
 sim_bufmgr_new(void)
 {
-  return cl_calloc(1,sizeof(sim_bufmgr_t));
+  return cl_calloc(1,sizeof(struct _sim_bufmgr));
 }
 
 static void
-sim_bufmgr_delete(sim_bufmgr_t *bufmgr)
+sim_bufmgr_delete(sim_bufmgr bufmgr)
 {
   cl_free(bufmgr);
 }
 
-/* Fake low-driver */
-typedef struct sim_driver {
-  sim_bufmgr_t *bufmgr;
+/* Fake low-level driver */
+struct _sim_driver {
+  sim_bufmgr bufmgr;
   int gen_ver;
-} sim_driver_t;
+};
+
+typedef struct _sim_driver *sim_driver;
 
 static void
-sim_driver_delete(sim_driver_t *driver)
+sim_driver_delete(sim_driver driver)
 {
   if (driver == NULL) return;
   sim_bufmgr_delete(driver->bufmgr);
   cl_free(driver);
 }
 
-static sim_driver_t*
+static sim_driver
 sim_driver_new(void)
 {
-  sim_driver_t *driver = NULL;
-  TRY_ALLOC_NO_ERR(driver, cl_calloc(1, sizeof(sim_driver_t)));
+  sim_driver driver = NULL;
+  TRY_ALLOC_NO_ERR(driver, cl_calloc(1, sizeof(struct _sim_driver)));
   TRY_ALLOC_NO_ERR(driver->bufmgr, sim_bufmgr_new());
   driver->gen_ver = 7; // XXX make it flexible
 exit:
@@ -74,13 +77,13 @@ error:
 }
 
 static int
-sim_driver_get_ver(sim_driver_t *driver)
+sim_driver_get_ver(sim_driver driver)
 {
   return driver->gen_ver;
 }
 
-static sim_bufmgr_t*
-sim_driver_get_bufmgr(sim_driver_t *driver)
+static sim_bufmgr
+sim_driver_get_bufmgr(sim_driver driver)
 {
   return driver->bufmgr;
 }
@@ -91,18 +94,18 @@ sim_driver_get_device_id(void)
   return PCI_CHIP_IVYBRIDGE_GT2; // XXX get some env variable instead
 }
 
-
 /* Just a named buffer to mirror real drm functions */
-typedef struct sim_buffer {
-  void *data;           /* data in the buffer */
-  size_t sz;            /* size allocated */
-  volatile int ref_n;   /* number of references */
-  char *name;           /* name of the buffer */
-  sim_bufmgr_t *bufmgr; /* owns the buffer */
-} sim_buffer_t;
+struct _sim_buffer {
+  void *data;         /* data in the buffer */
+  size_t sz;          /* size allocated */
+  volatile int ref_n; /* number of references */
+  char *name;         /* name of the buffer */
+  sim_bufmgr bufmgr;  /* owns the buffer */
+};
+typedef struct _sim_buffer *sim_buffer;
 
 static void
-sim_buffer_delete(sim_buffer_t *buf)
+sim_buffer_delete(sim_buffer buf)
 {
   if (buf == NULL) return;
   cl_free(buf->data);
@@ -110,12 +113,12 @@ sim_buffer_delete(sim_buffer_t *buf)
   cl_free(buf);
 }
 
-static sim_buffer_t*
-sim_buffer_alloc(sim_bufmgr_t *bufmgr, const char *name, unsigned long sz, unsigned long align)
+static sim_buffer
+sim_buffer_alloc(sim_bufmgr bufmgr, const char *name, unsigned long sz, unsigned long align)
 {
-  sim_buffer_t *buf = NULL;
+  sim_buffer buf = NULL;
   assert(bufmgr);
-  TRY_ALLOC_NO_ERR(buf, cl_calloc(1, sizeof(sim_buffer_t)));
+  TRY_ALLOC_NO_ERR(buf, cl_calloc(1, sizeof(struct _sim_buffer)));
   if (sz) buf->data = cl_aligned_malloc(sz, align);
   if (name) {
     const size_t len = strlen(name);
@@ -137,7 +140,7 @@ error:
 }
 
 static void
-sim_buffer_unreference(sim_buffer_t *buf)
+sim_buffer_unreference(sim_buffer buf)
 {
   if (UNLIKELY(buf == NULL)) return;
   if (atomic_dec(&buf->ref_n) > 1) return;
@@ -146,55 +149,62 @@ sim_buffer_unreference(sim_buffer_t *buf)
 }
 
 static void
-sim_buffer_reference(sim_buffer_t *buf)
+sim_buffer_reference(sim_buffer buf)
 {
   if (UNLIKELY(buf == NULL)) return;
   atomic_inc(&buf->ref_n);
 }
 
 static void*
-sim_buffer_get_virtual(sim_buffer_t *buf)
+sim_buffer_get_virtual(sim_buffer buf)
 {
   if (UNLIKELY(buf == NULL)) return NULL;
   return buf->data;
 }
 
 static void*
-sim_buffer_get_size(sim_buffer_t *buf)
+sim_buffer_get_size(sim_buffer buf)
 {
   if (UNLIKELY(buf == NULL)) return 0;
   return buf->data;
 }
 
 static int
-sim_buffer_subdata(sim_buffer_t *buf, unsigned long offset, unsigned long size, const void *data)
+sim_buffer_subdata(sim_buffer buf, unsigned long offset, unsigned long size, const void *data)
 {
   if (data == NULL) return 0;
   if (buf == NULL) return 0;
   memcpy((char*) buf->data + offset, data, size);
   return 0;
 }
-static int sim_buffer_map(sim_buffer_t *buf, uint32_t write_enable) {return 0;}
-static int sim_buffer_unmap(sim_buffer_t *buf) {return 0;}
-static int sim_buffer_pin(sim_buffer_t *buf, uint32_t alignment) {return 0;}
-static int sim_buffer_unpin(sim_buffer_t *buf) {return 0;}
-static int sim_buffer_wait_rendering(sim_buffer_t *buf) {return 0;}
+static int sim_buffer_map(sim_buffer buf, uint32_t write_enable) {return 0;}
+static int sim_buffer_unmap(sim_buffer buf) {return 0;}
+static int sim_buffer_pin(sim_buffer buf, uint32_t alignment) {return 0;}
+static int sim_buffer_unpin(sim_buffer buf) {return 0;}
+static int sim_buffer_wait_rendering(sim_buffer buf) {return 0;}
+
+/* Function to call for each HW thread we simulate */
+typedef void (sim_kernel_cb)(void);
 
 /* Encapsulates operations needed to run one NDrange */
-typedef struct sim_gpgpu
+struct _sim_gpgpu
 {
-  sim_driver_t *driver; // the driver the gpgpu states belongs to
-} sim_gpgpu_t;
+  sim_driver driver;     /* the driver the gpgpu states belongs to */
+  sim_kernel_cb *kernel; /* call it for each HW thread */
+  uint32_t max_threads;  /* HW threads running */
+  uint32_t cst_sz;       /* size of the constant buffer */
+};
+typedef struct _sim_gpgpu *sim_gpgpu;
 
-static void sim_gpgpu_delete(sim_gpgpu_t *gpgpu)
+static void sim_gpgpu_delete(sim_gpgpu gpgpu)
 {
   cl_free(gpgpu);
 }
 
-static sim_gpgpu_t *sim_gpgpu_new(sim_driver_t *driver)
+static sim_gpgpu sim_gpgpu_new(sim_driver driver)
 {
-  sim_gpgpu_t *gpgpu = NULL;
-  TRY_ALLOC_NO_ERR(gpgpu, cl_calloc(1, sizeof(sim_gpgpu_t)));
+  sim_gpgpu gpgpu = NULL;
+  TRY_ALLOC_NO_ERR(gpgpu, cl_calloc(1, sizeof(struct _sim_gpgpu)));
 
 exit:
   return gpgpu;
@@ -207,42 +217,64 @@ error:
 #undef NOT_IMPLEMENTED
 #define NOT_IMPLEMENTED
 
-static void sim_gpgpu_bind_buf(sim_gpgpu_t *gpgpu, int32_t index, sim_buffer_t *buf, uint32_t cchint)
-{ NOT_IMPLEMENTED; }
-static void sim_gpgpu_bind_image2D(sim_gpgpu_t *gpgpu,
-                            int32_t index,
-                            sim_buffer_t *obj_bo,
-                            uint32_t format,
-                            int32_t w,
-                            int32_t h,
-                            int pitch,
-                            cl_gpgpu_tiling tiling)
+static void sim_gpgpu_bind_buf(sim_gpgpu gpgpu, int32_t index, sim_buffer buf, uint32_t cchint)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_state_init(sim_gpgpu_t *gpgpu, uint32_t max_threads, uint32_t size_cs_entry)
+static void sim_gpgpu_bind_image2D(sim_gpgpu gpgpu,
+                                   int32_t index,
+                                   sim_buffer obj_bo,
+                                   uint32_t format,
+                                   int32_t w,
+                                   int32_t h,
+                                   int pitch,
+                                   cl_gpgpu_tiling tiling)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_set_perf_counters(sim_gpgpu_t *gpgpu, sim_buffer_t *perf)
-{ NOT_IMPLEMENTED; }
-static void sim_gpgpu_upload_constants(sim_gpgpu_t *gpgpu, const void* data, uint32_t size)
+static void sim_gpgpu_state_init(sim_gpgpu gpgpu, uint32_t max_threads, uint32_t size_cs_entry)
+{
+  assert(gpgpu);
+  memset(gpgpu, 0, sizeof(*gpgpu));
+  gpgpu->cst_sz = size_cs_entry * 32;
+  gpgpu->max_threads = max_threads;
+}
+
+static void sim_gpgpu_set_perf_counters(sim_gpgpu gpgpu, sim_buffer perf)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_states_setup(sim_gpgpu_t *gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n)
+static void sim_gpgpu_upload_constants(sim_gpgpu gpgpu, const void* data, uint32_t size)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_upload_samplers(sim_gpgpu_t *state, const void *data, uint32_t n)
+static void sim_gpgpu_states_setup(sim_gpgpu gpgpu, cl_gpgpu_kernel *kernel)
+{
+  cl_buffer_map(kernel->bo, 0);
+  gpgpu->kernel = *(sim_kernel_cb **) cl_buffer_get_virtual(kernel->bo);
+}
+
+static void sim_gpgpu_upload_samplers(sim_gpgpu gpgpu, const void *data, uint32_t n)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_batch_reset(sim_gpgpu_t *state, size_t sz)
+static void sim_gpgpu_batch_reset(sim_gpgpu gpgpu, size_t sz)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_batch_start(sim_gpgpu_t *state)
+static void sim_gpgpu_batch_start(sim_gpgpu gpgpu)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_batch_end(sim_gpgpu_t *state, int32_t flush_mode)
+static void sim_gpgpu_batch_end(sim_gpgpu gpgpu, int32_t flush_mode)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_flush(sim_gpgpu_t *state)
+static void sim_gpgpu_flush(sim_gpgpu gpgpu)
 { NOT_IMPLEMENTED; }
-static void sim_gpgpu_walker(sim_gpgpu_t *state,
+static void sim_gpgpu_walker(sim_gpgpu gpgpu,
                              uint32_t simd_sz,
                              uint32_t thread_n,
                              const size_t global_wk_off[3],
                              const size_t global_wk_sz[3],
                              const size_t local_wk_sz[3])
-{ NOT_IMPLEMENTED; }
+{
+  uint32_t x, y, z;
+  const uint32_t global_wk_dim[3] = {
+    global_wk_sz[0] / local_wk_sz[0],
+    global_wk_sz[1] / local_wk_sz[1],
+    global_wk_sz[2] / local_wk_sz[2]
+  };
+  assert(simd_sz == 8 || simd_sz == 16);
+  for (z = 0; z < global_wk_dim[2]; ++z)
+  for (y = 0; y < global_wk_dim[1]; ++y)
+  for (x = 0; x < global_wk_dim[0]; ++x)
+    gpgpu->kernel();
+}
 
 LOCAL void
 sim_setup_callbacks(void)