Added all the call backs to support the simulator. The simulator does nothing but...
authorBenjamin Segovia <segovia.benjamin@gmail.com>
Fri, 30 Mar 2012 17:45:13 +0000 (17:45 +0000)
committerKeith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:56 +0000 (16:15 -0700)
13 files changed:
src/cl_command_queue.c
src/cl_context.c
src/cl_device_data.h [moved from src/intel/cl_device_data.h with 94% similarity]
src/cl_device_id.c
src/cl_driver.c
src/cl_driver.h
src/cl_genx_driver.h [deleted file]
src/cl_program.c
src/intel/intel_driver.c
src/intel/intel_driver.h
src/intel/intel_gpgpu.c
src/intel/intel_gpgpu.h
src/sim/sim_driver.c

index 215b676..a5b867e 100644 (file)
@@ -66,8 +66,7 @@ LOCAL void
 cl_command_queue_delete(cl_command_queue queue)
 {
   assert(queue);
-  if (atomic_dec(&queue->ref_n) != 1)
-    return;
+  if (atomic_dec(&queue->ref_n) != 1) return;
 
   /* Remove it from the list */
   assert(queue->ctx);
@@ -83,6 +82,7 @@ cl_command_queue_delete(cl_command_queue queue)
     cl_mem_delete(queue->fulsim_out);
     queue->fulsim_out = NULL;
   }
+  cl_buffer_unreference(queue->last_batch);
   cl_mem_delete(queue->perf);
   cl_context_delete(queue->ctx);
   cl_gpgpu_delete(queue->gpgpu);
@@ -110,96 +110,10 @@ cl_command_queue_bind_surface(cl_command_queue queue,
   cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
   cl_buffer sync_bo = NULL;
   cl_int err = CL_SUCCESS;
-#if 0
-  cl_context ctx = queue->ctx;
-  intel_gpgpu_t *gpgpu = queue->gpgpu;
-  drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
-  cl_mem mem = NULL;
-  cl_buffer *bo = NULL, *sync_bo = NULL;
-  const size_t max_thread = ctx->device->max_compute_unit;
-  cl_int err = CL_SUCCESS;
-  uint32_t i, index;
 
-  /* Bind user defined surface */
-  for (i = 0; i < k->arg_info_n; ++i) {
-    assert(k->arg_info[i].offset % SURFACE_SZ == 0);
-    index = k->arg_info[i].offset / SURFACE_SZ;
-    mem = (cl_mem) k->args[k->arg_info[i].arg_index];
-    assert(index != MAX_SURFACES - 1);
-    CHECK_MEM(mem);
-    bo = mem->bo;
-    assert(bo);
-    if (mem->is_image) {
-      const int32_t w = mem->w, h = mem->h, pitch = mem->pitch;
-      const uint32_t fmt = mem->intel_fmt;
-      gpgpu_tiling_t tiling = GPGPU_NO_TILE;
-      if (mem->tiling == CL_TILE_X)
-        tiling = GPGPU_TILE_X;
-      else if (mem->tiling == CL_TILE_Y)
-        tiling = GPGPU_TILE_Y;
-      gpgpu_bind_image2D(gpgpu, index, bo, fmt, w, h, pitch, tiling);
-
-      /* Copy the image parameters (width, height) in the constant buffer if the
-       * user requests them
-       */
-      cl_kernel_copy_image_parameters(k, mem, index, curbe);
-    } else
-      gpgpu_bind_buf(gpgpu, index, bo, cc_llc_l3);
-  }
-
-  /* Allocate the constant surface (if any) */
-  if (k->const_bo) {
-    assert(k->const_bo_index != MAX_SURFACES - 1);
-    gpgpu_bind_buf(gpgpu, k->const_bo_index,
-                   k->const_bo,
-                   cc_llc_l3);
-  }
-
-  /* Allocate local surface needed for SLM and bind it */
-  if (local && local_sz != 0) {
-    const size_t sz = 16 * local_sz; /* XXX 16 == maximum barrier number */
-    assert(k->patch.local_surf.offset % SURFACE_SZ == 0);
-    index = k->patch.local_surf.offset / SURFACE_SZ;
-    assert(index != MAX_SURFACES - 1);
-    *local = cl_buffer_alloc(bufmgr, "CL local surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
-  }
-  else if (local)
-    *local = NULL;
-
-  /* Allocate private surface and bind it */
-  if (priv && k->patch.private_surf.size != 0) {
-    const size_t sz = max_thread *
-                      k->patch.private_surf.size *
-                      k->patch.exec_env.largest_compiled_simd_sz;
-    // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
-    assert(k->patch.private_surf.offset % SURFACE_SZ == 0);
-    index = k->patch.private_surf.offset / SURFACE_SZ;
-    assert(index != MAX_SURFACES - 1);
-    *priv = cl_buffer_alloc(bufmgr, "CL private surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
-  }
-  else if(priv)
-    *priv = NULL;
-
-  /* Allocate scratch surface and bind it */
-  if (scratch && k->patch.scratch.size != 0) {
-    const size_t sz = max_thread * /* XXX is it given per lane ??? */
-                      k->patch.scratch.size *
-                      k->patch.exec_env.largest_compiled_simd_sz;
-    // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
-    assert(k->patch.scratch.offset % SURFACE_SZ == 0);
-    assert(index != MAX_SURFACES - 1);
-    index = k->patch.scratch.offset / SURFACE_SZ;
-    *scratch = cl_buffer_alloc(bufmgr, "CL scratch surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
-  }
-  else if (scratch)
-    *scratch = NULL;
-#endif
   /* Now bind a bo used for synchronization */
   sync_bo = cl_buffer_alloc(bufmgr, "sync surface", 64, 64);
-  cl_gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
+  cl_gpgpu_bind_buf(gpgpu, GEN_MAX_SURFACES-1, sync_bo, cc_llc_l3);
   if (queue->last_batch != NULL)
     cl_buffer_unreference(queue->last_batch);
   queue->last_batch = sync_bo;
index b8e9f01..385c733 100644 (file)
@@ -19,7 +19,6 @@
 
 #include "cl_platform_id.h"
 #include "cl_device_id.h"
-#include "cl_genx_driver.h"
 #include "cl_context.h"
 #include "cl_command_queue.h"
 #include "cl_mem.h"
similarity index 94%
rename from src/intel/cl_device_data.h
rename to src/cl_device_data.h
index 1d38037..e794739 100644 (file)
                            (devid) == PCI_CHIP_HASWELL_L)
 #define IS_GEN75(devid)  IS_HASWELL(devid)
 
-/* Open a connection to the X server and ask the kernel who we are. Returns -1
- * if no Intel GPU was found or an error was encountered in the probing
- * process
- */
-extern int cl_intel_get_device_id(void);
-
 #endif /* __CL_DEVICE_DATA_H__ */
 
index e4457be..b2e1704 100644 (file)
 #include "cl_internals.h"
 #include "cl_utils.h"
 #include "cl_defs.h"
-#ifdef _PLASMA
-#include "plasma/cl_device_data.h"
-#else
-#include "intel/cl_device_data.h"
-#endif
+#include "cl_driver.h"
+#include "cl_device_data.h"
 #include "CL/cl.h"
 
 #include <assert.h>
@@ -40,7 +37,6 @@ static struct _cl_device_id intel_ivb_gt2_device = {
   .max_clock_frequency = 1000,
   .wg_sz = 1024,
   .compile_wg_sz = {0},        
-
 #include "cl_gen7_device.h"
 };
 
@@ -51,7 +47,6 @@ static struct _cl_device_id intel_ivb_gt1_device = {
   .max_clock_frequency = 1000,
   .wg_sz = 512,
   .compile_wg_sz = {0},        
-
 #include "cl_gen7_device.h"
 };
 
@@ -63,7 +58,6 @@ static struct _cl_device_id intel_hsw_device = {
   .max_clock_frequency = 1000,
   .wg_sz = 512,
   .compile_wg_sz = {0},        
-
 #include "cl_gen75_device.h"
 };
 
@@ -71,7 +65,7 @@ LOCAL cl_device_id
 cl_get_gt_device(void)
 {
   cl_device_id ret = NULL;
-  const int device_id = cl_intel_get_device_id();
+  const int device_id = cl_driver_get_device_id();
 
   /* XXX we pick IVB for HSW now */
   if (device_id == PCI_CHIP_HASWELL_M   ||
index 65828e2..9c8bfa2 100644 (file)
 LOCAL cl_driver_new_cb *cl_driver_new = NULL;
 LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
 LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
+LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
 
 /* Buffer */
-LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
 LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
 LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL;
 LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
 LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
 LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
 LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
+LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
 LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL;
 LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL;
 LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL;
-LOCAL cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc = NULL;
 LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
 
 /* GPGPU */
@@ -49,7 +50,6 @@ LOCAL cl_gpgpu_state_init_cb *cl_gpgpu_state_init = NULL;
 LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL;
 LOCAL cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants = NULL;
 LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL;
-LOCAL cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier = NULL;
 LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL;
 LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL;
 LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL;
index b8958c9..586a3fd 100644 (file)
  * Author: Benjamin Segovia <benjamin.segovia@intel.com>
  */
 
-#ifndef __CL_BUFFER_H__
-#define __CL_BUFFER_H__
+#ifndef __CL_DRIVER_H__
+#define __CL_DRIVER_H__
 
 #include <stdint.h>
 #include <stdlib.h>
 
 /* XXX needed for previous driver */
-#define MAX_SURFACES 128
-#define MAX_SAMPLERS 16
+#define GEN_MAX_SURFACES 128
+#define GEN_MAX_SAMPLERS 16
 
 /**************************************************************************
  * cl_driver:
@@ -134,10 +134,6 @@ extern cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants;
 typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n);
 extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
 
-/* Make HW threads use barrierID */
-typedef void (cl_gpgpu_update_barrier_cb)(cl_gpgpu, uint32_t barrierID, uint32_t thread_n);
-extern cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier;
-
 /* Upload the constant samplers as specified inside the OCL kernel */
 typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
 extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;
@@ -198,6 +194,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap;
 typedef void* (cl_buffer_get_virtual_cb)(cl_buffer);
 extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;
 
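+/* Get the size of the buffer. NOTE(review): declared as returning void*; an integer type such as size_t looks intended - confirm */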
+typedef void* (cl_buffer_get_size_cb)(cl_buffer);
+extern cl_buffer_get_size_cb *cl_buffer_get_size;
+
 /* Pin a buffer */
 typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment);
 extern cl_buffer_pin_cb *cl_buffer_pin;
@@ -210,13 +210,13 @@ extern cl_buffer_unpin_cb *cl_buffer_unpin;
 typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*);
 extern cl_buffer_subdata_cb *cl_buffer_subdata;
 
-/* Emit relocation */
-typedef int (cl_buffer_emit_reloc_cb) (cl_buffer, uint32_t, cl_buffer, uint32_t, uint32_t, uint32_t);
-extern cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc;
-
 /* Wait for all pending rendering for this buffer to complete */
 typedef int (cl_buffer_wait_rendering_cb) (cl_buffer);
 extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
 
-#endif /* __CL_BUFFER_H__ */
+/* Get the device id */
+typedef int (cl_driver_get_device_id_cb)(void);
+extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
+
+#endif /* __CL_DRIVER_H__ */
 
diff --git a/src/cl_genx_driver.h b/src/cl_genx_driver.h
deleted file mode 100644 (file)
index fcfc0f8..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#ifndef __CL_INTEL_DRIVER_H__
-#define __CL_INTEL_DRIVER_H__
-
-/* They are mostly wrapper around C++ delete / new to avoid c++ in c files */
-struct intel_driver;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-    
-/* Allocate and initialize the gen driver */
-struct intel_driver* cl_intel_driver_new(void);
-
-/* Destroy and deallocate the gen driver */
-void cl_intel_driver_delete(struct intel_driver*);
-
-#ifdef __cplusplus
-}
-#endif
-    
-#endif /* __CL_INTEL_DRIVER_H__ */
-
index c26b79a..5d15706 100644 (file)
@@ -82,6 +82,9 @@ cl_program_new(cl_context ctx)
   p->magic = CL_MAGIC_PROGRAM_HEADER;
   p->ctx = ctx;
 
+  /* The program also belongs to its context */
+  cl_context_add_ref(ctx);
+
 exit:
   return p;
 error:
index def75f1..78d6cb7 100644 (file)
@@ -34,7 +34,6 @@
 #include "cl_utils.h"
 #include "cl_alloc.h"
 #include "cl_driver.h"
-#include "cl_genx_driver.h"
 
 #define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
   sigset_t bl_mask;                                           \
   RESTORE_BLOCKED_SIGSET(DRIVER);                             \
 } while (0)
 
-LOCAL intel_driver_t*
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+  if (driver == NULL)
+    return;
+  cl_free(driver);
+}
+
+static intel_driver_t*
 intel_driver_new(void)
 {
   intel_driver_t *driver = NULL;
@@ -77,14 +84,6 @@ error:
   goto exit;
 }
 
-LOCAL void
-intel_driver_delete(intel_driver_t *driver)
-{
-  if (driver == NULL)
-    return;
-  cl_free(driver);
-}
-
 /* just used for maximum relocation number in drm_intel */
 #define BATCH_SIZE 0x1000
 
@@ -133,7 +132,7 @@ intel_driver_init(intel_driver_t *driver, int dev_fd)
 #endif /* EMULATE_GEN */
 }
 
-LOCAL void
+static void
 intel_driver_open(intel_driver_t *intel)
 {
   int cardi;
@@ -167,7 +166,7 @@ intel_driver_open(intel_driver_t *intel)
   }
 }
 
-LOCAL void
+static void
 intel_driver_close(intel_driver_t *intel)
 {
   if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
@@ -288,8 +287,8 @@ intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
   return name;
 }
 
-LOCAL int
-cl_intel_get_device_id(void)
+static int
+intel_get_device_id(void)
 {
   intel_driver_t *driver = NULL;
   int intel_device_id;
@@ -304,7 +303,7 @@ cl_intel_get_device_id(void)
   return intel_device_id;
 }
 
-LOCAL void
+static void
 cl_intel_driver_delete(intel_driver_t *driver)
 {
   if (driver == NULL)
@@ -314,7 +313,7 @@ cl_intel_driver_delete(intel_driver_t *driver)
   intel_driver_delete(driver);
 }
 
-LOCAL intel_driver_t*
+static intel_driver_t*
 cl_intel_driver_new(void)
 {
   intel_driver_t *driver = NULL;
@@ -329,20 +328,20 @@ error:
   goto exit;
 }
 
-LOCAL drm_intel_bufmgr*
+static drm_intel_bufmgr*
 intel_driver_get_bufmgr(intel_driver_t *drv)
 {
   return drv->bufmgr;
 }
 
-LOCAL uint32_t
+static uint32_t
 intel_driver_get_ver(struct intel_driver *drv)
 {
   return drv->gen_ver;
 }
 
-LOCAL uint32_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
-LOCAL void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
+static uint32_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
+static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
 
 LOCAL void
 intel_setup_callbacks(void)
@@ -351,16 +350,17 @@ intel_setup_callbacks(void)
   cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
   cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
   cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
+  cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
   cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
   cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
   cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
   cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
   cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
-  cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_unmap;
+  cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
+  cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
   cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
   cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
   cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
-  cl_buffer_emit_reloc = (cl_buffer_emit_reloc_cb *) drm_intel_bo_emit_reloc;
   cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
   intel_set_gpgpu_callbacks();
 }
index 528de1c..b4f17b6 100644 (file)
@@ -65,10 +65,6 @@ typedef struct intel_driver
   struct dri_state *dri_ctx;
 } intel_driver_t;
 
-/* create / destroy device */
-extern intel_driver_t* intel_driver_new(void);
-extern void intel_driver_delete(intel_driver_t*);
-
 /* device control */
 extern void intel_driver_lock_hardware(intel_driver_t*);
 extern void intel_driver_unlock_hardware(intel_driver_t*);
index 911be92..7974bbf 100644 (file)
@@ -457,11 +457,11 @@ intel_gpgpu_state_init(intel_gpgpu_t *state,
     dri_bo_unreference(state->sampler_state_b.bo);
   bo = dri_bo_alloc(state->drv->bufmgr, 
                     "sample states",
-                    MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
+                    GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
                     32);
   assert(bo);
   dri_bo_map(bo, 1);
-  memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * MAX_SAMPLERS);
+  memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * GEN_MAX_SAMPLERS);
   state->sampler_state_b.bo = bo;
 }
 
@@ -538,7 +538,7 @@ intel_gpgpu_bind_buf(intel_gpgpu_t *state,
                      uint32_t cchint)
 {
   const uint32_t size = obj_bo->size;
-  assert(index < MAX_SURFACES);
+  assert(index < GEN_MAX_SURFACES);
   if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
     intel_gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
   else
@@ -555,7 +555,7 @@ intel_gpgpu_bind_image2D(intel_gpgpu_t *state,
                          int32_t pitch,
                          cl_gpgpu_tiling tiling)
 {
-  assert(index < MAX_SURFACES);
+  assert(index < GEN_MAX_SURFACES);
   if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
     intel_gpgpu_bind_image2D_gen7(state, index, (drm_intel_bo*) obj_bo, format, w, h, pitch, tiling);
   else
@@ -656,17 +656,6 @@ intel_gpgpu_states_setup(intel_gpgpu_t *state, cl_gpgpu_kernel *kernel, uint32_t
   dri_bo_unmap(state->sampler_state_b.bo);
 }
 
-static void 
-intel_gpgpu_update_barrier(intel_gpgpu_t *state, uint32_t barrierID, uint32_t thread_n)
-{
-  BEGIN_BATCH(state->batch, 4);
-  OUT_BATCH(state->batch, CMD_MEDIA_STATE_FLUSH | 0);
-  OUT_BATCH(state->batch, 1 << barrierID);
-  OUT_BATCH(state->batch, CMD_MEDIA_GATEWAY_STATE | 0);
-  OUT_BATCH(state->batch, (barrierID << 16) | thread_n);
-  ADVANCE_BATCH(state->batch);
-}
-
 static void
 intel_gpgpu_set_perf_counters(intel_gpgpu_t *state, cl_buffer *perf)
 {
@@ -724,7 +713,6 @@ intel_set_gpgpu_callbacks(void)
   cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
   cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) intel_gpgpu_upload_constants;
   cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
-  cl_gpgpu_update_barrier = (cl_gpgpu_update_barrier_cb *) intel_gpgpu_update_barrier;
   cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
   cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
   cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
index 006a55b..154fc27 100644 (file)
@@ -17,8 +17,8 @@
  * Author: Benjamin Segovia <benjamin.segovia@intel.com>
  */
 
-#ifndef __GENX_GPGPU_H__
-#define __GENX_GPGPU_H__
+#ifndef __INTEL_GPGPU_H__
+#define __INTEL_GPGPU_H__
 
 #include "cl_utils.h"
 #include "cl_driver.h"
@@ -29,5 +29,5 @@
 /* Set the gpgpu related call backs */
 extern void intel_set_gpgpu_callbacks(void);
 
-#endif /* __GENX_GPGPU_H__ */
+#endif /* __INTEL_GPGPU_H__ */
 
index 5e5cfad..61a4613 100644 (file)
 
 #include "cl_utils.h"
 #include "cl_alloc.h"
+#include "cl_device_data.h"
 #include "sim/sim_driver.h"
 #include "CL/cl.h"
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
+#include <stdio.h>
 
 #include "cl_driver.h"
 
-/* Just to count allocations */
+/* Fake buffer manager that just counts allocations */
 typedef struct sim_bufmgr { volatile int buf_n; } sim_bufmgr_t;
 
+static sim_bufmgr_t*
+sim_bufmgr_new(void)
+{
+  return cl_calloc(1,sizeof(sim_bufmgr_t));
+}
+
+static void
+sim_bufmgr_delete(sim_bufmgr_t *bufmgr)
+{
+  cl_free(bufmgr);
+}
+
+/* Fake low-level driver */
+typedef struct sim_driver {
+  sim_bufmgr_t *bufmgr;
+  int gen_ver;
+} sim_driver_t;
+
+static void
+sim_driver_delete(sim_driver_t *driver)
+{
+  if (driver == NULL) return;
+  sim_bufmgr_delete(driver->bufmgr);
+  cl_free(driver);
+}
+
+static sim_driver_t*
+sim_driver_new(void)
+{
+  sim_driver_t *driver = NULL;
+  TRY_ALLOC_NO_ERR(driver, cl_calloc(1, sizeof(sim_driver_t)));
+  TRY_ALLOC_NO_ERR(driver->bufmgr, sim_bufmgr_new());
+  driver->gen_ver = 7; // XXX make it flexible
+exit:
+  return driver;
+error:
+  sim_driver_delete(driver);
+  driver = NULL;
+  goto exit;
+}
+
+static int
+sim_driver_get_ver(sim_driver_t *driver)
+{
+  return driver->gen_ver;
+}
+
+static sim_bufmgr_t*
+sim_driver_get_bufmgr(sim_driver_t *driver)
+{
+  return driver->bufmgr;
+}
+
+static int
+sim_driver_get_device_id(void)
+{
+  return PCI_CHIP_IVYBRIDGE_GT2; // XXX get some env variable instead
+}
+
+
 /* Just a named buffer to mirror real drm functions */
 typedef struct sim_buffer {
   void *data;           /* data in the buffer */
@@ -45,6 +107,7 @@ sim_buffer_delete(sim_buffer_t *buf)
   if (buf == NULL) return;
   cl_free(buf->data);
   cl_free(buf->name);
+  cl_free(buf);
 }
 
 static sim_buffer_t*
@@ -62,6 +125,7 @@ sim_buffer_alloc(sim_bufmgr_t *bufmgr, const char *name, unsigned long sz, unsig
   }
   buf->ref_n = 1;
   buf->bufmgr = bufmgr;
+  buf->sz = sz;
   atomic_inc(&buf->bufmgr->buf_n);
 
 exit:
@@ -81,6 +145,27 @@ sim_buffer_unreference(sim_buffer_t *buf)
   sim_buffer_delete(buf);
 }
 
+static void
+sim_buffer_reference(sim_buffer_t *buf)
+{
+  if (UNLIKELY(buf == NULL)) return;
+  atomic_inc(&buf->ref_n);
+}
+
+static void*
+sim_buffer_get_virtual(sim_buffer_t *buf)
+{
+  if (UNLIKELY(buf == NULL)) return NULL;
+  return buf->data;
+}
+
+static void*
+sim_buffer_get_size(sim_buffer_t *buf) /* size recorded by sim_buffer_alloc, carried in the void* that cl_buffer_get_size_cb returns */
+{
+  if (UNLIKELY(buf == NULL)) return NULL; /* no buffer: report a size of 0 */
+  return (void*) (uintptr_t) buf->sz; /* previously returned buf->data, i.e. the storage address instead of the size */
+}
+
 static int
 sim_buffer_subdata(sim_buffer_t *buf, unsigned long offset, unsigned long size, const void *data)
 {
@@ -89,33 +174,108 @@ sim_buffer_subdata(sim_buffer_t *buf, unsigned long offset, unsigned long size,
   memcpy((char*) buf->data + offset, data, size);
   return 0;
 }
-
-static int
-sim_buffer_emit_reloc(sim_buffer_t *buf, 
-                      uint32_t offset,
-                      sim_buffer_t *target_buf,
-                      uint32_t target_offset,
-                      uint32_t read_domains,
-                      uint32_t write_domain)
-{
-  return 1;
-}
-
 static int sim_buffer_map(sim_buffer_t *buf, uint32_t write_enable) {return 0;}
 static int sim_buffer_unmap(sim_buffer_t *buf) {return 0;}
 static int sim_buffer_pin(sim_buffer_t *buf, uint32_t alignment) {return 0;}
 static int sim_buffer_unpin(sim_buffer_t *buf) {return 0;}
+static int sim_buffer_wait_rendering(sim_buffer_t *buf) {return 0;}
+
+/* Encapsulates operations needed to run one NDrange */
+typedef struct sim_gpgpu
+{
+  sim_driver_t *driver; // the driver the gpgpu state belongs to
+} sim_gpgpu_t;
+
+static void sim_gpgpu_delete(sim_gpgpu_t *gpgpu)
+{
+  cl_free(gpgpu);
+}
+
+static sim_gpgpu_t *sim_gpgpu_new(sim_driver_t *driver) /* allocate a gpgpu state with cl_calloc and attach it to driver */
+{
+  sim_gpgpu_t *gpgpu = NULL;
+  TRY_ALLOC_NO_ERR(gpgpu, cl_calloc(1, sizeof(sim_gpgpu_t))); /* presumably jumps to error when the allocation fails - confirm against the macro */
+  gpgpu->driver = driver; /* record the owning driver; the parameter was previously ignored and the field never set */
+exit:
+  return gpgpu;
+error:
+  sim_gpgpu_delete(gpgpu);
+  gpgpu = NULL;
+  goto exit;
+}
+
+#undef NOT_IMPLEMENTED
+#define NOT_IMPLEMENTED
+
+static void sim_gpgpu_bind_buf(sim_gpgpu_t *gpgpu, int32_t index, sim_buffer_t *buf, uint32_t cchint)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_bind_image2D(sim_gpgpu_t *gpgpu,
+                            int32_t index,
+                            sim_buffer_t *obj_bo,
+                            uint32_t format,
+                            int32_t w,
+                            int32_t h,
+                            int pitch,
+                            cl_gpgpu_tiling tiling)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_state_init(sim_gpgpu_t *gpgpu, uint32_t max_threads, uint32_t size_cs_entry)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_set_perf_counters(sim_gpgpu_t *gpgpu, sim_buffer_t *perf)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_upload_constants(sim_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_states_setup(sim_gpgpu_t *gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_upload_samplers(sim_gpgpu_t *state, const void *data, uint32_t n)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_reset(sim_gpgpu_t *state, size_t sz)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_start(sim_gpgpu_t *state)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_end(sim_gpgpu_t *state, int32_t flush_mode)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_flush(sim_gpgpu_t *state)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_walker(sim_gpgpu_t *state,
+                             uint32_t simd_sz,
+                             uint32_t thread_n,
+                             const size_t global_wk_off[3],
+                             const size_t global_wk_sz[3],
+                             const size_t local_wk_sz[3])
+{ NOT_IMPLEMENTED; }
 
 LOCAL void
 sim_setup_callbacks(void)
 {
+  cl_driver_new = (cl_driver_new_cb *) sim_driver_new;
+  cl_driver_delete = (cl_driver_delete_cb *) sim_driver_delete;
+  cl_driver_get_ver = (cl_driver_get_ver_cb *) sim_driver_get_ver;
+  cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) sim_driver_get_bufmgr;
+  cl_driver_get_device_id = (cl_driver_get_device_id_cb *) sim_driver_get_device_id;
   cl_buffer_alloc = (cl_buffer_alloc_cb *) sim_buffer_alloc;
+  cl_buffer_reference = (cl_buffer_reference_cb *) sim_buffer_reference;
   cl_buffer_unreference = (cl_buffer_unreference_cb *) sim_buffer_unreference;
   cl_buffer_map = (cl_buffer_map_cb *) sim_buffer_map;
   cl_buffer_unmap = (cl_buffer_unmap_cb *) sim_buffer_unmap;
+  cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) sim_buffer_get_virtual;
+  cl_buffer_get_size = (cl_buffer_get_size_cb *) sim_buffer_get_size;
   cl_buffer_pin = (cl_buffer_pin_cb *) sim_buffer_pin;
   cl_buffer_unpin = (cl_buffer_unpin_cb *) sim_buffer_unpin;
   cl_buffer_subdata = (cl_buffer_subdata_cb *) sim_buffer_subdata;
-  cl_buffer_emit_reloc = (cl_buffer_emit_reloc_cb *) sim_buffer_emit_reloc;
+  cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) sim_buffer_wait_rendering;
+  cl_gpgpu_new = (cl_gpgpu_new_cb *) sim_gpgpu_new;
+  cl_gpgpu_delete = (cl_gpgpu_delete_cb *) sim_gpgpu_delete;
+  cl_gpgpu_bind_image2D = (cl_gpgpu_bind_image2D_cb *) sim_gpgpu_bind_image2D;
+  cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) sim_gpgpu_bind_buf;
+  cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) sim_gpgpu_state_init;
+  cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) sim_gpgpu_set_perf_counters;
+  cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) sim_gpgpu_upload_constants;
+  cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) sim_gpgpu_states_setup;
+  cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) sim_gpgpu_upload_samplers;
+  cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) sim_gpgpu_batch_reset;
+  cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) sim_gpgpu_batch_start;
+  cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) sim_gpgpu_batch_end;
+  cl_gpgpu_flush = (cl_gpgpu_flush_cb *) sim_gpgpu_flush;
+  cl_gpgpu_walker = (cl_gpgpu_walker_cb *) sim_gpgpu_walker;
 }