cl_context.c
cl_command_queue.c
cl_command_queue_gen7.c
- cl_buffer.c
- cl_buffer.cpp
- sim/sim_buffer.c
+ cl_driver.c
+ cl_driver.cpp
+ sim/sim_driver.c
intel/intel_gpgpu.c
intel/intel_batchbuffer.c
intel/intel_driver.c
#include "cl_mem.h"
#include "cl_utils.h"
#include "cl_alloc.h"
-#include "intel_bufmgr.h"
-#include "intel/intel_gpgpu.h"
+#include "cl_driver.h"
#include <assert.h>
#include <stdio.h>
queue->magic = CL_MAGIC_QUEUE_HEADER;
queue->ref_n = 1;
queue->ctx = ctx;
- TRY_ALLOC_NO_ERR (queue->gpgpu,
- intel_gpgpu_new((struct intel_driver*) ctx->intel_drv));
+ TRY_ALLOC_NO_ERR (queue->gpgpu, cl_gpgpu_new(ctx->drv));
/* Append the command queue in the list */
pthread_mutex_lock(&ctx->queue_lock);
}
cl_mem_delete(queue->perf);
cl_context_delete(queue->ctx);
- intel_gpgpu_delete(queue->gpgpu);
+ cl_gpgpu_delete(queue->gpgpu);
queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
cl_free(queue);
}
atomic_inc(&queue->ref_n);
}
- LOCAL cl_int
+LOCAL cl_int
cl_command_queue_bind_surface(cl_command_queue queue,
cl_kernel k,
char *curbe,
- drm_intel_bo **local,
- drm_intel_bo **priv,
- drm_intel_bo **scratch,
+ cl_buffer **local,
+ cl_buffer **priv,
+ cl_buffer **scratch,
uint32_t local_sz)
{
cl_context ctx = queue->ctx;
- intel_gpgpu_t *gpgpu = queue->gpgpu;
- drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
- drm_intel_bo *sync_bo = NULL;
+ cl_gpgpu *gpgpu = queue->gpgpu;
+ cl_buffer_mgr *bufmgr = cl_context_get_bufmgr(ctx);
+ cl_buffer *sync_bo = NULL;
cl_int err = CL_SUCCESS;
#if 0
cl_context ctx = queue->ctx;
intel_gpgpu_t *gpgpu = queue->gpgpu;
drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
cl_mem mem = NULL;
- drm_intel_bo *bo = NULL, *sync_bo = NULL;
+ cl_buffer *bo = NULL, *sync_bo = NULL;
const size_t max_thread = ctx->device->max_compute_unit;
cl_int err = CL_SUCCESS;
uint32_t i, index;
assert(k->patch.local_surf.offset % SURFACE_SZ == 0);
index = k->patch.local_surf.offset / SURFACE_SZ;
assert(index != MAX_SURFACES - 1);
- *local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64);
+ *local = cl_buffer_alloc(bufmgr, "CL local surface", sz, 64);
gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
}
else if (local)
assert(k->patch.private_surf.offset % SURFACE_SZ == 0);
index = k->patch.private_surf.offset / SURFACE_SZ;
assert(index != MAX_SURFACES - 1);
- *priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64);
+ *priv = cl_buffer_alloc(bufmgr, "CL private surface", sz, 64);
gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
}
else if(priv)
assert(k->patch.scratch.offset % SURFACE_SZ == 0);
assert(index != MAX_SURFACES - 1);
index = k->patch.scratch.offset / SURFACE_SZ;
- *scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64);
+ *scratch = cl_buffer_alloc(bufmgr, "CL scratch surface", sz, 64);
gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
}
else if (scratch)
*scratch = NULL;
#endif
/* Now bind a bo used for synchronization */
- sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64);
- gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
+ sync_bo = cl_buffer_alloc(bufmgr, "sync surface", 64, 64);
+ cl_gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
if (queue->last_batch != NULL)
- drm_intel_bo_unreference(queue->last_batch);
+ cl_buffer_unreference(queue->last_batch);
queue->last_batch = sync_bo;
// error:
}
#if USE_FULSIM
-extern void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr*);
-extern void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr*, FILE*);
-extern void aub_exec_dump_raw_file(drm_intel_bo*, size_t offset, size_t sz);
+extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr*);
+extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr*, FILE*);
+extern void aub_exec_dump_raw_file(cl_buffer*, size_t offset, size_t sz);
static void
cl_run_fulsim(void)
const size_t *global_wk_sz,
const size_t *local_wk_sz)
{
- intel_gpgpu_t *gpgpu = queue->gpgpu;
- const int32_t ver = intel_gpgpu_version(gpgpu);
+ const int32_t ver = cl_driver_get_ver(queue->ctx->drv);
cl_int err = CL_SUCCESS;
#if USE_FULSIM
- drm_intel_bufmgr *bufmgr = NULL;
+ cl_buffer_mgr *bufmgr = NULL;
FILE *file = fopen("dump.aub", "wb");
FATAL_IF (file == NULL, "Unable to open file dump.aub");
- bufmgr = cl_context_get_intel_bufmgr(queue->ctx);
+ bufmgr = cl_context_get_bufmgr(queue->ctx);
drm_intel_bufmgr_gem_set_aubfile(bufmgr, file);
#endif /* USE_FULSIM */
{
if (queue->last_batch == NULL)
return CL_SUCCESS;
- drm_intel_bo_wait_rendering(queue->last_batch);
- drm_intel_bo_unreference(queue->last_batch);
+ cl_buffer_wait_rendering(queue->last_batch);
+ cl_buffer_unreference(queue->last_batch);
queue->last_batch = NULL;
return CL_SUCCESS;
}
#define __CL_COMMAND_QUEUE_H__
#include "cl_internals.h"
+#include "cl_driver.h"
#include "CL/cl.h"
#include <stdint.h>
/* Basically, this is a (kind-of) batch buffer */
struct _cl_command_queue {
- uint64_t magic; /* To identify it as a command queue */
- volatile int ref_n; /* We reference count this object */
- cl_context ctx; /* Its parent context */
- cl_command_queue prev, next; /* We chain the command queues together */
- struct intel_gpgpu *gpgpu; /* Setup all GEN commands */
- cl_mem perf; /* Where to put the perf counters */
- cl_mem fulsim_out; /* Fulsim will output this buffer */
- struct _drm_intel_bo *last_batch;/* To synchronize using clFinish */
+ uint64_t magic; /* To identify it as a command queue */
+ volatile int ref_n; /* We reference count this object */
+ cl_context ctx; /* Its parent context */
+ cl_command_queue prev, next; /* We chain the command queues together */
+ cl_gpgpu *gpgpu; /* Setup all GEN commands */
+ cl_mem perf; /* Where to put the perf counters */
+ cl_mem fulsim_out; /* Fulsim will output this buffer */
+ cl_buffer *last_batch; /* To synchronize using clFinish */
};
/* Allocate and initialize a new command queue. Also insert it in the list of
extern cl_int cl_command_queue_bind_surface(cl_command_queue queue,
cl_kernel k,
char *curbe,
- struct _drm_intel_bo **local,
- struct _drm_intel_bo **priv,
- struct _drm_intel_bo **scratch,
+ cl_buffer **local,
+ cl_buffer **priv,
+ cl_buffer **scratch,
uint32_t local_sz);
#endif /* __CL_COMMAND_QUEUE_H__ */
#include "cl_mem.h"
#include "cl_utils.h"
#include "cl_alloc.h"
-#include "intel_bufmgr.h"
-#include "intel/intel_gpgpu.h"
#include <assert.h>
#include <stdio.h>
const size_t *local_wk_sz)
{
cl_context ctx = queue->ctx;
- intel_gpgpu_t *gpgpu = queue->gpgpu;
+ cl_gpgpu *gpgpu = queue->gpgpu;
char *curbe = NULL; /* Does not include per-thread local IDs */
char *final_curbe = NULL; /* Includes them */
- drm_intel_bo *private_bo = NULL, *scratch_bo = NULL;
- genx_gpgpu_kernel_t kernel;
+ cl_buffer *private_bo = NULL, *scratch_bo = NULL;
+ cl_gpgpu_kernel_t kernel;
const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
size_t i, batch_sz = 0u, local_sz = 0u, thread_n = 0u, id_offset = 0u, cst_sz = 0u;
cl_int err = CL_SUCCESS;
kernel.cst_sz = cst_sz += 3 * 32; /* Add local IDs (16 words) */
/* Setup the kernel */
- gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32);
+ cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32);
if (queue->last_batch != NULL)
- drm_intel_bo_unreference(queue->last_batch);
+ cl_buffer_unreference(queue->last_batch);
queue->last_batch = NULL;
cl_command_queue_bind_surface(queue, ker, curbe, NULL, &private_bo, &scratch_bo, 0);
- gpgpu_states_setup(gpgpu, &kernel, 1);
+ cl_gpgpu_states_setup(gpgpu, &kernel, 1);
/* CURBE step 2. Give the localID and upload it to video memory */
TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
for (i = 0; i < thread_n; ++i)
memcpy(final_curbe + cst_sz * i, curbe, cst_sz - 3*32);
TRY (cl_set_local_ids, final_curbe, local_wk_sz, simd_sz, cst_sz, id_offset, thread_n);
- gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
+ cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
/* Start a new batch buffer */
batch_sz = cl_kernel_compute_batch_sz(ker);
- gpgpu_batch_reset(gpgpu, batch_sz);
- gpgpu_batch_start(gpgpu);
+ cl_gpgpu_batch_reset(gpgpu, batch_sz);
+ cl_gpgpu_batch_start(gpgpu);
/* Issue the GPGPU_WALKER command */
- gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz);
+ cl_gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz);
/* Close the batch buffer and submit it */
- gpgpu_batch_end(gpgpu, 0);
- gpgpu_flush(gpgpu);
-
-#if 0
- cl_context ctx = queue->ctx;
- intel_gpgpu_t *gpgpu = queue->gpgpu;
- drm_intel_bo *private_bo = NULL, *scratch_bo = NULL;
- char *curbe = NULL; /* Does not include per-thread local IDs */
- char *final_curbe = NULL; /* Includes them */
- genx_gpgpu_kernel_t kernel;
- //const size_t simd_sz = ker->patch.exec_env.largest_compiled_simd_sz;
- const size_t simd_sz = 16;
- size_t local_sz, batch_sz, cst_sz = ker->patch.curbe.sz;
- size_t i, thread_n, id_offset;
- cl_int err = CL_SUCCESS;
-
- /* Setup kernel */
- kernel.name = "OCL kernel";
- kernel.grf_blocks = 128;
- kernel.bin = ker->kernel_heap; // _PLASMA ; NULL
- kernel.size = ker->kernel_heap_sz; // _PLASMA ; 0
- kernel.bo = ker->bo;
- kernel.barrierID = 0;
- kernel.use_barrier = ker->patch.exec_env.has_barriers;
- kernel.slm_sz = cl_kernel_local_memory_sz(ker);
-
- /* All arguments must have been set */
- TRY (cl_kernel_check_args, ker);
-
- /* Check that the local work sizes are OK */
- TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz);
- //kernel.thread_n = thread_n = local_sz / simd_sz;
- kernel.thread_n = thread_n = local_sz / simd_sz;
-
- /* CURBE step 1. Allocate and fill fields shared by threads in workgroup */
- if (cst_sz > 0) {
- assert(ker->cst_buffer);
- curbe = cl_kernel_create_cst_buffer(ker,
- global_wk_off,
- global_wk_sz,
- local_wk_sz,
- 3,
- thread_n);
- }
- id_offset = cst_sz = ALIGN(cst_sz, 32); /* Align the user data on 32 bytes */
- kernel.cst_sz = cst_sz += 3 * 32; /* Add local IDs (16 words) */
-
- /* Setup the kernel */
- gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32);
- if (queue->last_batch != NULL)
- drm_intel_bo_unreference(queue->last_batch);
- queue->last_batch = NULL;
- cl_command_queue_bind_surface(queue, ker, curbe, NULL, &private_bo, &scratch_bo, 0);
- gpgpu_states_setup(gpgpu, &kernel, 1);
-
- /* CURBE step 2. Give the localID and upload it to video memory */
- TRY_ALLOC (final_curbe, (char*) cl_calloc(thread_n, cst_sz));
- for (i = 0; i < thread_n; ++i)
- memcpy(final_curbe + cst_sz * i, curbe, cst_sz);
- TRY (cl_set_local_ids, final_curbe, local_wk_sz, simd_sz, cst_sz, id_offset, thread_n);
- gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
-
- /* Start a new batch buffer */
- batch_sz = cl_kernel_compute_batch_sz(ker);
- gpgpu_batch_reset(gpgpu, batch_sz);
- gpgpu_batch_start(gpgpu);
-
- /* Issue the GPGPU_WALKER command */
- gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz);
-
- /* Close the batch buffer and submit it */
- gpgpu_batch_end(gpgpu, 0);
- gpgpu_flush(gpgpu);
-
-error:
- /* Release all temporary buffers */
- if (private_bo) drm_intel_bo_unreference(private_bo);
- if (scratch_bo) drm_intel_bo_unreference(scratch_bo);
- cl_free(final_curbe);
- cl_free(curbe);
- return err;
-#endif
+ cl_gpgpu_batch_end(gpgpu, 0);
+ cl_gpgpu_flush(gpgpu);
error:
return err;
#include "cl_mem.h"
#include "cl_alloc.h"
#include "cl_utils.h"
+#include "cl_driver.h"
#include "CL/cl.h"
#include <stdint.h>
#include <assert.h>
-/* Do not include the full dependency */
-struct intel_driver;
-/* Get the command buffer interface */
-extern struct _drm_intel_bufmgr* intel_driver_get_bufmgr(struct intel_driver*);
-/* Get the Gen HW version */
-extern uint32_t intel_driver_get_ver(struct intel_driver*);
-
static cl_int
cl_context_properties_is_ok(const cl_context_properties *properties)
{
cl_context ctx = NULL;
TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
- TRY_ALLOC_NO_ERR (ctx->intel_drv, cl_intel_driver_new());
+ TRY_ALLOC_NO_ERR (ctx->drv, cl_driver_new());
ctx->magic = CL_MAGIC_CONTEXT_HEADER;
ctx->ref_n = 1;
- ctx->ver = intel_driver_get_ver(ctx->intel_drv);
+ ctx->ver = cl_driver_get_ver(ctx->drv);
pthread_mutex_init(&ctx->program_lock, NULL);
pthread_mutex_init(&ctx->queue_lock, NULL);
pthread_mutex_init(&ctx->buffer_lock, NULL);
assert(ctx->queues == NULL);
assert(ctx->programs == NULL);
assert(ctx->buffers == NULL);
- assert(ctx->intel_drv);
- cl_intel_driver_delete(ctx->intel_drv);
+ assert(ctx->drv);
+ cl_driver_delete(ctx->drv);
ctx->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
cl_free(ctx);
}
goto exit;
}
-struct _drm_intel_bufmgr*
-cl_context_get_intel_bufmgr(cl_context ctx)
+cl_buffer_mgr*
+cl_context_get_bufmgr(cl_context ctx)
{
- return intel_driver_get_bufmgr((struct intel_driver*) ctx->intel_drv);
+ return cl_driver_get_bufmgr(ctx->drv);
}
#define __CL_CONTEXT_H__
#include "cl_internals.h"
+#include "cl_driver.h"
#include "CL/cl.h"
#include <stdint.h>
struct _cl_context {
uint64_t magic; /* To identify it as a context */
volatile int ref_n; /* We reference count this object */
- struct intel_driver *intel_drv; /* Handles the real HW */
+ cl_driver *drv; /* Handles HW or simulator */
cl_device_id device; /* All information about the GPU device */
cl_command_queue queues; /* All command queues currently allocated */
cl_program programs; /* All programs currently allocated */
cl_command_queue_properties,
cl_int*);
-/* Use for all GPU buffers */
-extern struct _drm_intel_bufmgr* cl_context_get_intel_bufmgr(cl_context);
-
/* Enqueue a ND Range kernel */
extern cl_int cl_context_ND_kernel(cl_context,
cl_command_queue,
const size_t*);
/* Used for allocation */
-extern struct _drm_intel_bufmgr*
-cl_context_get_intel_bufmgr(cl_context ctx);
+extern cl_buffer_mgr* cl_context_get_bufmgr(cl_context ctx);
#endif /* __CL_CONTEXT_H__ */
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
-#include "cl_buffer.h"
+#include "cl_driver.h"
+#include "cl_utils.h"
#include <stdlib.h>
-cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
-cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
-cl_buffer_map_cb *cl_buffer_map = NULL;
-cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
-cl_buffer_pin_cb *cl_buffer_pin = NULL;
-cl_buffer_unpin_cb *cl_buffer_unpin = NULL;
-cl_buffer_subdata_cb *cl_buffer_subdata = NULL;
-cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc = NULL;
-cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
-cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+/* Driver */
+LOCAL cl_driver_new_cb *cl_driver_new = NULL;
+LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
+LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
+
+/* Buffer */
+LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
+LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL;
+LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
+LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
+LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
+LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
+LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL;
+LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL;
+LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL;
+LOCAL cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc = NULL;
+LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
+
+/* GPGPU */
+LOCAL cl_gpgpu_new_cb *cl_gpgpu_new = NULL;
+LOCAL cl_gpgpu_delete_cb *cl_gpgpu_delete = NULL;
+LOCAL cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf = NULL;
+LOCAL cl_gpgpu_bind_image2D_cb *cl_gpgpu_bind_image2D = NULL;
+LOCAL cl_gpgpu_state_init_cb *cl_gpgpu_state_init = NULL;
+LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL;
+LOCAL cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants = NULL;
+LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL;
+LOCAL cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier = NULL;
+LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL;
+LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL;
+LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL;
+LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL;
+LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL;
+LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
*/
extern "C" {
-#include "sim/sim_buffer.h"
-#include "intel/intel_buffer.h"
+#include "sim/sim_driver.h"
+#include "intel/intel_driver.h"
#include "cl_utils.h"
#include <stdlib.h>
#include <string.h>
#define __CL_BUFFER_H__
#include <stdint.h>
+#include <stdlib.h>
-/* Hide behind some call backs the buffer allocation / deallocation ... This
+/* XXX needed for previous driver */
+#define MAX_SURFACES 128
+#define MAX_SAMPLERS 16
+
+/**************************************************************************
+ * cl_driver:
+ * Hide behind some call backs the buffer allocation / deallocation ... This
* will allow us to make the use of a software performance simulator easier and
* to minimize the code specific for the HW and for the simulator
- */
+ **************************************************************************/
/* Encapsulates command buffer / data buffer / kernels */
typedef struct cl_buffer cl_buffer;
/* Encapsulates the driver backend functionalities */
typedef struct cl_driver cl_driver;
+/* Encapsulates the gpgpu stream of commands */
+typedef struct cl_gpgpu cl_gpgpu;
+
+/**************************************************************************
+ * Driver
+ **************************************************************************/
/* Create a new driver */
typedef cl_driver* (cl_driver_new_cb)(void);
-extern cl_driver_new_cb cl_driver_new;
+extern cl_driver_new_cb *cl_driver_new;
/* Delete the driver */
-typedef cl_driver* (cl_driver_delete_cb)(void);
-extern cl_driver_delete_cb cl_driver_delete;
+typedef void (cl_driver_delete_cb)(cl_driver*);
+extern cl_driver_delete_cb *cl_driver_delete;
/* Get the buffer manager from the driver */
typedef cl_buffer_mgr* (cl_driver_get_bufmgr_cb)(cl_driver*);
typedef uint32_t (cl_driver_get_ver_cb)(cl_driver*);
extern cl_driver_get_ver_cb *cl_driver_get_ver;
+/**************************************************************************
+ * GPGPU command streamer
+ **************************************************************************/
+/* Describe texture tiling */
+typedef enum cl_gpgpu_tiling {
+ GPGPU_NO_TILE = 0,
+ GPGPU_TILE_X = 1,
+ GPGPU_TILE_Y = 2,
+} cl_gpgpu_tiling_t;
+
+/* Cache control options */
+enum gen6_cache_control {
+ cc_gtt = 0x0,
+ cc_l3 = 0x1,
+ cc_llc = 0x2,
+ cc_llc_l3 = 0x3
+};
+
+
+/* Use this structure to bind kernels in the gpgpu state */
+typedef struct cl_gpgpu_kernel {
+ const char *name; /* kernel name and bo name */
+ uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */
+ uint32_t cst_sz; /* total size of all constants */
+ cl_buffer *bo; /* kernel code in the proper addr space */
+ int32_t barrierID; /* barrierID for _this_ kernel */
+ uint32_t use_barrier:1; /* For gen7 (automatic barrier management) */
+ uint32_t thread_n:15; /* For gen7 (automatic barrier management) */
+ uint32_t slm_sz:16; /* For gen7 (automatic SLM allocation) */
+} cl_gpgpu_kernel_t;
+
+/* Create a new gpgpu state */
+typedef cl_gpgpu* (cl_gpgpu_new_cb)(cl_driver*);
+extern cl_gpgpu_new_cb *cl_gpgpu_new;
+
+/* Delete the gpgpu state */
+typedef void (cl_gpgpu_delete_cb)(cl_gpgpu*);
+extern cl_gpgpu_delete_cb *cl_gpgpu_delete;
+
+/* Bind a regular unformatted buffer */
+typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu*, int32_t index, cl_buffer *, uint32_t cchint);
+extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
+
+/* Set a 2d texture */
+typedef void (cl_gpgpu_bind_image2D_cb)(cl_gpgpu *state,
+ int32_t index,
+ struct cl_buffer* obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int pitch,
+ cl_gpgpu_tiling_t tiling);
+extern cl_gpgpu_bind_image2D_cb *cl_gpgpu_bind_image2D;
+
+/* Configure internal state */
+typedef void (cl_gpgpu_state_init_cb)(cl_gpgpu*, uint32_t max_threads, uint32_t size_cs_entry);
+extern cl_gpgpu_state_init_cb *cl_gpgpu_state_init;
+
+/* Set the buffer object where to report performance counters */
+typedef void (cl_gpgpu_set_perf_counters_cb)(cl_gpgpu*, struct cl_buffer *perf);
+extern cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters;
+
+/* Fills current constant buffer with data */
+typedef void (cl_gpgpu_upload_constants_cb)(cl_gpgpu*, const void* data, uint32_t size);
+extern cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants;
+
+/* Setup all indirect states */
+typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu*, cl_gpgpu_kernel_t* kernel, uint32_t ker_n);
+extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
+
+/* Make HW threads use barrierID */
+typedef void (cl_gpgpu_update_barrier_cb)(cl_gpgpu*, uint32_t barrierID, uint32_t thread_n);
+extern cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier;
+
+/* Upload the constant samplers as specified inside the OCL kernel */
+typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
+extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;
+
+/* Set a sampler */
+typedef void (cl_gpgpu_set_sampler_cb)(cl_gpgpu*, uint32_t index, uint32_t non_normalized);
+extern cl_gpgpu_set_sampler_cb *cl_gpgpu_set_sampler;
+
+/* Allocate the batch buffer and return the BO used for the batch buffer */
+typedef void (cl_gpgpu_batch_reset_cb)(cl_gpgpu*, size_t sz);
+extern cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset;
+
+/* Atomic begin, pipeline select, urb, pipeline state and constant buffer */
+typedef void (cl_gpgpu_batch_start_cb)(cl_gpgpu*);
+extern cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start;
+
+/* atomic end with possibly inserted flush */
+typedef void (cl_gpgpu_batch_end_cb)(cl_gpgpu*, int32_t flush_mode);
+extern cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end;
+
+/* Flush the command buffer */
+typedef void (cl_gpgpu_flush_cb)(cl_gpgpu *state);
+extern cl_gpgpu_flush_cb *cl_gpgpu_flush;
+
+/* Will spawn all threads */
+typedef void (cl_gpgpu_walker_cb)(cl_gpgpu *state,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3]);
+extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
+
+/**************************************************************************
+ * Buffer
+ **************************************************************************/
/* Allocate a buffer */
typedef cl_buffer* (cl_buffer_alloc_cb)(cl_buffer_mgr*, const char*, unsigned long, unsigned long);
extern cl_buffer_alloc_cb *cl_buffer_alloc;
typedef void (cl_buffer_unreference_cb)(cl_buffer*);
extern cl_buffer_unreference_cb *cl_buffer_unreference;
+/* Add one more ref on a buffer */
+typedef void (cl_buffer_reference_cb)(cl_buffer*);
+extern cl_buffer_reference_cb *cl_buffer_reference;
+
/* Map a buffer */
-typedef void* (cl_buffer_map_cb)(cl_buffer*);
+typedef int (cl_buffer_map_cb)(cl_buffer*, uint32_t write_enable);
extern cl_buffer_map_cb *cl_buffer_map;
/* Unmap a buffer */
-typedef void* (cl_buffer_unmap_cb)(cl_buffer*);
+typedef int (cl_buffer_unmap_cb)(cl_buffer*);
extern cl_buffer_unmap_cb *cl_buffer_unmap;
+/* Get the virtual address (when mapped) */
+typedef void* (cl_buffer_get_virtual_cb)(cl_buffer*);
+extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;
+
/* Pin a buffer */
-typedef int (cl_buffer_pin_cb)(cl_buffer*);
+typedef int (cl_buffer_pin_cb)(cl_buffer*, uint32_t alignment);
extern cl_buffer_pin_cb *cl_buffer_pin;
/* Unpin a buffer */
typedef int (cl_buffer_emit_reloc_cb) (cl_buffer *, uint32_t, cl_buffer*, uint32_t, uint32_t, uint32_t);
extern cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc;
+/* Wait for all pending rendering for this buffer to complete */
+typedef void (cl_buffer_wait_rendering_cb) (cl_buffer *);
+extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
+
#endif /* __CL_BUFFER_H__ */
#include "cl_image.h"
#include "cl_utils.h"
-#ifdef _PLASMA
-#include "plasma/intel_defines.h"
-#else
#include "intel/intel_defines.h"
-#endif
#include <assert.h>
#include "cl_alloc.h"
#include "cl_utils.h"
#include "CL/cl.h"
-#include "intel_bufmgr.h"
-#include "intel/intel_gpgpu.h"
#include "gen/program.h"
#include <stdio.h>
if (atomic_dec(&k->ref_n) > 1) return;
/* Release one reference on all bos we own */
- if (k->bo) drm_intel_bo_unreference(k->bo);
- if (k->const_bo) drm_intel_bo_unreference(k->const_bo);
+ if (k->bo) cl_buffer_unreference(k->bo);
+ if (k->const_bo) cl_buffer_unreference(k->const_bo);
/* This will be true for kernels created by clCreateKernel */
if (k->ref_its_program) cl_program_delete(k->program);
cl_kernel_setup(cl_kernel k, const struct GenKernel *gen_kernel)
{
cl_context ctx = k->program->ctx;
- drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
+ cl_buffer_mgr *bufmgr = cl_context_get_bufmgr(ctx);
/* Allocate the gen code here */
const uint32_t code_sz = GenKernelGetCodeSize(gen_kernel);
const char *code = GenKernelGetCode(gen_kernel);
- k->bo = drm_intel_bo_alloc(bufmgr, "CL kernel", code_sz, 64u);
+ k->bo = cl_buffer_alloc(bufmgr, "CL kernel", code_sz, 64u);
/* Upload the code */
- drm_intel_bo_subdata(k->bo, 0, code_sz, code);
+ cl_buffer_subdata(k->bo, 0, code_sz, code);
k->gen_kernel = gen_kernel;
}
to->program = from->program;
/* Retain the bos */
- if (from->bo) drm_intel_bo_reference(from->bo);
- if (from->const_bo) drm_intel_bo_reference(from->const_bo);
+ if (from->bo) cl_buffer_reference(from->bo);
+ if (from->const_bo) cl_buffer_reference(from->const_bo);
/* We retain the program destruction since this kernel (user allocated)
* depends on the program for some of its pointers
#include "cl_defs.h"
#include "cl_internals.h"
+#include "cl_driver.h"
#include "CL/cl.h"
#include <stdint.h>
struct _cl_kernel {
uint64_t magic; /* To identify it as a kernel */
volatile int ref_n; /* We reference count this object */
- struct _drm_intel_bo *bo; /* The code itself */
- struct _drm_intel_bo *const_bo; /* Buffer for all __constants values in the OCL program */
+ struct cl_buffer *bo; /* The code itself */
+ struct cl_buffer *const_bo; /* Buffer for all __constants values in the OCL program */
cl_program program; /* Owns this structure (and pointers) */
const struct GenKernel *gen_kernel; /* (Opaque) compiler structure for the OCL kernel */
uint8_t ref_its_program; /* True only for the user kernel (those created by clCreateKernel) */
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_device_id.h"
-
-#include "intel/intel_driver.h"
-#include "intel/intel_gpgpu.h"
-#include "intel_bufmgr.h" /* libdrm_intel */
-#include "cl_buffer.h"
+#include "cl_driver.h"
#include "CL/cl.h"
#include "CL/cl_intel.h"
cl_int is_tiled,
cl_int *errcode)
{
- drm_intel_bufmgr *bufmgr = NULL;
+ cl_buffer_mgr *bufmgr = NULL;
cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
size_t alignment = 64;
alignment = 4096;
/* Allocate space in memory */
- bufmgr = cl_context_get_intel_bufmgr(ctx);
+ bufmgr = cl_context_get_bufmgr(ctx);
assert(bufmgr);
- mem->bo = drm_intel_bo_alloc(bufmgr, "CL memory object", sz, alignment);
+ mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
if (UNLIKELY(mem->bo == NULL)) {
err = CL_MEM_ALLOCATION_FAILURE;
goto error;
/* Copy the data if required */
if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
- drm_intel_bo_subdata(mem->bo, 0, sz, data);
+ cl_buffer_subdata(mem->bo, 0, sz, data);
exit:
if (errcode_ret)
{
size_t x, y, p;
char *dst;
- drm_intel_bo_map(mem->bo, 1);
- dst = drm_intel_bo_get_virtual(mem->bo);
+ cl_buffer_map(mem->bo, 1);
+ dst = cl_buffer_get_virtual(mem->bo);
for (y = 0; y < h; ++y) {
char *src = (char*) data + pitch * y;
for (x = 0; x < w; ++x) {
src += bpp;
}
}
- drm_intel_bo_unmap(mem->bo);
+ cl_buffer_unmap(mem->bo);
}
static const uint32_t tile_sz = 4096; /* 4KB per tile */
char *img = NULL;
char *end = (char*) data + pitch * h;
- drm_intel_bo_map(mem->bo, 1);
- img = drm_intel_bo_get_virtual(mem->bo);
+ cl_buffer_map(mem->bo, 1);
+ img = cl_buffer_get_virtual(mem->bo);
for (tiley = 0; tiley < tiley_n; ++tiley)
for (tilex = 0; tilex < tilex_n; ++tilex) {
char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
}
}
}
- drm_intel_bo_unmap(mem->bo);
+ cl_buffer_unmap(mem->bo);
}
static void
char *img = NULL;
char *end = (char*) data + pitch * h;
- drm_intel_bo_map(mem->bo, 1);
- img = drm_intel_bo_get_virtual(mem->bo);
+ cl_buffer_map(mem->bo, 1);
+ img = cl_buffer_get_virtual(mem->bo);
for (tiley = 0; tiley < tiley_n; ++tiley)
for (tilex = 0; tilex < tilex_n; ++tilex) {
char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
}
}
}
- drm_intel_bo_unmap(mem->bo);
+ cl_buffer_unmap(mem->bo);
}
LOCAL cl_mem
#undef DO_IMAGE_ERROR
/* Pick up tiling mode (we do only linear on SNB) */
- if (ctx->intel_drv->gen_ver != 6)
+ if (cl_driver_get_ver(ctx->drv) != 6)
tiling = CL_TILE_Y;
/* Tiling requires to align both pitch and height */
if (atomic_dec(&mem->ref_n) > 1)
return;
if (LIKELY(mem->bo != NULL))
- drm_intel_bo_unreference(mem->bo);
+ cl_buffer_unreference(mem->bo);
/* Remove it from the list */
assert(mem->ctx);
LOCAL void*
cl_mem_map(cl_mem mem)
{
- drm_intel_bo_map(mem->bo, 1);
- assert(drm_intel_bo_get_virtual(mem->bo));
- return drm_intel_bo_get_virtual(mem->bo);
+ cl_buffer_map(mem->bo, 1);
+ assert(cl_buffer_get_virtual(mem->bo));
+ return cl_buffer_get_virtual(mem->bo);
}
LOCAL cl_int
cl_mem_unmap(cl_mem mem)
{
- drm_intel_bo_unmap(mem->bo);
+ cl_buffer_unmap(mem->bo);
return CL_SUCCESS;
}
assert(mem);
if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
return CL_INVALID_MEM;
- drm_intel_bo_pin(mem->bo, 4096);
+ cl_buffer_pin(mem->bo, 4096);
return CL_SUCCESS;
}
assert(mem);
if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0))
return CL_INVALID_MEM;
- drm_intel_bo_unpin(mem->bo);
+ cl_buffer_unpin(mem->bo);
return CL_SUCCESS;
}
#define __CL_MEM_H__
#include "cl_internals.h"
+#include "cl_driver.h"
#include "CL/cl.h"
-/* Store the object in video memory */
-struct _drm_intel_bo;
-
typedef enum cl_image_tiling {
CL_NO_TILE = 0,
CL_TILE_X = 1,
struct _cl_mem {
uint64_t magic; /* To identify it as a memory object */
volatile int ref_n; /* This object is reference counted */
- struct _drm_intel_bo *bo; /* Data in GPU memory */
+ cl_buffer *bo; /* Data in GPU memory */
cl_mem prev, next; /* We chain the memory buffers together */
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
/* Add one more reference to this object */
extern void cl_mem_add_ref(cl_mem);
-/* Directly map a memory object (just use drm_intel_bo_map) */
+/* Directly map a memory object */
extern void *cl_mem_map(cl_mem);
-/* Unmap a memory object (just use drm_intel_bo_unmap) */
+/* Unmap a memory object */
extern cl_int cl_mem_unmap(cl_mem);
/* Pin/unpin the buffer in memory (you must be root) */
*/
#include "intel_driver.h"
+#include "intel_gpgpu.h"
#include "intel_batchbuffer.h"
#include "x11/dricommon.h"
#include "cl_utils.h"
#include "cl_alloc.h"
+#include "cl_driver.h"
#include "cl_genx_driver.h"
#define SET_BLOCKED_SIGSET(DRIVER) do { \
RESTORE_BLOCKED_SIGSET(DRIVER); \
} while (0)
-
LOCAL intel_driver_t*
intel_driver_new(void)
{
return drv->gen_ver;
}
+LOCAL uint32_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
+LOCAL void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
+
LOCAL void
intel_setup_callbacks(void)
{
+ cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
+ cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
+ cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+ cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
+ cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
+ cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
cl_buffer_emit_reloc = (cl_buffer_emit_reloc_cb *) drm_intel_bo_emit_reloc;
- cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
- cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+ cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
+ intel_set_gpgpu_callbacks();
}
{
intel_driver_t *drv;
intel_batchbuffer_t *batch;
- genx_gpgpu_kernel_t *ker;
+ cl_gpgpu_kernel_t *ker;
struct {
dri_bo *bo;
uint32_t max_threads; /* max threads requested by the user */
};
-LOCAL intel_gpgpu_t*
-intel_gpgpu_new(intel_driver_t *drv)
-{
- intel_gpgpu_t *state = NULL;
+typedef struct intel_gpgpu intel_gpgpu_t;
- TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
- state->drv = drv;
- state->batch = intel_batchbuffer_new(state->drv);
- assert(state->batch);
- intel_batchbuffer_init(state->batch, state->drv);
-
-exit:
- return state;
-error:
- intel_gpgpu_delete(state);
- state = NULL;
- goto exit;
-}
-
-LOCAL void
+static void
intel_gpgpu_delete(intel_gpgpu_t *state)
{
if (state == NULL)
cl_free(state);
}
+static intel_gpgpu_t*
+intel_gpgpu_new(intel_driver_t *drv)
+{
+ intel_gpgpu_t *state = NULL;
+
+ TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
+ state->drv = drv;
+ state->batch = intel_batchbuffer_new(state->drv);
+ assert(state->batch);
+ intel_batchbuffer_init(state->batch, state->drv);
+
+exit:
+ return state;
+error:
+ intel_gpgpu_delete(state);
+ state = NULL;
+ goto exit;
+}
+
static void
-gpgpu_select_pipeline(intel_gpgpu_t *state)
+intel_gpgpu_select_pipeline(intel_gpgpu_t *state)
{
BEGIN_BATCH(state->batch, 1);
OUT_BATCH(state->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
}
static void
-gpgpu_set_base_address(intel_gpgpu_t *state)
+intel_gpgpu_set_base_address(intel_gpgpu_t *state)
{
const uint32_t def_cc = cc_llc_l3; /* default Cache Control value */
BEGIN_BATCH(state->batch, 10);
}
static void
-gpgpu_load_vfe_state(intel_gpgpu_t *state)
+intel_gpgpu_load_vfe_state(intel_gpgpu_t *state)
{
BEGIN_BATCH(state->batch, 8);
OUT_BATCH(state->batch, CMD_MEDIA_STATE_POINTERS | (8-2));
intel_batchbuffer_alloc_space(state->batch,0);
memset(vfe, 0, sizeof(struct gen6_vfe_state_inline));
- vfe->vfe1.gpgpu_mode =
- (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 1 : 0;
+ vfe->vfe1.gpgpu_mode = 1;
vfe->vfe1.bypass_gateway_ctl = 1;
vfe->vfe1.reset_gateway_timer = 1;
vfe->vfe1.max_threads = state->max_threads - 1;
vfe->vfe1.urb_entries = 64;
- if (state->drv->gen_ver >= 7) {
- vfe->vfe3.curbe_size = 480;
- vfe->vfe4.scoreboard_mask = 0;
- } else {
- vfe->vfe3.curbe_size = 63;
- vfe->vfe3.urb_size = 13;
- vfe->vfe4.scoreboard_mask = 0x80000000;
- }
-
- //M
- //p
+ vfe->vfe3.curbe_size = 480;
+ vfe->vfe4.scoreboard_mask = 0;
//vfe->vfe3.urb_size = 13;
//vfe->vfe4.scoreboard_mask = (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 0 : 0x80000000;
intel_batchbuffer_alloc_space(state->batch, sizeof(gen6_vfe_state_inline_t));
}
static void
-gpgpu_load_constant_buffer(intel_gpgpu_t *state)
+intel_gpgpu_load_constant_buffer(intel_gpgpu_t *state)
{
BEGIN_BATCH(state->batch, 4);
OUT_BATCH(state->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
}
static void
-gpgpu_load_idrt(intel_gpgpu_t *state)
+intel_gpgpu_load_idrt(intel_gpgpu_t *state)
{
BEGIN_BATCH(state->batch, 4);
OUT_BATCH(state->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
};
static void
-gpgpu_pipe_control(intel_gpgpu_t *state)
+intel_gpgpu_pipe_control(intel_gpgpu_t *state)
{
BEGIN_BATCH(state->batch, SIZEOF32(gen6_pipe_control_t));
gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
ADVANCE_BATCH(state->batch);
}
-LOCAL void
+static void
intel_gpgpu_set_L3(intel_gpgpu_t *state, uint32_t use_barrier)
{
BEGIN_BATCH(state->batch, 6);
OUT_BATCH(state->batch, gpgpu_l3_config_reg2[4]);
ADVANCE_BATCH(state->batch);
- gpgpu_pipe_control(state);
+ intel_gpgpu_pipe_control(state);
}
-LOCAL void
-gpgpu_batch_start(intel_gpgpu_t *state)
+static void
+intel_gpgpu_batch_start(intel_gpgpu_t *state)
{
intel_batchbuffer_start_atomic(state->batch, 256);
- gpgpu_pipe_control(state);
+ intel_gpgpu_pipe_control(state);
if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
intel_gpgpu_set_L3(state, state->ker->use_barrier);
- gpgpu_select_pipeline(state);
- gpgpu_set_base_address(state);
- gpgpu_load_vfe_state(state);
- gpgpu_load_constant_buffer(state);
- gpgpu_load_idrt(state);
+ intel_gpgpu_select_pipeline(state);
+ intel_gpgpu_set_base_address(state);
+ intel_gpgpu_load_vfe_state(state);
+ intel_gpgpu_load_constant_buffer(state);
+ intel_gpgpu_load_idrt(state);
if (state->perf_b.bo) {
BEGIN_BATCH(state->batch, 3);
}
}
-LOCAL void
-gpgpu_batch_end(intel_gpgpu_t *state, int32_t flush_mode)
+static void
+intel_gpgpu_batch_end(intel_gpgpu_t *state, int32_t flush_mode)
{
/* Insert the performance counter command */
if (state->perf_b.bo) {
ADVANCE_BATCH(state->batch);
}
- if(flush_mode) gpgpu_pipe_control(state);
+ if(flush_mode) intel_gpgpu_pipe_control(state);
intel_batchbuffer_end_atomic(state->batch);
}
-LOCAL void
-gpgpu_batch_reset(intel_gpgpu_t *state, size_t sz)
+static void
+intel_gpgpu_batch_reset(intel_gpgpu_t *state, size_t sz)
{
intel_batchbuffer_reset(state->batch, sz);
}
-LOCAL void
-gpgpu_flush(intel_gpgpu_t *state)
+static void
+intel_gpgpu_flush(intel_gpgpu_t *state)
{
intel_batchbuffer_flush(state->batch);
}
-LOCAL void
-gpgpu_state_init(intel_gpgpu_t *state,
- uint32_t max_threads,
- uint32_t size_cs_entry)
+static void
+intel_gpgpu_state_init(intel_gpgpu_t *state,
+ uint32_t max_threads,
+ uint32_t size_cs_entry)
{
dri_bo *bo;
}
static void
-gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo)
+intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo)
{
surface_heap_t *heap = state->surface_heap_b.bo->virtual;
heap->binding_table[index] = offsetof(surface_heap_t, surface) +
}
static void
-gpgpu_bind_buf_gen7(intel_gpgpu_t *state,
- int32_t index,
- dri_bo* obj_bo,
- uint32_t size,
- uint32_t cchint)
+intel_gpgpu_bind_buf_gen7(intel_gpgpu_t *state,
+ int32_t index,
+ dri_bo* obj_bo,
+ uint32_t size,
+ uint32_t cchint)
{
surface_heap_t *heap = state->surface_heap_b.bo->virtual;
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
ss->ss2.height = (size_ss & 0x1fff80) >> 7; /* bits 20:7 of size_ss */
ss->ss3.depth = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */
ss->ss5.cache_control = cc_llc_l3;
- gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
+ intel_gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
}
static void
-gpgpu_bind_image2D_gen7(intel_gpgpu_t *state,
- int32_t index,
- dri_bo* obj_bo,
- uint32_t format,
- int32_t w,
- int32_t h,
- int32_t pitch,
- int32_t tiling)
+intel_gpgpu_bind_image2D_gen7(intel_gpgpu_t *state,
+ int32_t index,
+ dri_bo* obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int32_t pitch,
+ int32_t tiling)
{
surface_heap_t *heap = state->surface_heap_b.bo->virtual;
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
ss->ss0.tiled_surface = 1;
ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
}
- gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
+ intel_gpgpu_set_buf_reloc_gen7(state, index, obj_bo);
}
-LOCAL void
-gpgpu_bind_buf(intel_gpgpu_t *state,
- int32_t index,
- dri_bo* obj_bo,
- uint32_t cchint)
+static void
+intel_gpgpu_bind_buf(intel_gpgpu_t *state,
+ int32_t index,
+ drm_intel_bo *obj_bo,
+ uint32_t cchint)
{
- uint32_t size = obj_bo->size;
-
+ const uint32_t size = obj_bo->size;
assert(index < MAX_SURFACES);
if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
- gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
+ intel_gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
else
NOT_IMPLEMENTED;
}
-LOCAL void
-gpgpu_bind_image2D(intel_gpgpu_t *state,
- int32_t index,
- dri_bo* obj_bo,
- uint32_t format,
- int32_t w,
- int32_t h,
- int32_t pitch,
- gpgpu_tiling_t tiling)
+static void
+intel_gpgpu_bind_image2D(intel_gpgpu_t *state,
+ int32_t index,
+ cl_buffer *obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int32_t pitch,
+ cl_gpgpu_tiling_t tiling)
{
assert(index < MAX_SURFACES);
if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
- gpgpu_bind_image2D_gen7(state, index, obj_bo, format, w, h, pitch, tiling);
+ intel_gpgpu_bind_image2D_gen7(state, index, (drm_intel_bo*) obj_bo, format, w, h, pitch, tiling);
else
NOT_IMPLEMENTED;
}
static void
-gpgpu_build_idrt(intel_gpgpu_t *state,
- genx_gpgpu_kernel_t *kernel,
- uint32_t ker_n)
+intel_gpgpu_build_idrt(intel_gpgpu_t *state,
+ cl_gpgpu_kernel_t *kernel,
+ uint32_t ker_n)
{
gen6_interface_descriptor_t *desc;
- dri_bo *bo;
+ drm_intel_bo *bo = NULL, *ker_bo = NULL;
uint32_t i;
bo = state->idrt_b.bo;
for (i = 0; i < ker_n; i++) {
memset(desc, 0, sizeof(*desc));
- desc->desc0.kernel_start_pointer = kernel[i].bo->offset >> 6; /* reloc */
+ ker_bo = (drm_intel_bo *) kernel[i].bo;
+ desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */
desc->desc2.sampler_state_pointer = state->sampler_state_b.bo->offset >> 5;
desc->desc3.binding_table_entry_count = 0; /* no prefetch */
desc->desc3.binding_table_pointer = 0;
I915_GEM_DOMAIN_INSTRUCTION, 0,
0,
i * sizeof(*desc) + offsetof(gen6_interface_descriptor_t, desc0),
- kernel[i].bo);
+ ker_bo);
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
dri_bo_unmap(bo);
}
-LOCAL void
-gpgpu_upload_constants(intel_gpgpu_t *state, const void* data, uint32_t size)
+static void
+intel_gpgpu_upload_constants(intel_gpgpu_t *state, const void* data, uint32_t size)
{
unsigned char *constant_buffer = NULL;
dri_bo_unmap(state->curbe_b.bo);
}
-LOCAL void
-gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n)
+static void
+intel_gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n)
{
if (n) {
/*sizeof(gen6_sampler_state_t) == sizeof(gen7_surface_state_t) */
}
}
-LOCAL void
-gpgpu_states_setup(intel_gpgpu_t *state, genx_gpgpu_kernel_t *kernel, uint32_t ker_n)
+static void
+intel_gpgpu_states_setup(intel_gpgpu_t *state, cl_gpgpu_kernel_t *kernel, uint32_t ker_n)
{
state->ker = kernel;
- gpgpu_build_idrt(state, kernel, ker_n);
+ intel_gpgpu_build_idrt(state, kernel, ker_n);
dri_bo_unmap(state->surface_heap_b.bo);
dri_bo_unmap(state->sampler_state_b.bo);
}
-LOCAL void
-gpgpu_update_barrier(intel_gpgpu_t *state, uint32_t barrierID, uint32_t thread_n)
+static void
+intel_gpgpu_update_barrier(intel_gpgpu_t *state, uint32_t barrierID, uint32_t thread_n)
{
BEGIN_BATCH(state->batch, 4);
OUT_BATCH(state->batch, CMD_MEDIA_STATE_FLUSH | 0);
ADVANCE_BATCH(state->batch);
}
-LOCAL void
-gpgpu_set_perf_counters(intel_gpgpu_t *state, dri_bo *perf)
+static void
+intel_gpgpu_set_perf_counters(intel_gpgpu_t *state, cl_buffer *perf)
{
if (state->perf_b.bo)
drm_intel_bo_unreference(state->perf_b.bo);
- drm_intel_bo_reference(perf);
- state->perf_b.bo = perf;
-}
-
-LOCAL void
-gpgpu_run(intel_gpgpu_t *state, int32_t ki)
-{
- BEGIN_BATCH(state->batch, 6);
- OUT_BATCH(state->batch, GEN_CMD_MEDIA_OBJECT | 5);
- OUT_BATCH(state->batch, ki); /* kernel index + DBGOBJ=0 << 8 */
- OUT_BATCH(state->batch, 0); /* no children, no thread sync, no scoreboard, any half-slice, indirect data size = 0 */
- OUT_BATCH(state->batch, 0); /* no indirect data ptr */
- OUT_BATCH(state->batch, 0); /* scoreboard Y,X = 0 or just 0 in reg.0 */
- OUT_BATCH(state->batch, 0); /* scoreboard color, mask = 0 or just 0 in reg.1 */
- OUT_BATCH(state->batch, 0); /* scoreboard color, mask = 0 or just 0 in reg.1 */
- ADVANCE_BATCH(state->batch);
-}
-
-LOCAL char*
-gpgpu_run_with_inline(intel_gpgpu_t *state, int32_t ki, size_t sz)
-{
- const uint32_t len = (uint32_t) (sz >> 2);
-
- assert(sz % sizeof(int32_t) == 0);
- BEGIN_BATCH(state->batch, len + 6);
- OUT_BATCH(state->batch, GEN_CMD_MEDIA_OBJECT | (len + 4));
- OUT_BATCH(state->batch, ki); /* kernel index + DBGOBJ=0 << 8 */
- OUT_BATCH(state->batch, 0); /* no children, no thread sync, no scoreboard, any half-slice, indirect data size = 0 */
- OUT_BATCH(state->batch, 0); /* no indirect data ptr */
- OUT_BATCH(state->batch, 0); /* scoreboard Y,X = 0 or just 0 in reg.0 */
- OUT_BATCH(state->batch, 0); /* scoreboard color, mask = 0 or just 0 in reg.1 */
- return (char*) intel_batchbuffer_alloc_space(state->batch,sz);
+ drm_intel_bo_reference((drm_intel_bo*) perf);
+ state->perf_b.bo = (drm_intel_bo*) perf;
}
-LOCAL void
-gpgpu_walker(intel_gpgpu_t *state,
- uint32_t simd_sz,
- uint32_t thread_n,
- const size_t global_wk_off[3],
- const size_t global_wk_sz[3],
- const size_t local_wk_sz[3])
+static void
+intel_gpgpu_walker(intel_gpgpu_t *state,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3])
{
const uint32_t global_wk_dim[3] = {
global_wk_sz[0] / local_wk_sz[0],
ADVANCE_BATCH(state->batch);
}
-LOCAL int32_t
-intel_gpgpu_version(intel_gpgpu_t *gpgpu)
-{
- return gpgpu->drv->gen_ver;
+LOCAL void
+intel_set_gpgpu_callbacks(void)
+{
+ cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
+ cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
+ cl_gpgpu_bind_image2D = (cl_gpgpu_bind_image2D_cb *) intel_gpgpu_bind_image2D;
+ cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
+ cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
+ cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
+ cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) intel_gpgpu_upload_constants;
+ cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
+ cl_gpgpu_update_barrier = (cl_gpgpu_update_barrier_cb *) intel_gpgpu_update_barrier;
+ cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
+ cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
+ cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
+ cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
+ cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
}
-uint32_t drm_intel_bo_get_size(drm_intel_bo *bo)
-{
- return (bo->size);
-}
-void *drm_intel_bo_get_virtual(drm_intel_bo *bo)
-{
- return (bo->virtual);
-}
#define __GENX_GPGPU_H__
#include "cl_utils.h"
-#include "intel_bufmgr.h"
+#include "cl_driver.h"
#include <stdlib.h>
#include <stdint.h>
-#define MAX_SURFACES 128
-#define MAX_SAMPLERS 16
-
-enum gen6_cache_control {
- cc_gtt = 0x0,
- cc_l3 = 0x1,
- cc_llc = 0x2,
- cc_llc_l3 = 0x3
-};
-
-/* Use this structure to bind kernels in the gpgpu state */
-typedef struct genx_gpgpu_kernel {
- const char *name; /* kernel name and bo name */
- uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */
- uint32_t cst_sz; /* total size of all constants */
- struct _drm_intel_bo *bo;/* kernel code in the proper addr space */
- int32_t barrierID; /* barrierID for _this_ kernel */
- uint32_t use_barrier:1; /* For gen7 (automatic barrier management) */
- uint32_t thread_n:15; /* For gen7 (automatic barrier management) */
- uint32_t slm_sz:16; /* For gen7 (automatic SLM allocation) */
-} genx_gpgpu_kernel_t;
-
-/* Convenient abstraction of the device */
-struct intel_driver;
-
-/* Covenient way to talk to the device */
-typedef struct intel_gpgpu intel_gpgpu_t;
-
-/* Buffer object as exposed by drm_intel */
-struct _drm_intel_bo;
-
-/* Allocate and initialize a GPGPU state */
-extern intel_gpgpu_t* intel_gpgpu_new(struct intel_driver*);
-
-/* Destroy and deallocate a GPGPU state */
-extern void intel_gpgpu_delete(intel_gpgpu_t*);
-
-/* Get the device generation */
-extern int32_t intel_gpgpu_version(intel_gpgpu_t*);
-
-/* Buffer management wrapper APIs */
-extern uint32_t drm_intel_bo_get_size(drm_intel_bo *bo);
-extern void *drm_intel_bo_get_virtual(drm_intel_bo *bo);
-
-/* Set typeless buffer descriptor in the current binding table */
-extern void gpgpu_bind_buf(intel_gpgpu_t*,
- int32_t index,
- struct _drm_intel_bo* obj_bo,
- uint32_t cchint);
-
-typedef enum gpgpu_tiling {
- GPGPU_NO_TILE = 0,
- GPGPU_TILE_X = 1,
- GPGPU_TILE_Y = 2,
-} gpgpu_tiling_t;
-
-/* Set a 2d texture */
-extern void gpgpu_bind_image2D(intel_gpgpu_t *state,
- int32_t index,
- struct _drm_intel_bo* obj_bo,
- uint32_t format,
- int32_t w,
- int32_t h,
- int pitch,
- gpgpu_tiling_t tiling);
-
-/* Configure state, size in 512-bit units */
-extern void gpgpu_state_init(intel_gpgpu_t*, uint32_t max_threads, uint32_t size_cs_entry);
-
-/* Set the buffer object where to report performance counters */
-extern void gpgpu_set_perf_counters(intel_gpgpu_t*, struct _drm_intel_bo *perf);
-
-/* Fills current constant buffer with data */
-extern void gpgpu_upload_constants(intel_gpgpu_t*, const void* data, uint32_t size);
-
-/* Setup all indirect states */
-extern void gpgpu_states_setup(intel_gpgpu_t*, genx_gpgpu_kernel_t* kernel, uint32_t ker_n);
-
-/* Make HW threads use barrierID */
-extern void gpgpu_update_barrier(intel_gpgpu_t*, uint32_t barrierID, uint32_t thread_n);
-
-/* Upload the constant samplers as specified inside the OCL kernel */
-extern void gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n);
-
-/* Set a sampler */
-extern void gpgpu_set_sampler(intel_gpgpu_t*, uint32_t index, uint32_t non_normalized);
-
-/* Allocate the batch buffer and return the BO used for the batch buffer */
-extern void gpgpu_batch_reset(intel_gpgpu_t*, size_t sz);
-
-/* Atomic begin, pipeline select, urb, pipeline state and constant buffer */
-extern void gpgpu_batch_start(intel_gpgpu_t*);
-
-/* atomic end with possibly inserted flush */
-extern void gpgpu_batch_end(intel_gpgpu_t*, int32_t flush_mode);
-
-/* Emit MI_FLUSH */
-extern void gpgpu_flush(intel_gpgpu_t*);
-
-/* Enqueue a MEDIA object with no inline data */
-extern void gpgpu_run(intel_gpgpu_t*, int32_t ki);
-
-/* Enqueue a MEDIA object with inline data to push afterward. Returns the
- * pointer where to push. sz is the size of the data we are going to pass
- */
-extern char* gpgpu_run_with_inline(intel_gpgpu_t*, int32_t ki, size_t sz);
-
-/* Will spawn all threads */
-extern void
-gpgpu_walker(intel_gpgpu_t *state,
- uint32_t simd_sz,
- uint32_t thread_n,
- const size_t global_wk_off[3],
- const size_t global_wk_sz[3],
- const size_t local_wk_sz[3]);
+/* Set the gpgpu related call backs */
+extern void intel_set_gpgpu_callbacks(void);
#endif /* __GENX_GPGPU_H__ */
#include "cl_utils.h"
#include "cl_alloc.h"
-#include "sim/sim_buffer.h"
+#include "sim/sim_driver.h"
#include "CL/cl.h"
#include <stdlib.h>
#include <assert.h>
#include <string.h>
-#include "cl_buffer.h"
+#include "cl_driver.h"
/* Just to count allocations */
typedef struct sim_bufmgr { volatile int buf_n; } sim_bufmgr_t;
sim_buffer_delete(buf);
}
-static void*
-sim_buffer_map(sim_buffer_t *buf)
-{
- assert(buf);
- return buf->data;
-}
-
static int
sim_buffer_subdata(sim_buffer_t *buf, unsigned long offset, unsigned long size, const void *data)
{
{
return 1;
}
+
+static int sim_buffer_map(sim_buffer_t *buf, uint32_t write_enable) {return 0;}
static int sim_buffer_unmap(sim_buffer_t *buf) {return 0;}
-static int sim_buffer_pin(sim_buffer_t *buf) {return 0;}
+static int sim_buffer_pin(sim_buffer_t *buf, uint32_t alignment) {return 0;}
static int sim_buffer_unpin(sim_buffer_t *buf) {return 0;}
LOCAL void
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
-#ifndef __SIM_BUFFER_H__
-#define __SIM_BUFFER_H__
+#ifndef __SIM_DRIVER_H__
+#define __SIM_DRIVER_H__
/* Setup the call backs when using the simulator */
extern void sim_setup_callbacks(void);
-#endif /* __SIM_BUFFER_H__ */
+#endif /* __SIM_DRIVER_H__ */
+