Plasma specific changes

author ojomer <devnull@localhost>

Sat, 11 Feb 2012 13:44:11 +0000 (13:44 +0000)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:15:14 +0000 (16:15 -0700)
author ojomer <devnull@localhost>
Sat, 11 Feb 2012 13:44:11 +0000 (13:44 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:14 +0000 (16:15 -0700)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index d7a10e3..7837dd1 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -64,7 +64,6 @@ ADD_EXECUTABLE(dct tests/dct.c)
  ADD_EXECUTABLE(binomialOption tests/binomialOption.c)
  ADD_EXECUTABLE(nbody tests/nbody.c)
  ADD_EXECUTABLE(svm_test tests/svm_test.c)
-ADD_EXECUTABLE(fast_mat_mul tests/fast_mat_mul.cpp)
  TARGET_LINK_LIBRARIES(test_copy_buffer cl_test m)
  TARGET_LINK_LIBRARIES(test_copy_image cl_test m)
  TARGET_LINK_LIBRARIES(test_enqueue_read cl_test m)
@@ -89,5 +88,3 @@ TARGET_LINK_LIBRARIES(dct cl_test m)
  TARGET_LINK_LIBRARIES(binomialOption cl_test m)
  TARGET_LINK_LIBRARIES(nbody cl_test m)
  TARGET_LINK_LIBRARIES(svm_test cl_test m)
-TARGET_LINK_LIBRARIES(fast_mat_mul cl_test m)
-
diff --git a/src/cl_api.c b/src/cl_api.c

index 3b0ef5a..248b2ef 100644 (file)
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -36,6 +36,12 @@
  #include <string.h>
  #include <assert.h>
  
+#ifdef _PLASMA
+#define PRINTF(x)    printf("CL API => cl%s\n", x)
+#else
+#define PRINTF(x)
+#endif
+
  cl_int
  clGetPlatformIDs(cl_uint          num_entries,
                   cl_platform_id * platforms,
@@ -94,6 +100,7 @@ clCreateContext(const cl_context_properties *  properties,
                  void *                         user_data,
                  cl_int *                       errcode_ret)
  {
+    PRINTF("CreateContext");
    return cl_create_context(properties,
                             num_devices,
                             devices,
@@ -126,6 +133,8 @@ error:
  cl_int
  clReleaseContext(cl_context context)
  {
+    PRINTF("ReleaseContext");
+    
    cl_int err = CL_SUCCESS;
    CHECK_CONTEXT (context);
    cl_context_delete(context);
@@ -150,6 +159,7 @@ clCreateCommandQueue(cl_context                   context,
                       cl_command_queue_properties  properties,
                       cl_int *                     errcode_ret)
  {
+    PRINTF("CreateCommandQueue");    
    cl_command_queue queue = NULL;
    cl_int err = CL_SUCCESS;
    CHECK_CONTEXT (context);
@@ -171,6 +181,7 @@ error:
  cl_int
  clReleaseCommandQueue(cl_command_queue command_queue)
  {
+    PRINTF("ReleaseCommandQueue");    
    cl_int err = CL_SUCCESS;
    CHECK_QUEUE (command_queue);
    cl_command_queue_delete(command_queue);
@@ -212,6 +223,7 @@ clCreateBuffer(cl_context    context,
                 void *        host_ptr,
                 cl_int *      errcode_ret)
  {
+    PRINTF("CreateBuffer");    
    cl_mem mem = NULL;
    cl_int err = CL_SUCCESS;
    CHECK_CONTEXT (context);
@@ -249,6 +261,7 @@ clCreateImage2D(cl_context              context,
                  void *                  host_ptr,
                  cl_int *                errcode_ret)
  {
+    PRINTF("CreateImage2D");    
    cl_mem mem = NULL;
    cl_int err = CL_SUCCESS;
    CHECK_CONTEXT (context);
@@ -431,6 +444,7 @@ clCreateProgramWithBinary(cl_context             context,
                            cl_int *               binary_status,
                            cl_int *               errcode_ret)
  {
+    PRINTF("CreateProgramWithBinary");    
    cl_program program = NULL;
    cl_int err = CL_SUCCESS;
  
@@ -461,6 +475,7 @@ error:
  cl_int
  clReleaseProgram(cl_program program)
  {
+    PRINTF("clReleaseProgram");
    cl_int err = CL_SUCCESS;
    CHECK_PROGRAM (program);
    cl_program_delete(program);
@@ -541,6 +556,7 @@ clCreateKernel(cl_program   program,
                 const char * kernel_name,
                 cl_int *     errcode_ret)
  {
+    PRINTF("CreateKernel");    
    cl_kernel kernel = NULL;
    cl_int err = CL_SUCCESS;
  
@@ -582,6 +598,7 @@ error:
  cl_int
  clReleaseKernel(cl_kernel kernel)
  {
+    PRINTF("clReleaseKernel");    
    cl_int err = CL_SUCCESS;
    CHECK_KERNEL(kernel);
    cl_kernel_delete(kernel);
@@ -964,6 +981,7 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
                         const cl_event *  event_wait_list,
                         cl_event *        event)
  {
+    PRINTF("EnqueueNDRangeKernel");
    size_t fixed_global_off[] = {0,0,0};
    size_t fixed_global_sz[] = {1,1,1};
    size_t fixed_local_sz[] = {16,1,1};
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c

index af3a822..9c1dab3 100644 (file)
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -26,8 +26,12 @@
  #include "cl_utils.h"
  #include "cl_alloc.h"
  
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
  #include "intel_bufmgr.h"
  #include "intel/intel_gpgpu.h"
+#endif
  
  #include <assert.h>
  #include <stdio.h>
@@ -165,7 +169,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
         */
        cl_kernel_copy_image_parameters(k, mem, index, curbe);
      } else
-      gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_l3);
+      gpgpu_bind_buf(gpgpu, index, bo, cc_llc_l3);
    }
  
    /* Allocate the constant surface (if any) */
@@ -173,7 +177,6 @@ cl_command_queue_bind_surface(cl_command_queue queue,
      assert(k->const_bo_index != MAX_SURFACES - 1);
      gpgpu_bind_buf(gpgpu, k->const_bo_index,
                     k->const_bo,
-                   k->const_bo->size,
                     cc_llc_l3);
    }
  
@@ -184,7 +187,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
      index = k->patch.local_surf.offset / SURFACE_SZ;
      assert(index != MAX_SURFACES - 1);
      *local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *local, sz, cc_llc_l3);
+    gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
    }
    else if (local)
      *local = NULL;
@@ -199,7 +202,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
      index = k->patch.private_surf.offset / SURFACE_SZ;
      assert(index != MAX_SURFACES - 1);
      *priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *priv, sz, cc_llc_l3);
+    gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
    }
    else if(priv)
      *priv = NULL;
@@ -214,14 +217,14 @@ cl_command_queue_bind_surface(cl_command_queue queue,
      assert(index != MAX_SURFACES - 1);
      index = k->patch.scratch.offset / SURFACE_SZ;
      *scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64);
-    gpgpu_bind_buf(gpgpu, index, *scratch, sz, cc_llc_l3);
+    gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
    }
    else if (scratch)
      *scratch = NULL;
  
    /* Now bind a bo used for synchronization */
    sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64);
-  gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 64, cc_llc_l3);
+  gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
    if (queue->last_batch != NULL)
      drm_intel_bo_unreference(queue->last_batch);
    queue->last_batch = sync_bo;
@@ -250,7 +253,7 @@ cl_command_queue_set_report_buffer(cl_command_queue queue, cl_mem mem)
      queue->perf = NULL;
    }
    if (mem != NULL) {
-    if (mem->bo->size < 1024) { /* 1K for the performance counters is enough */
+      if (drm_intel_bo_get_size(mem->bo) < 1024) { /* 1K for the performance counters is enough */
        err = CL_INVALID_BUFFER_SIZE;
        goto error;
      }
diff --git a/src/cl_command_queue_gen6.c b/src/cl_command_queue_gen6.c

index e811453..a08ff41 100644 (file)
--- a/src/cl_command_queue_gen6.c
+++ b/src/cl_command_queue_gen6.c
@@ -26,8 +26,12 @@
  #include "cl_utils.h"
  #include "cl_alloc.h"
  
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
  #include "intel_bufmgr.h"
  #include "intel/intel_gpgpu.h"
+#endif
  
  #include <assert.h>
  #include <stdio.h>
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c

index dce1c2c..35f7f5e 100644 (file)
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -26,8 +26,12 @@
  #include "cl_utils.h"
  #include "cl_alloc.h"
  
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
  #include "intel_bufmgr.h"
  #include "intel/intel_gpgpu.h"
+#endif
  
  #include <assert.h>
  #include <stdio.h>
@@ -36,7 +40,11 @@
  static INLINE size_t
  cl_kernel_compute_batch_sz(cl_kernel k)
  {
-  size_t sz = 256 + 32;
+#ifdef _PLASMA
+    size_t sz = 0x1000; // _PLASMA
+#else
+    size_t sz = 256 + 32;
+#endif
    return sz;
  }
  
@@ -106,8 +114,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
    /* Setup kernel */
    kernel.name = "OCL kernel";
    kernel.grf_blocks = 128;
-  kernel.bin = NULL,
-  kernel.size = 0,
+  kernel.bin = ker->kernel_heap; // _PLASMA ; NULL
+  kernel.size = ker->kernel_heap_sz; // _PLASMA ; 0
    kernel.bo = ker->bo;
    kernel.barrierID = 0;
    kernel.use_barrier = ker->patch.exec_env.has_barriers;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c

index 3c66f70..f7492eb 100644 (file)
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -22,7 +22,11 @@
  #include "cl_internals.h"
  #include "cl_utils.h"
  #include "cl_defs.h"
+#ifdef _PLASMA
+#include "plasma/cl_device_data.h"
+#else
  #include "intel/cl_device_data.h"
+#endif
  #include "CL/cl.h"
  
  #include <assert.h>
diff --git a/src/cl_genx_driver.h b/src/cl_genx_driver.h

index 4fa35cb..fcfc0f8 100644 (file)
--- a/src/cl_genx_driver.h
+++ b/src/cl_genx_driver.h
@@ -23,11 +23,19 @@
  /* They are mostly wrapper around C++ delete / new to avoid c++ in c files */
  struct intel_driver;
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
  /* Allocate and initialize the gen driver */
-extern struct intel_driver* cl_intel_driver_new(void);
+struct intel_driver* cl_intel_driver_new(void);
  
  /* Destroy and deallocate the gen driver */
-extern void cl_intel_driver_delete(struct intel_driver*);
+void cl_intel_driver_delete(struct intel_driver*);
  
+#ifdef __cplusplus
+}
+#endif
+    
  #endif /* __CL_INTEL_DRIVER_H__ */
  
diff --git a/src/cl_image.c b/src/cl_image.c

index bba741d..143c9fb 100644 (file)
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,7 +19,11 @@
  
  #include "cl_image.h"
  #include "cl_utils.h"
+#ifdef _PLASMA
+#include "plasma/intel_defines.h"
+#else
  #include "intel/intel_defines.h"
+#endif
  
  #include <assert.h>
  
diff --git a/src/cl_kernel.c b/src/cl_kernel.c

index a3064c2..6668328 100644 (file)
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -26,7 +26,14 @@
  #include "cl_utils.h"
  
  #include "CL/cl.h"
+
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
  #include "intel_bufmgr.h"
+#include "intel/intel_gpgpu.h"
+#endif
+
  #include <stdio.h>
  #include <string.h>
  #include <stdlib.h>
diff --git a/src/cl_mem.c b/src/cl_mem.c

index c1c0826..ae5a78c 100644 (file)
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -24,8 +24,13 @@
  #include "cl_alloc.h"
  #include "cl_device_id.h"
  
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
  #include "intel/intel_driver.h"
+#include "intel/intel_gpgpu.h"
  #include "intel_bufmgr.h" /* libdrm_intel */
+#endif
  
  #include "CL/cl.h"
  #include "CL/cl_intel.h"
@@ -140,7 +145,7 @@ cl_mem_copy_data_linear(cl_mem mem,
    size_t x, y, p;
    char *dst;
    drm_intel_bo_map(mem->bo, 1);
-  dst = mem->bo->virtual;
+  dst = drm_intel_bo_get_virtual(mem->bo);
    for (y = 0; y < h; ++y) {
      char *src = (char*) data + pitch * y;
      for (x = 0; x < w; ++x) {
@@ -178,7 +183,7 @@ cl_mem_copy_data_tilex(cl_mem mem,
    char *end = (char*) data + pitch * h;
  
    drm_intel_bo_map(mem->bo, 1);
-  img = mem->bo->virtual;
+  img = drm_intel_bo_get_virtual(mem->bo);
    for (tiley = 0; tiley < tiley_n; ++tiley)
    for (tilex = 0; tilex < tilex_n; ++tilex) {
      char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
@@ -213,7 +218,7 @@ cl_mem_copy_data_tiley(cl_mem mem,
    char *end = (char*) data + pitch * h;
  
    drm_intel_bo_map(mem->bo, 1);
-  img = mem->bo->virtual;
+  img = drm_intel_bo_get_virtual(mem->bo);
    for (tiley = 0; tiley < tiley_n; ++tiley)
    for (tilex = 0; tilex < tilex_n; ++tilex) {
      char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
@@ -362,8 +367,8 @@ LOCAL void*
  cl_mem_map(cl_mem mem)
  {
    drm_intel_bo_map(mem->bo, 1);
-  assert(mem->bo->virtual);
-  return mem->bo->virtual;
+  assert(drm_intel_bo_get_virtual(mem->bo));
+  return drm_intel_bo_get_virtual(mem->bo);
  }
  
  LOCAL cl_int
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c

index a5778ee..336d3f1 100644 (file)
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -28,6 +28,7 @@
  #include <sys/stat.h>
  #include <sys/ioctl.h>
  #include <xf86drm.h>
+#include <stdio.h>
  
  #include "cl_utils.h"
  #include "cl_alloc.h"
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c

index 10e20db..f52cbf7 100644 (file)
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -605,9 +605,10 @@ LOCAL void
  gpgpu_bind_buf(intel_gpgpu_t *state,
                 int32_t index,
                 dri_bo* obj_bo,
-               uint32_t size,
                 uint32_t cchint)
  {
+  uint32_t size = obj_bo->size;
+  
    assert(index < MAX_SURFACES);
    if(state->drv->gen_ver == 6)
      gpgpu_bind_buf_gen6(state, index, obj_bo, size, cchint);
@@ -825,3 +826,11 @@ intel_gpgpu_version(intel_gpgpu_t *gpgpu)
    return gpgpu->drv->gen_ver;
  }
  
+uint32_t drm_intel_bo_get_size(drm_intel_bo *bo)
+{
+    return (bo->size);
+}
+void *drm_intel_bo_get_virtual(drm_intel_bo *bo)
+{
+    return (bo->virtual);
+}
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h

index c4c38b3..9bcbdef 100644 (file)
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -21,6 +21,7 @@
  #define __GENX_GPGPU_H__
  
  #include "cl_utils.h"
+#include "intel_bufmgr.h"
  
  #include <stdlib.h>
  #include <stdint.h>
@@ -40,7 +41,7 @@ typedef struct genx_gpgpu_kernel {
    const char *name;        /* kernel name and bo name */
    uint32_t grf_blocks;     /* register blocks kernel wants (in 8 reg blocks) */
    uint32_t cst_sz;         /* total size of all constants */
-  const uint32_t *bin;     /* binary code of the kernel */
+  const char *bin;     /* binary code of the kernel */
    int32_t size;            /* kernel code size */
    struct _drm_intel_bo *bo;/* kernel code in the proper addr space */
    int32_t barrierID;       /* barrierID for _this_ kernel */
@@ -67,11 +68,14 @@ extern void intel_gpgpu_delete(intel_gpgpu_t*);
  /* Get the device generation */
  extern int32_t intel_gpgpu_version(intel_gpgpu_t*);
  
+/* Buffer management wrapper APIs */
+extern uint32_t drm_intel_bo_get_size(drm_intel_bo *bo);
+extern void *drm_intel_bo_get_virtual(drm_intel_bo *bo);
+
  /* Set typeless buffer descriptor in the current binding table */
  extern void gpgpu_bind_buf(intel_gpgpu_t*,
                             int32_t index,
                             struct _drm_intel_bo* obj_bo,
-                           uint32_t size,
                             uint32_t cchint);
  
  typedef enum gpgpu_tiling {
author	ojomer <devnull@localhost>
	Sat, 11 Feb 2012 13:44:11 +0000 (13:44 +0000)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:15:14 +0000 (16:15 -0700)
src/CMakeLists.txt		patch \| blob \| history
src/cl_api.c		patch \| blob \| history
src/cl_command_queue.c		patch \| blob \| history
src/cl_command_queue_gen6.c		patch \| blob \| history
src/cl_command_queue_gen7.c		patch \| blob \| history
src/cl_device_id.c		patch \| blob \| history
src/cl_genx_driver.h		patch \| blob \| history
src/cl_image.c		patch \| blob \| history
src/cl_kernel.c		patch \| blob \| history
src/cl_mem.c		patch \| blob \| history
src/intel/intel_driver.c		patch \| blob \| history
src/intel/intel_gpgpu.c		patch \| blob \| history
src/intel/intel_gpgpu.h		patch \| blob \| history