ADD_EXECUTABLE(binomialOption tests/binomialOption.c)
ADD_EXECUTABLE(nbody tests/nbody.c)
ADD_EXECUTABLE(svm_test tests/svm_test.c)
-ADD_EXECUTABLE(fast_mat_mul tests/fast_mat_mul.cpp)
TARGET_LINK_LIBRARIES(test_copy_buffer cl_test m)
TARGET_LINK_LIBRARIES(test_copy_image cl_test m)
TARGET_LINK_LIBRARIES(test_enqueue_read cl_test m)
TARGET_LINK_LIBRARIES(binomialOption cl_test m)
TARGET_LINK_LIBRARIES(nbody cl_test m)
TARGET_LINK_LIBRARIES(svm_test cl_test m)
-TARGET_LINK_LIBRARIES(fast_mat_mul cl_test m)
-
#include <string.h>
#include <assert.h>
+#ifdef _PLASMA
+/* Trace an OpenCL API entry point on stdout. x is the API name WITHOUT the
+ * leading "cl": the format string supplies that prefix itself. */
+#define PRINTF(x) printf("CL API => cl%s\n", (x))
+#else
+/* Tracing disabled: expand to a no-op expression (x is not evaluated) so that
+ * "PRINTF(...);" is still a well-formed expression statement. */
+#define PRINTF(x) ((void)0)
+#endif
+
cl_int
clGetPlatformIDs(cl_uint num_entries,
cl_platform_id * platforms,
void * user_data,
cl_int * errcode_ret)
{
+ PRINTF("CreateContext");
return cl_create_context(properties,
num_devices,
devices,
cl_int
clReleaseContext(cl_context context)
{
+ PRINTF("ReleaseContext");
+
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
cl_context_delete(context);
cl_command_queue_properties properties,
cl_int * errcode_ret)
{
+ PRINTF("CreateCommandQueue");
cl_command_queue queue = NULL;
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
cl_int
clReleaseCommandQueue(cl_command_queue command_queue)
{
+ PRINTF("ReleaseCommandQueue");
cl_int err = CL_SUCCESS;
CHECK_QUEUE (command_queue);
cl_command_queue_delete(command_queue);
void * host_ptr,
cl_int * errcode_ret)
{
+ PRINTF("CreateBuffer");
cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
void * host_ptr,
cl_int * errcode_ret)
{
+ PRINTF("CreateImage2D");
cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
cl_int * binary_status,
cl_int * errcode_ret)
{
+ PRINTF("CreateProgramWithBinary");
cl_program program = NULL;
cl_int err = CL_SUCCESS;
cl_int
clReleaseProgram(cl_program program)
{
+ PRINTF("ReleaseProgram"); /* no "cl" here: PRINTF's format already prepends it */
cl_int err = CL_SUCCESS;
CHECK_PROGRAM (program);
cl_program_delete(program);
const char * kernel_name,
cl_int * errcode_ret)
{
+ PRINTF("CreateKernel");
cl_kernel kernel = NULL;
cl_int err = CL_SUCCESS;
cl_int
clReleaseKernel(cl_kernel kernel)
{
+ PRINTF("ReleaseKernel"); /* no "cl" here: PRINTF's format already prepends it */
cl_int err = CL_SUCCESS;
CHECK_KERNEL(kernel);
cl_kernel_delete(kernel);
const cl_event * event_wait_list,
cl_event * event)
{
+ PRINTF("EnqueueNDRangeKernel");
size_t fixed_global_off[] = {0,0,0};
size_t fixed_global_sz[] = {1,1,1};
size_t fixed_local_sz[] = {16,1,1};
#include "cl_utils.h"
#include "cl_alloc.h"
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
#include "intel_bufmgr.h"
#include "intel/intel_gpgpu.h"
+#endif
#include <assert.h>
#include <stdio.h>
*/
cl_kernel_copy_image_parameters(k, mem, index, curbe);
} else
- gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_l3);
+ gpgpu_bind_buf(gpgpu, index, bo, cc_llc_l3);
}
/* Allocate the constant surface (if any) */
assert(k->const_bo_index != MAX_SURFACES - 1);
gpgpu_bind_buf(gpgpu, k->const_bo_index,
k->const_bo,
- k->const_bo->size,
cc_llc_l3);
}
index = k->patch.local_surf.offset / SURFACE_SZ;
assert(index != MAX_SURFACES - 1);
*local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *local, sz, cc_llc_l3);
+ gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
}
else if (local)
*local = NULL;
index = k->patch.private_surf.offset / SURFACE_SZ;
assert(index != MAX_SURFACES - 1);
*priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *priv, sz, cc_llc_l3);
+ gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
}
else if(priv)
*priv = NULL;
assert(index != MAX_SURFACES - 1);
index = k->patch.scratch.offset / SURFACE_SZ;
*scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *scratch, sz, cc_llc_l3);
+ gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
}
else if (scratch)
*scratch = NULL;
/* Now bind a bo used for synchronization */
sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64);
- gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 64, cc_llc_l3);
+ gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
if (queue->last_batch != NULL)
drm_intel_bo_unreference(queue->last_batch);
queue->last_batch = sync_bo;
queue->perf = NULL;
}
if (mem != NULL) {
- if (mem->bo->size < 1024) { /* 1K for the performance counters is enough */
+ if (drm_intel_bo_get_size(mem->bo) < 1024) { /* 1K for the performance counters is enough */
err = CL_INVALID_BUFFER_SIZE;
goto error;
}
#include "cl_utils.h"
#include "cl_alloc.h"
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
#include "intel_bufmgr.h"
#include "intel/intel_gpgpu.h"
+#endif
#include <assert.h>
#include <stdio.h>
#include "cl_utils.h"
#include "cl_alloc.h"
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
#include "intel_bufmgr.h"
#include "intel/intel_gpgpu.h"
+#endif
#include <assert.h>
#include <stdio.h>
static INLINE size_t
cl_kernel_compute_batch_sz(cl_kernel k)
{
- size_t sz = 256 + 32;
+#ifdef _PLASMA /* the returned size is a compile-time constant; k is not read */
+ size_t sz = 0x1000; /* _PLASMA build: fixed 0x1000 (4096) */
+#else
+ size_t sz = 256 + 32; /* default build: 288, the same value as before this change */
+#endif
return sz;
}
/* Setup kernel */
kernel.name = "OCL kernel";
kernel.grf_blocks = 128;
- kernel.bin = NULL,
- kernel.size = 0,
+ kernel.bin = ker->kernel_heap; // _PLASMA ; NULL
+ kernel.size = ker->kernel_heap_sz; // _PLASMA ; 0
kernel.bo = ker->bo;
kernel.barrierID = 0;
kernel.use_barrier = ker->patch.exec_env.has_barriers;
#include "cl_internals.h"
#include "cl_utils.h"
#include "cl_defs.h"
+#ifdef _PLASMA
+#include "plasma/cl_device_data.h"
+#else
#include "intel/cl_device_data.h"
+#endif
#include "CL/cl.h"
#include <assert.h>
/* They are mostly wrapper around C++ delete / new to avoid c++ in c files */
struct intel_driver;
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Allocate and initialize the gen driver */
-extern struct intel_driver* cl_intel_driver_new(void);
+struct intel_driver* cl_intel_driver_new(void);
/* Destroy and deallocate the gen driver */
-extern void cl_intel_driver_delete(struct intel_driver*);
+void cl_intel_driver_delete(struct intel_driver*);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* __CL_INTEL_DRIVER_H__ */
#include "cl_image.h"
#include "cl_utils.h"
+#ifdef _PLASMA
+#include "plasma/intel_defines.h"
+#else
#include "intel/intel_defines.h"
+#endif
#include <assert.h>
#include "cl_utils.h"
#include "CL/cl.h"
+
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
#include "intel_bufmgr.h"
+#include "intel/intel_gpgpu.h"
+#endif
+
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "cl_alloc.h"
#include "cl_device_id.h"
+#ifdef _PLASMA
+#include "plasma/plasma_export.h"
+#else
#include "intel/intel_driver.h"
+#include "intel/intel_gpgpu.h"
#include "intel_bufmgr.h" /* libdrm_intel */
+#endif
#include "CL/cl.h"
#include "CL/cl_intel.h"
size_t x, y, p;
char *dst;
drm_intel_bo_map(mem->bo, 1);
- dst = mem->bo->virtual;
+ dst = drm_intel_bo_get_virtual(mem->bo);
for (y = 0; y < h; ++y) {
char *src = (char*) data + pitch * y;
for (x = 0; x < w; ++x) {
char *end = (char*) data + pitch * h;
drm_intel_bo_map(mem->bo, 1);
- img = mem->bo->virtual;
+ img = drm_intel_bo_get_virtual(mem->bo);
for (tiley = 0; tiley < tiley_n; ++tiley)
for (tilex = 0; tilex < tilex_n; ++tilex) {
char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
char *end = (char*) data + pitch * h;
drm_intel_bo_map(mem->bo, 1);
- img = mem->bo->virtual;
+ img = drm_intel_bo_get_virtual(mem->bo);
for (tiley = 0; tiley < tiley_n; ++tiley)
for (tilex = 0; tilex < tilex_n; ++tilex) {
char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
cl_mem_map(cl_mem mem)
{
drm_intel_bo_map(mem->bo, 1);
- assert(mem->bo->virtual);
- return mem->bo->virtual;
+ assert(drm_intel_bo_get_virtual(mem->bo));
+ return drm_intel_bo_get_virtual(mem->bo);
}
LOCAL cl_int
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <xf86drm.h>
+#include <stdio.h>
#include "cl_utils.h"
#include "cl_alloc.h"
gpgpu_bind_buf(intel_gpgpu_t *state,
int32_t index,
dri_bo* obj_bo,
- uint32_t size,
uint32_t cchint)
{
+ uint32_t size = obj_bo->size;
+
assert(index < MAX_SURFACES);
if(state->drv->gen_ver == 6)
gpgpu_bind_buf_gen6(state, index, obj_bo, size, cchint);
return gpgpu->drv->gen_ver;
}
+/* Accessor for the size field of a buffer object, narrowed to uint32_t.
+ * NOTE(review): the size field may be wider than 32 bits in libdrm -- confirm
+ * that the implicit narrowing on return is acceptable for all callers. */
+uint32_t
+drm_intel_bo_get_size(drm_intel_bo *bo)
+{
+  const uint32_t bo_size = bo->size;
+  return bo_size;
+}
+/* Accessor for the virtual field of a buffer object; the pointer is returned
+ * as stored, with no mapping or validity check performed here. */
+void *
+drm_intel_bo_get_virtual(drm_intel_bo *bo)
+{
+  void *mapped_addr = bo->virtual;
+  return mapped_addr;
+}
#define __GENX_GPGPU_H__
#include "cl_utils.h"
+#include "intel_bufmgr.h"
#include <stdlib.h>
#include <stdint.h>
const char *name; /* kernel name and bo name */
uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */
uint32_t cst_sz; /* total size of all constants */
- const uint32_t *bin; /* binary code of the kernel */
+ const char *bin; /* binary code of the kernel */
int32_t size; /* kernel code size */
struct _drm_intel_bo *bo;/* kernel code in the proper addr space */
int32_t barrierID; /* barrierID for _this_ kernel */
/* Get the device generation */
extern int32_t intel_gpgpu_version(intel_gpgpu_t*);
+/* Buffer management wrapper APIs: accessors for drm_intel_bo fields, used in place of direct bo->size / bo->virtual dereferences */
+extern uint32_t drm_intel_bo_get_size(drm_intel_bo *bo); /* returns bo->size */
+extern void *drm_intel_bo_get_virtual(drm_intel_bo *bo); /* returns bo->virtual */
+
/* Set typeless buffer descriptor in the current binding table */
extern void gpgpu_bind_buf(intel_gpgpu_t*,
int32_t index,
struct _drm_intel_bo* obj_bo,
- uint32_t size,
uint32_t cchint);
typedef enum gpgpu_tiling {