cl_command_queue_delete(cl_command_queue queue)
{
assert(queue);
- if (atomic_dec(&queue->ref_n) != 1)
- return;
+ if (atomic_dec(&queue->ref_n) != 1) return;
/* Remove it from the list */
assert(queue->ctx);
cl_mem_delete(queue->fulsim_out);
queue->fulsim_out = NULL;
}
+ cl_buffer_unreference(queue->last_batch);
cl_mem_delete(queue->perf);
cl_context_delete(queue->ctx);
cl_gpgpu_delete(queue->gpgpu);
cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
cl_buffer sync_bo = NULL;
cl_int err = CL_SUCCESS;
-#if 0
- cl_context ctx = queue->ctx;
- intel_gpgpu_t *gpgpu = queue->gpgpu;
- drm_intel_bufmgr *bufmgr = cl_context_get_intel_bufmgr(ctx);
- cl_mem mem = NULL;
- cl_buffer *bo = NULL, *sync_bo = NULL;
- const size_t max_thread = ctx->device->max_compute_unit;
- cl_int err = CL_SUCCESS;
- uint32_t i, index;
- /* Bind user defined surface */
- for (i = 0; i < k->arg_info_n; ++i) {
- assert(k->arg_info[i].offset % SURFACE_SZ == 0);
- index = k->arg_info[i].offset / SURFACE_SZ;
- mem = (cl_mem) k->args[k->arg_info[i].arg_index];
- assert(index != MAX_SURFACES - 1);
- CHECK_MEM(mem);
- bo = mem->bo;
- assert(bo);
- if (mem->is_image) {
- const int32_t w = mem->w, h = mem->h, pitch = mem->pitch;
- const uint32_t fmt = mem->intel_fmt;
- gpgpu_tiling_t tiling = GPGPU_NO_TILE;
- if (mem->tiling == CL_TILE_X)
- tiling = GPGPU_TILE_X;
- else if (mem->tiling == CL_TILE_Y)
- tiling = GPGPU_TILE_Y;
- gpgpu_bind_image2D(gpgpu, index, bo, fmt, w, h, pitch, tiling);
-
- /* Copy the image parameters (width, height) in the constant buffer if the
- * user requests them
- */
- cl_kernel_copy_image_parameters(k, mem, index, curbe);
- } else
- gpgpu_bind_buf(gpgpu, index, bo, cc_llc_l3);
- }
-
- /* Allocate the constant surface (if any) */
- if (k->const_bo) {
- assert(k->const_bo_index != MAX_SURFACES - 1);
- gpgpu_bind_buf(gpgpu, k->const_bo_index,
- k->const_bo,
- cc_llc_l3);
- }
-
- /* Allocate local surface needed for SLM and bind it */
- if (local && local_sz != 0) {
- const size_t sz = 16 * local_sz; /* XXX 16 == maximum barrier number */
- assert(k->patch.local_surf.offset % SURFACE_SZ == 0);
- index = k->patch.local_surf.offset / SURFACE_SZ;
- assert(index != MAX_SURFACES - 1);
- *local = cl_buffer_alloc(bufmgr, "CL local surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *local, cc_llc_l3);
- }
- else if (local)
- *local = NULL;
-
- /* Allocate private surface and bind it */
- if (priv && k->patch.private_surf.size != 0) {
- const size_t sz = max_thread *
- k->patch.private_surf.size *
- k->patch.exec_env.largest_compiled_simd_sz;
- // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
- assert(k->patch.private_surf.offset % SURFACE_SZ == 0);
- index = k->patch.private_surf.offset / SURFACE_SZ;
- assert(index != MAX_SURFACES - 1);
- *priv = cl_buffer_alloc(bufmgr, "CL private surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *priv, cc_llc_l3);
- }
- else if(priv)
- *priv = NULL;
-
- /* Allocate scratch surface and bind it */
- if (scratch && k->patch.scratch.size != 0) {
- const size_t sz = max_thread * /* XXX is it given per lane ??? */
- k->patch.scratch.size *
- k->patch.exec_env.largest_compiled_simd_sz;
- // assert(k->patch.exec_env.largest_compiled_simd_sz == 16);
- assert(k->patch.scratch.offset % SURFACE_SZ == 0);
- assert(index != MAX_SURFACES - 1);
- index = k->patch.scratch.offset / SURFACE_SZ;
- *scratch = cl_buffer_alloc(bufmgr, "CL scratch surface", sz, 64);
- gpgpu_bind_buf(gpgpu, index, *scratch, cc_llc_l3);
- }
- else if (scratch)
- *scratch = NULL;
-#endif
/* Now bind a bo used for synchronization */
sync_bo = cl_buffer_alloc(bufmgr, "sync surface", 64, 64);
- cl_gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, cc_llc_l3);
+ cl_gpgpu_bind_buf(gpgpu, GEN_MAX_SURFACES-1, sync_bo, cc_llc_l3);
if (queue->last_batch != NULL)
cl_buffer_unreference(queue->last_batch);
queue->last_batch = sync_bo;
#include "cl_platform_id.h"
#include "cl_device_id.h"
-#include "cl_genx_driver.h"
#include "cl_context.h"
#include "cl_command_queue.h"
#include "cl_mem.h"
(devid) == PCI_CHIP_HASWELL_L)
#define IS_GEN75(devid) IS_HASWELL(devid)
-/* Open a connection to the X server and ask the kernel who we are. Returns -1
- * if no Intel GPU was found or an error was encountered in the probing
- * process
- */
-extern int cl_intel_get_device_id(void);
-
#endif /* __CL_DEVICE_DATA_H__ */
#include "cl_internals.h"
#include "cl_utils.h"
#include "cl_defs.h"
-#ifdef _PLASMA
-#include "plasma/cl_device_data.h"
-#else
-#include "intel/cl_device_data.h"
-#endif
+#include "cl_driver.h"
+#include "cl_device_data.h"
#include "CL/cl.h"
#include <assert.h>
.max_clock_frequency = 1000,
.wg_sz = 1024,
.compile_wg_sz = {0},
-
#include "cl_gen7_device.h"
};
.max_clock_frequency = 1000,
.wg_sz = 512,
.compile_wg_sz = {0},
-
#include "cl_gen7_device.h"
};
.max_clock_frequency = 1000,
.wg_sz = 512,
.compile_wg_sz = {0},
-
#include "cl_gen75_device.h"
};
cl_get_gt_device(void)
{
cl_device_id ret = NULL;
- const int device_id = cl_intel_get_device_id();
+ const int device_id = cl_driver_get_device_id();
/* XXX we pick IVB for HSW now */
if (device_id == PCI_CHIP_HASWELL_M ||
LOCAL cl_driver_new_cb *cl_driver_new = NULL;
LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
+LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
/* Buffer */
-LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL;
LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
+LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL;
LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL;
LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL;
-LOCAL cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc = NULL;
LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
/* GPGPU */
LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL;
LOCAL cl_gpgpu_upload_constants_cb *cl_gpgpu_upload_constants = NULL;
LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL;
-LOCAL cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier = NULL;
LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL;
LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL;
LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL;
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
-#ifndef __CL_BUFFER_H__
-#define __CL_BUFFER_H__
+#ifndef __CL_DRIVER_H__
+#define __CL_DRIVER_H__
#include <stdint.h>
#include <stdlib.h>
/* XXX needed for previous driver */
-#define MAX_SURFACES 128
-#define MAX_SAMPLERS 16
+#define GEN_MAX_SURFACES 128
+#define GEN_MAX_SAMPLERS 16
/**************************************************************************
* cl_driver:
typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n);
extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
-/* Make HW threads use barrierID */
-typedef void (cl_gpgpu_update_barrier_cb)(cl_gpgpu, uint32_t barrierID, uint32_t thread_n);
-extern cl_gpgpu_update_barrier_cb *cl_gpgpu_update_barrier;
-
/* Upload the constant samplers as specified inside the OCL kernel */
typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;
typedef void* (cl_buffer_get_virtual_cb)(cl_buffer);
extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;
+/* Get the size of the buffer */
+typedef void* (cl_buffer_get_size_cb)(cl_buffer);
+extern cl_buffer_get_size_cb *cl_buffer_get_size;
+
/* Pin a buffer */
typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment);
extern cl_buffer_pin_cb *cl_buffer_pin;
typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*);
extern cl_buffer_subdata_cb *cl_buffer_subdata;
-/* Emit relocation */
-typedef int (cl_buffer_emit_reloc_cb) (cl_buffer, uint32_t, cl_buffer, uint32_t, uint32_t, uint32_t);
-extern cl_buffer_emit_reloc_cb *cl_buffer_emit_reloc;
-
/* Wait for all pending rendering for this buffer to complete */
typedef int (cl_buffer_wait_rendering_cb) (cl_buffer);
extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
-#endif /* __CL_BUFFER_H__ */
+/* Get the device id */
+typedef int (cl_driver_get_device_id_cb)(void);
+extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
+
+#endif /* __CL_DRIVER_H__ */
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia@intel.com>
- */
-
-#ifndef __CL_INTEL_DRIVER_H__
-#define __CL_INTEL_DRIVER_H__
-
-/* They are mostly wrapper around C++ delete / new to avoid c++ in c files */
-struct intel_driver;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Allocate and initialize the gen driver */
-struct intel_driver* cl_intel_driver_new(void);
-
-/* Destroy and deallocate the gen driver */
-void cl_intel_driver_delete(struct intel_driver*);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* __CL_INTEL_DRIVER_H__ */
-
p->magic = CL_MAGIC_PROGRAM_HEADER;
p->ctx = ctx;
+ /* The queue also belongs to its context */
+ cl_context_add_ref(ctx);
+
exit:
return p;
error:
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_driver.h"
-#include "cl_genx_driver.h"
#define SET_BLOCKED_SIGSET(DRIVER) do { \
sigset_t bl_mask; \
RESTORE_BLOCKED_SIGSET(DRIVER); \
} while (0)
-LOCAL intel_driver_t*
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+ if (driver == NULL)
+ return;
+ cl_free(driver);
+}
+
+static intel_driver_t*
intel_driver_new(void)
{
intel_driver_t *driver = NULL;
goto exit;
}
-LOCAL void
-intel_driver_delete(intel_driver_t *driver)
-{
- if (driver == NULL)
- return;
- cl_free(driver);
-}
-
/* just used for maximum relocation number in drm_intel */
#define BATCH_SIZE 0x1000
#endif /* EMULATE_GEN */
}
-LOCAL void
+static void
intel_driver_open(intel_driver_t *intel)
{
int cardi;
}
}
-LOCAL void
+static void
intel_driver_close(intel_driver_t *intel)
{
if(intel->dri_ctx) dri_state_release(intel->dri_ctx);
return name;
}
-LOCAL int
-cl_intel_get_device_id(void)
+static int
+intel_get_device_id(void)
{
intel_driver_t *driver = NULL;
int intel_device_id;
return intel_device_id;
}
-LOCAL void
+static void
cl_intel_driver_delete(intel_driver_t *driver)
{
if (driver == NULL)
intel_driver_delete(driver);
}
-LOCAL intel_driver_t*
+static intel_driver_t*
cl_intel_driver_new(void)
{
intel_driver_t *driver = NULL;
goto exit;
}
-LOCAL drm_intel_bufmgr*
+static drm_intel_bufmgr*
intel_driver_get_bufmgr(intel_driver_t *drv)
{
return drv->bufmgr;
}
-LOCAL uint32_t
+static uint32_t
intel_driver_get_ver(struct intel_driver *drv)
{
return drv->gen_ver;
}
-LOCAL uint32_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
-LOCAL void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
+static uint32_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
+static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
LOCAL void
intel_setup_callbacks(void)
cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
+ cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
- cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_unmap;
+ cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
+ cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
- cl_buffer_emit_reloc = (cl_buffer_emit_reloc_cb *) drm_intel_bo_emit_reloc;
cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
intel_set_gpgpu_callbacks();
}
struct dri_state *dri_ctx;
} intel_driver_t;
-/* create / destroy device */
-extern intel_driver_t* intel_driver_new(void);
-extern void intel_driver_delete(intel_driver_t*);
-
/* device control */
extern void intel_driver_lock_hardware(intel_driver_t*);
extern void intel_driver_unlock_hardware(intel_driver_t*);
dri_bo_unreference(state->sampler_state_b.bo);
bo = dri_bo_alloc(state->drv->bufmgr,
"sample states",
- MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
+ GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
32);
assert(bo);
dri_bo_map(bo, 1);
- memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * MAX_SAMPLERS);
+ memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * GEN_MAX_SAMPLERS);
state->sampler_state_b.bo = bo;
}
uint32_t cchint)
{
const uint32_t size = obj_bo->size;
- assert(index < MAX_SURFACES);
+ assert(index < GEN_MAX_SURFACES);
if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
intel_gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint);
else
int32_t pitch,
cl_gpgpu_tiling tiling)
{
- assert(index < MAX_SURFACES);
+ assert(index < GEN_MAX_SURFACES);
if (state->drv->gen_ver == 7 || state->drv->gen_ver == 75)
intel_gpgpu_bind_image2D_gen7(state, index, (drm_intel_bo*) obj_bo, format, w, h, pitch, tiling);
else
dri_bo_unmap(state->sampler_state_b.bo);
}
-static void
-intel_gpgpu_update_barrier(intel_gpgpu_t *state, uint32_t barrierID, uint32_t thread_n)
-{
- BEGIN_BATCH(state->batch, 4);
- OUT_BATCH(state->batch, CMD_MEDIA_STATE_FLUSH | 0);
- OUT_BATCH(state->batch, 1 << barrierID);
- OUT_BATCH(state->batch, CMD_MEDIA_GATEWAY_STATE | 0);
- OUT_BATCH(state->batch, (barrierID << 16) | thread_n);
- ADVANCE_BATCH(state->batch);
-}
-
static void
intel_gpgpu_set_perf_counters(intel_gpgpu_t *state, cl_buffer *perf)
{
cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) intel_gpgpu_upload_constants;
cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
- cl_gpgpu_update_barrier = (cl_gpgpu_update_barrier_cb *) intel_gpgpu_update_barrier;
cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
-#ifndef __GENX_GPGPU_H__
-#define __GENX_GPGPU_H__
+#ifndef __INTEL_GPGPU_H__
+#define __INTEL_GPGPU_H__
#include "cl_utils.h"
#include "cl_driver.h"
/* Set the gpgpu related call backs */
extern void intel_set_gpgpu_callbacks(void);
-#endif /* __GENX_GPGPU_H__ */
+#endif /* __INTEL_GPGPU_H__ */
#include "cl_utils.h"
#include "cl_alloc.h"
+#include "cl_device_data.h"
#include "sim/sim_driver.h"
#include "CL/cl.h"
#include <stdlib.h>
#include <assert.h>
#include <string.h>
+#include <stdio.h>
#include "cl_driver.h"
-/* Just to count allocations */
+/* Fake buffer manager that just counts allocations */
typedef struct sim_bufmgr { volatile int buf_n; } sim_bufmgr_t;
+static sim_bufmgr_t*
+sim_bufmgr_new(void)
+{
+ return cl_calloc(1,sizeof(sim_bufmgr_t));
+}
+
+static void
+sim_bufmgr_delete(sim_bufmgr_t *bufmgr)
+{
+ cl_free(bufmgr);
+}
+
+/* Fake low-driver */
+typedef struct sim_driver {
+ sim_bufmgr_t *bufmgr;
+ int gen_ver;
+} sim_driver_t;
+
+static void
+sim_driver_delete(sim_driver_t *driver)
+{
+ if (driver == NULL) return;
+ sim_bufmgr_delete(driver->bufmgr);
+ cl_free(driver);
+}
+
+static sim_driver_t*
+sim_driver_new(void)
+{
+ sim_driver_t *driver = NULL;
+ TRY_ALLOC_NO_ERR(driver, cl_calloc(1, sizeof(sim_driver_t)));
+ TRY_ALLOC_NO_ERR(driver->bufmgr, sim_bufmgr_new());
+ driver->gen_ver = 7; // XXX make it flexible
+exit:
+ return driver;
+error:
+ sim_driver_delete(driver);
+ driver = NULL;
+ goto exit;
+}
+
+static int
+sim_driver_get_ver(sim_driver_t *driver)
+{
+ return driver->gen_ver;
+}
+
+static sim_bufmgr_t*
+sim_driver_get_bufmgr(sim_driver_t *driver)
+{
+ return driver->bufmgr;
+}
+
+static int
+sim_driver_get_device_id(void)
+{
+ return PCI_CHIP_IVYBRIDGE_GT2; // XXX get some env variable instead
+}
+
+
/* Just a named buffer to mirror real drm functions */
typedef struct sim_buffer {
void *data; /* data in the buffer */
if (buf == NULL) return;
cl_free(buf->data);
cl_free(buf->name);
+ cl_free(buf);
}
static sim_buffer_t*
}
buf->ref_n = 1;
buf->bufmgr = bufmgr;
+ buf->sz = sz;
atomic_inc(&buf->bufmgr->buf_n);
exit:
sim_buffer_delete(buf);
}
+static void
+sim_buffer_reference(sim_buffer_t *buf)
+{
+ if (UNLIKELY(buf == NULL)) return;
+ atomic_inc(&buf->ref_n);
+}
+
+static void*
+sim_buffer_get_virtual(sim_buffer_t *buf)
+{
+ if (UNLIKELY(buf == NULL)) return NULL;
+ return buf->data;
+}
+
+static void*
+sim_buffer_get_size(sim_buffer_t *buf)
+{
+ if (UNLIKELY(buf == NULL)) return 0;
+ return buf->data;
+}
+
static int
sim_buffer_subdata(sim_buffer_t *buf, unsigned long offset, unsigned long size, const void *data)
{
memcpy((char*) buf->data + offset, data, size);
return 0;
}
-
-static int
-sim_buffer_emit_reloc(sim_buffer_t *buf,
- uint32_t offset,
- sim_buffer_t *target_buf,
- uint32_t target_offset,
- uint32_t read_domains,
- uint32_t write_domain)
-{
- return 1;
-}
-
static int sim_buffer_map(sim_buffer_t *buf, uint32_t write_enable) {return 0;}
static int sim_buffer_unmap(sim_buffer_t *buf) {return 0;}
static int sim_buffer_pin(sim_buffer_t *buf, uint32_t alignment) {return 0;}
static int sim_buffer_unpin(sim_buffer_t *buf) {return 0;}
+static int sim_buffer_wait_rendering(sim_buffer_t *buf) {return 0;}
+
+/* Encapsulates operations needed to run one NDrange */
+typedef struct sim_gpgpu
+{
+ sim_driver_t *driver; // the driver the gpgpu states belongs to
+} sim_gpgpu_t;
+
+static void sim_gpgpu_delete(sim_gpgpu_t *gpgpu)
+{
+ cl_free(gpgpu);
+}
+
+static sim_gpgpu_t *sim_gpgpu_new(sim_driver_t *driver)
+{
+ sim_gpgpu_t *gpgpu = NULL;
+ TRY_ALLOC_NO_ERR(gpgpu, cl_calloc(1, sizeof(sim_gpgpu_t)));
+
+exit:
+ return gpgpu;
+error:
+ sim_gpgpu_delete(gpgpu);
+ gpgpu = NULL;
+ goto exit;
+}
+
+#undef NOT_IMPLEMENTED
+#define NOT_IMPLEMENTED
+
+static void sim_gpgpu_bind_buf(sim_gpgpu_t *gpgpu, int32_t index, sim_buffer_t *buf, uint32_t cchint)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_bind_image2D(sim_gpgpu_t *gpgpu,
+ int32_t index,
+ sim_buffer_t *obj_bo,
+ uint32_t format,
+ int32_t w,
+ int32_t h,
+ int pitch,
+ cl_gpgpu_tiling tiling)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_state_init(sim_gpgpu_t *gpgpu, uint32_t max_threads, uint32_t size_cs_entry)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_set_perf_counters(sim_gpgpu_t *gpgpu, sim_buffer_t *perf)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_upload_constants(sim_gpgpu_t *gpgpu, const void* data, uint32_t size)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_states_setup(sim_gpgpu_t *gpgpu, cl_gpgpu_kernel* kernel, uint32_t ker_n)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_upload_samplers(sim_gpgpu_t *state, const void *data, uint32_t n)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_reset(sim_gpgpu_t *state, size_t sz)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_start(sim_gpgpu_t *state)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_batch_end(sim_gpgpu_t *state, int32_t flush_mode)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_flush(sim_gpgpu_t *state)
+{ NOT_IMPLEMENTED; }
+static void sim_gpgpu_walker(sim_gpgpu_t *state,
+ uint32_t simd_sz,
+ uint32_t thread_n,
+ const size_t global_wk_off[3],
+ const size_t global_wk_sz[3],
+ const size_t local_wk_sz[3])
+{ NOT_IMPLEMENTED; }
LOCAL void
sim_setup_callbacks(void)
{
+ cl_driver_new = (cl_driver_new_cb *) sim_driver_new;
+ cl_driver_delete = (cl_driver_delete_cb *) sim_driver_delete;
+ cl_driver_get_ver = (cl_driver_get_ver_cb *) sim_driver_get_ver;
+ cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) sim_driver_get_bufmgr;
+ cl_driver_get_device_id = (cl_driver_get_device_id_cb *) sim_driver_get_device_id;
cl_buffer_alloc = (cl_buffer_alloc_cb *) sim_buffer_alloc;
+ cl_buffer_reference = (cl_buffer_reference_cb *) sim_buffer_reference;
cl_buffer_unreference = (cl_buffer_unreference_cb *) sim_buffer_unreference;
cl_buffer_map = (cl_buffer_map_cb *) sim_buffer_map;
cl_buffer_unmap = (cl_buffer_unmap_cb *) sim_buffer_unmap;
+ cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) sim_buffer_get_virtual;
+ cl_buffer_get_size = (cl_buffer_get_size_cb *) sim_buffer_get_size;
cl_buffer_pin = (cl_buffer_pin_cb *) sim_buffer_pin;
cl_buffer_unpin = (cl_buffer_unpin_cb *) sim_buffer_unpin;
cl_buffer_subdata = (cl_buffer_subdata_cb *) sim_buffer_subdata;
- cl_buffer_emit_reloc = (cl_buffer_emit_reloc_cb *) sim_buffer_emit_reloc;
+ cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) sim_buffer_wait_rendering;
+ cl_gpgpu_new = (cl_gpgpu_new_cb *) sim_gpgpu_new;
+ cl_gpgpu_delete = (cl_gpgpu_delete_cb *) sim_gpgpu_delete;
+ cl_gpgpu_bind_image2D = (cl_gpgpu_bind_image2D_cb *) sim_gpgpu_bind_image2D;
+ cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) sim_gpgpu_bind_buf;
+ cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) sim_gpgpu_state_init;
+ cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) sim_gpgpu_set_perf_counters;
+ cl_gpgpu_upload_constants = (cl_gpgpu_upload_constants_cb *) sim_gpgpu_upload_constants;
+ cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) sim_gpgpu_states_setup;
+ cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) sim_gpgpu_upload_samplers;
+ cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) sim_gpgpu_batch_reset;
+ cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) sim_gpgpu_batch_start;
+ cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) sim_gpgpu_batch_end;
+ cl_gpgpu_flush = (cl_gpgpu_flush_cb *) sim_gpgpu_flush;
+ cl_gpgpu_walker = (cl_gpgpu_walker_cb *) sim_gpgpu_walker;
}