Use the drm/intel optimization for mem bo mapping that distinguishes
read-only from writable mappings, so we may end up waiting less.
This also adds a 'map_flags' check in clEnqueueMapBuffer/clEnqueueMapImage
to decide whether the mapping is actually for read or for write.
clMapBufferIntel is left untouched for now, since changing it might
break the ABI/API.
v2: Fix write_map flag in clEnqueueMapBuffer/clEnqueueMapImage.
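For reference, this is how an application now gets the cheaper
read-only mapping (illustrative host code, not part of this patch;
queue, buf and size are placeholders):

    cl_int err;
    void *p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ,
                                 0, size, 0, NULL, NULL, &err);
    /* no write bit in map_flags, so write_map stays 0 and the
     * underlying bo is mapped read-only */
    clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);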
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Reviewed-by: "Guo, Yejun" <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
data->size = size;
data->ptr = ptr;
data->unsync_map = 1;
+ if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
+ data->write_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
data->ptr = ptr;
data->unsync_map = 1;
+ if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
+ data->write_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
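The same check covers images; a write-intent map (illustrative host
code, not part of this patch; queue and image are placeholders) will
set write_map:

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {64, 64, 1};
    size_t row_pitch = 0;
    cl_int err;
    void *p = clEnqueueMapImage(queue, image, CL_TRUE, CL_MAP_WRITE,
                                origin, region, &row_pitch, NULL,
                                0, NULL, NULL, &err);
    /* CL_MAP_WRITE is set, so write_map becomes 1 */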
void *ptr = NULL;
cl_int err = CL_SUCCESS;
CHECK_MEM (mem);
- ptr = cl_mem_map(mem);
+ ptr = cl_mem_map(mem, 1);
error:
if (errcode_ret)
*errcode_ret = err;
assert(mem->bo);
chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz;
chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz;
- to = cl_mem_map(mem);
+ to = cl_mem_map(mem, 1);
for (j = 0; j < chunk_n; ++j) {
char name[256];
sprintf(name, "dump%03i.bmp", curr);
void* src_ptr;
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(src_ptr = cl_mem_map_auto(data->mem_obj, 0))) {
err = CL_MAP_FAILURE;
goto error;
}
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if (!(src_ptr = cl_mem_map_auto(mem))) {
+ if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
err = CL_MAP_FAILURE;
goto error;
}
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
void* dst_ptr;
- if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(dst_ptr = cl_mem_map_auto(data->mem_obj, 1))) {
err = CL_MAP_FAILURE;
goto error;
}
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if (!(dst_ptr = cl_mem_map_auto(mem))) {
+ if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
err = CL_MAP_FAILURE;
goto error;
}
const size_t* origin = data->origin;
const size_t* region = data->region;
- if (!(src_ptr = cl_mem_map_auto(mem))) {
+ if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
err = CL_MAP_FAILURE;
goto error;
}
cl_mem mem = data->mem_obj;
CHECK_IMAGE(mem, image);
- if (!(dst_ptr = cl_mem_map_auto(mem))) {
+ if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
err = CL_MAP_FAILURE;
goto error;
}
//clEnqueueMapBuffer used an unsync map, so force map_gtt here
ptr = cl_mem_map_gtt(mem);
else
- ptr = cl_mem_map_auto(mem);
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
if (ptr == NULL) {
err = CL_MAP_FAILURE;
//clEnqueueMapBuffer used an unsync map, so force map_gtt here
ptr = cl_mem_map_gtt(mem);
else
- ptr = cl_mem_map_auto(mem);
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
if (ptr == NULL) {
err = CL_MAP_FAILURE;
const cl_mem buffer = mem_list[i];
CHECK_MEM(buffer);
- *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer);
+ *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0);
}
data->user_func(data->ptr);
void * ptr; /* Ptr for write and return value */
const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */
+ uint8_t write_map; /* Indicates if the clEnqueueMapBuffer/Image map is write-enabled */
void (*user_func)(void *); /* pointer to a host-callable user function */
} enqueue_data;
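A minimal sketch of how a map enqueue path fills this in (the flag test
mirrors the hunks above; zero-initialization is assumed so a stale
write_map cannot force a writable mapping):

    enqueue_data data = { 0 };   /* unsync_map/write_map start at 0 */
    data.ptr        = ptr;
    data.unsync_map = 1;
    if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
      data.write_map = 1;        /* request a writable bo mapping */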
cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region,
const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image)
{
- char* dst= cl_mem_map_auto((cl_mem)dst_image);
- char* src= cl_mem_map_auto((cl_mem)src_image);
+ char* dst= cl_mem_map_auto((cl_mem)dst_image, 1);
+ char* src= cl_mem_map_auto((cl_mem)src_image, 0);
size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * dst_origin[1] + dst_image->slice_pitch * dst_origin[2];
size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * src_origin[1] + src_image->slice_pitch * src_origin[2];
dst= (char*)dst+ dst_offset;
size_t slice_pitch,
void* host_ptr)
{
- char* dst_ptr = cl_mem_map_auto((cl_mem)image);
+ char* dst_ptr = cl_mem_map_auto((cl_mem)image, 1);
size_t origin[3] = {0, 0, 0};
size_t region[3] = {image->w, image->h, image->depth};
mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
if (image == NULL)
return NULL;
- void *src = cl_mem_map(buffer);
- void *dst = cl_mem_map(image);
+ void *src = cl_mem_map(buffer, 0);
+ void *dst = cl_mem_map(image, 1);
//
// FIXME: we could use copy buffer to image to do this on the GPU later.
// Currently the copy buffer to image function doesn't support 1D images.
LOCAL void*
-cl_mem_map(cl_mem mem)
+cl_mem_map(cl_mem mem, int write)
{
- cl_buffer_map(mem->bo, 1);
+ cl_buffer_map(mem->bo, write);
assert(cl_buffer_get_virtual(mem->bo));
return cl_buffer_get_virtual(mem->bo);
}
}
LOCAL void*
-cl_mem_map_auto(cl_mem mem)
+cl_mem_map_auto(cl_mem mem, int write)
{
if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
return cl_mem_map_gtt(mem);
else
- return cl_mem_map(mem);
+ return cl_mem_map(mem, write);
}
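On the intel backend, cl_buffer_map is assumed to be a thin wrapper
over libdrm's drm_intel_bo_map(), whose write_enable argument is what
this patch threads through (a hedged sketch; intel_buffer_map is an
illustrative name):

    #include <intel_bufmgr.h>  /* libdrm-intel: drm_intel_bo_map() */

    static int intel_buffer_map(drm_intel_bo *bo, int write)
    {
      /* write == 0 asks the kernel for a read-only CPU mapping, which
       * can avoid dirtying the pages and so wait/flush less; write == 1
       * requests a writable mapping, matching the old behavior. */
      return drm_intel_bo_map(bo, write);
    }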
LOCAL cl_int
const size_t, const size_t *, const size_t *);
/* Directly map a memory object */
-extern void *cl_mem_map(cl_mem);
+extern void *cl_mem_map(cl_mem, int);
/* Unmap a memory object */
extern cl_int cl_mem_unmap(cl_mem);
extern cl_int cl_mem_unmap_gtt(cl_mem);
/* Directly map a memory object - tiled images are mapped in GTT mode */
-extern void *cl_mem_map_auto(cl_mem);
+extern void *cl_mem_map_auto(cl_mem, int);
/* Unmap a memory object - tiled images are unmapped in GTT mode */
extern cl_int cl_mem_unmap_auto(cl_mem);