From: Yang Rong Date: Mon, 22 Apr 2013 05:11:51 +0000 (+0800) Subject: Add constant pointer as argument support in runtime. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c6d7b5f92c824b892f509993cddf629572e9c299;p=contrib%2Fbeignet.git Add constant pointer as argument support in runtime. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong --- diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index a22884f..7d604c3 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -1,4 +1,4 @@ -/* +/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or @@ -108,7 +108,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) uint32_t offset; // location of the address in the curbe arg_type = gbe_kernel_get_arg_type(k->opaque, i); if (arg_type != GBE_ARG_GLOBAL_PTR && - arg_type != GBE_ARG_CONSTANT_PTR && arg_type != GBE_ARG_IMAGE && arg_type != GBE_ARG_SAMPLER) continue; @@ -129,6 +128,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) return CL_SUCCESS; } +LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, + char * dst) +{ + int i; + for(i = 0; i < k->arg_n; i++) { + enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i); + + if(arg_type == GBE_ARG_CONSTANT_PTR) { + uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER); + cl_mem mem = k->args[i].mem; + cl_buffer_map(mem->bo, 1); + void * addr = cl_buffer_get_virtual(mem->bo); + memcpy(dst + offset, addr, mem->size); + cl_buffer_unmap(mem->bo); + } + } + return CL_SUCCESS; +} + #if USE_FULSIM extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr); extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*); diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index 6387ae1..dcfc8c4 100644 --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -70,5 +70,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue); /* Bind all the surfaces in the GPGPU state */ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel); +/*update constant buffer to final curbe */ +extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst); #endif /* __CL_COMMAND_QUEUE_H__ */ diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 3a590bc..9402549 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -186,7 +186,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, char *final_curbe = NULL; /* Includes them and one sub-buffer per group */ cl_gpgpu_kernel kernel; const uint32_t simd_sz = cl_kernel_get_simd_width(ker); - size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz; + size_t i, batch_sz = 0u, local_sz = 0u; + size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque); size_t thread_n = 0u; cl_int err = CL_SUCCESS; @@ -224,8 +225,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, if (ker->curbe) { assert(cst_sz > 0); TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz)); - for (i = 0; i < thread_n; ++i) + for (i = 0; i < thread_n; ++i) { memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz); + cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i); + } TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n); cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz); } diff --git a/src/cl_kernel.c b/src/cl_kernel.c index bbd4438..ec0e2e8 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -1,4 +1,4 @@ -/* +/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or @@ -154,6 +154,17 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image) || (arg_type != GBE_ARG_IMAGE && mem->is_image))) return CL_INVALID_ARG_VALUE; + + if(arg_type == GBE_ARG_CONSTANT_PTR) { + int32_t cbOffset; + cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size); + //constant ptr's curbe offset changed, update it + if(cbOffset >= 0) { + offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index); + *((uint32_t *)(k->curbe + offset)) = cbOffset; //cb offset in curbe + } + } + cl_mem_add_ref(mem); if (k->args[index].mem) cl_mem_delete(k->args[index].mem); @@ -177,6 +188,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque) cl_context ctx = k->program->ctx; cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx); + if(k->bo != NULL) + cl_buffer_unreference(k->bo); + /* Allocate the gen code here */ const uint32_t code_sz = gbe_kernel_get_code_size(opaque); const char *code = gbe_kernel_get_code(opaque); diff --git a/src/cl_mem.c b/src/cl_mem.c index 4bbaee4..ab9cc9a 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -141,6 +141,7 @@ cl_mem_allocate(cl_context ctx, err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } + mem->size = sz; /* Append the buffer in the context buffer list */ pthread_mutex_lock(&ctx->buffer_lock); diff --git a/src/cl_mem.h b/src/cl_mem.h index 836deb4..a3a1547 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -36,6 +36,7 @@ struct _cl_mem { uint64_t magic; /* To identify it as a memory object */ volatile int ref_n; /* This object is reference counted */ cl_buffer bo; /* Data in GPU memory */ + size_t size; /* original request size, not alignment size, used in constant buffer */ cl_mem prev, next; /* We chain the memory buffers together */ cl_context ctx; /* Context it belongs to */ cl_mem_flags flags; /* Flags specified at the creation time */