From 97dec3e93f14d2431352b1501b57fe21cee3b719 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Tue, 10 Jun 2014 12:53:12 +0800 Subject: [PATCH] Add the printf logic into the run time. Signed-off-by: Junyan He Reviewed-by: Zhigang Gong --- backend/src/gbe_bin_interpreter.cpp | 6 ++ src/cl_command_queue.c | 14 +++++ src/cl_command_queue_gen7.c | 24 ++++++++ src/cl_driver.h | 28 ++++++++++ src/cl_driver_defs.c | 7 +++ src/cl_gbe_loader.cpp | 25 +++++++++ src/intel/intel_gpgpu.c | 106 ++++++++++++++++++++++++++++++++++++ 7 files changed, 210 insertions(+) diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp index 64bf5c4..1945b5b 100644 --- a/backend/src/gbe_bin_interpreter.cpp +++ b/backend/src/gbe_bin_interpreter.cpp @@ -21,6 +21,7 @@ #include "sys/assert.cpp" #include "sys/platform.cpp" #include "ir/constant.cpp" +#include "ir/printf.cpp" #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" @@ -62,6 +63,11 @@ struct BinInterpCallBackInitializer gbe_kernel_get_image_data = gbe::kernelGetImageData; gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; + gbe_get_printf_num = gbe::kernelGetPrintfNum; + gbe_dup_printfset = gbe::kernelDupPrintfSet; + gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize; + gbe_release_printf_info = gbe::kernelReleasePrintfSet; + gbe_output_printf = gbe::kernelOutputPrintf; } ~BinInterpCallBackInitializer() { diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index e6553ec..618be65 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -420,9 +420,23 @@ LOCAL cl_int cl_command_queue_flush(cl_command_queue queue) { GET_QUEUE_THREAD_GPGPU(queue); + size_t global_wk_sz[3]; + void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz); cl_gpgpu_flush(gpgpu); + if (printf_info && gbe_get_printf_num(printf_info)) { + void *index_addr = cl_gpgpu_map_printf_buffer(gpgpu, 0); + void 
*buf_addr = cl_gpgpu_map_printf_buffer(gpgpu, 1); + gbe_output_printf(printf_info, index_addr, buf_addr, global_wk_sz[0], + global_wk_sz[1], global_wk_sz[2]); + cl_gpgpu_unmap_printf_buffer(gpgpu, 0); + cl_gpgpu_unmap_printf_buffer(gpgpu, 1); + gbe_release_printf_info(printf_info); + global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0; + cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz); + } + cl_invalid_thread_gpgpu(queue); return CL_SUCCESS; } diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 3401baa..9680535 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -247,6 +247,19 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl()); } +static void +cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) { + int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER; + int32_t offset = gbe_kernel_get_curbe_offset(ker->opaque, value, 0); + size_t buf_size = global_sz * sizeof(int) * printf_num; + cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset); + + value = GBE_CURBE_PRINTF_BUF_POINTER; + offset = gbe_kernel_get_curbe_offset(ker->opaque, value, 0); + buf_size = gbe_get_printf_sizeof_size(printf_info) * global_sz; + cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset); +} + LOCAL cl_int cl_command_queue_ND_range_gen7(cl_command_queue queue, cl_kernel ker, @@ -264,7 +277,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque); int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque); size_t thread_n = 0u; + int printf_num = 0; cl_int err = CL_SUCCESS; + size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2]; + void* printf_info = NULL; /* Setup kernel */ kernel.name = "KERNEL"; @@ -298,12 +314,20 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, } } + printf_info = gbe_dup_printfset(ker->opaque); + 
cl_gpgpu_set_printf_info(gpgpu, printf_info, (size_t *)global_wk_sz); + /* Setup the kernel */ if (queue->props & CL_QUEUE_PROFILING_ENABLE) cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 1); else cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 0); + printf_num = gbe_get_printf_num(printf_info); + if (printf_num) { + cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size); + } + /* Bind user buffers */ cl_command_queue_bind_surface(queue, ker); /* Bind user images */ diff --git a/src/cl_driver.h b/src/cl_driver.h index 2bca443..421027a 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -225,6 +225,34 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf; typedef void (cl_gpgpu_unref_batch_buf_cb)(void*); extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf; +/* Set the printf buffer */ +typedef void (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t); +extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer; + +/* get the printf buffer offset in the aperture*/ +typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t); +extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer; + +/* map the printf buffer */ +typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu, uint32_t); +extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer; + +/* unmap the printf buffer */ +typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu, uint32_t); +extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer; + +/* release the printf buffer */ +typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu, uint32_t); +extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer; + +/* Set the last printfset pointer */ +typedef void (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *, size_t*); +extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info; + +/* Get the last printfset pointer */ +typedef void* 
(cl_gpgpu_get_printf_info_cb)(cl_gpgpu, size_t*); +extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info; + /* Will spawn all threads */ typedef void (cl_gpgpu_walker_cb)(cl_gpgpu, uint32_t simd_sz, diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index ab3af49..3a9b9ed 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -86,4 +86,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL; LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL; LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL; +LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL; +LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL; +LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL; +LOCAL cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer = NULL; +LOCAL cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info = NULL; +LOCAL cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info = NULL; +LOCAL cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer = NULL; diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp index b1b75d6..38f9ab6 100644 --- a/src/cl_gbe_loader.cpp +++ b/src/cl_gbe_loader.cpp @@ -59,6 +59,11 @@ gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL; gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL; gbe_set_image_base_index_cb *gbe_set_image_base_index_interp = NULL; gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL; +gbe_get_printf_num_cb* gbe_get_printf_num = NULL; +gbe_dup_printfset_cb* gbe_dup_printfset = NULL; +gbe_get_printf_sizeof_size_cb* gbe_get_printf_sizeof_size = NULL; +gbe_release_printf_info_cb* gbe_release_printf_info = NULL; +gbe_output_printf_cb* gbe_output_printf = NULL; struct GbeLoaderInitializer { @@ -200,6 +205,26 @@ struct GbeLoaderInitializer if (gbe_get_image_base_index == 
NULL) return false; + gbe_get_printf_num = *(gbe_get_printf_num_cb**)dlsym(dlhInterp, "gbe_get_printf_num"); + if (gbe_get_printf_num == NULL) + return false; + + gbe_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset"); + if (gbe_dup_printfset == NULL) + return false; + + gbe_get_printf_sizeof_size = *(gbe_get_printf_sizeof_size_cb**)dlsym(dlhInterp, "gbe_get_printf_sizeof_size"); + if (gbe_get_printf_sizeof_size == NULL) + return false; + + gbe_release_printf_info = *(gbe_release_printf_info_cb**)dlsym(dlhInterp, "gbe_release_printf_info"); + if (gbe_release_printf_info == NULL) + return false; + + gbe_output_printf = *(gbe_output_printf_cb**)dlsym(dlhInterp, "gbe_output_printf"); + if (gbe_output_printf == NULL) + return false; + return true; } diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index a1bd672..3ec0315 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -78,6 +78,9 @@ enum {max_sampler_n = 16 }; /* Handle GPGPU state */ struct intel_gpgpu { + void* ker_opaque; + size_t global_wk_sz[3]; + void* printf_info; intel_driver_t *drv; intel_batchbuffer_t *batch; cl_gpgpu_kernel *ker; @@ -97,6 +100,8 @@ struct intel_gpgpu struct { drm_intel_bo *bo; } scratch_b; struct { drm_intel_bo *bo; } constant_b; struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ + struct { drm_intel_bo *bo; + drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/ struct { drm_intel_bo *bo; } aux_buf; struct { @@ -155,6 +160,10 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu) return; if(gpgpu->time_stamp_b.bo) drm_intel_bo_unreference(gpgpu->time_stamp_b.bo); + if(gpgpu->printf_b.bo) + drm_intel_bo_unreference(gpgpu->printf_b.bo); + if(gpgpu->printf_b.ibo) + drm_intel_bo_unreference(gpgpu->printf_b.ibo); if (gpgpu->aux_buf.bo) drm_intel_bo_unreference(gpgpu->aux_buf.bo); if (gpgpu->perf_b.bo) @@ -567,6 +576,13 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, gpgpu->urb.size_cs_entry = size_cs_entry; 
gpgpu->max_threads = max_threads; + if (gpgpu->printf_b.ibo) + dri_bo_unreference(gpgpu->printf_b.ibo); + gpgpu->printf_b.ibo = NULL; + if (gpgpu->printf_b.bo) + dri_bo_unreference(gpgpu->printf_b.bo); + gpgpu->printf_b.bo = NULL; + /* Set the profile buffer*/ if(gpgpu->time_stamp_b.bo) dri_bo_unreference(gpgpu->time_stamp_b.bo); @@ -1209,6 +1225,90 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event, drm_intel_gem_bo_unmap_gtt(event->ts_buf); } +static void +intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset) +{ + drm_intel_bo *bo = NULL; + if (i == 0) { // the index buffer. + if (gpgpu->printf_b.ibo) + dri_bo_unreference(gpgpu->printf_b.ibo); + gpgpu->printf_b.ibo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf index buffer", size, 4096); + bo = gpgpu->printf_b.ibo; + } else if (i == 1) { + if (gpgpu->printf_b.bo) + dri_bo_unreference(gpgpu->printf_b.bo); + gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf output buffer", size, 4096); + bo = gpgpu->printf_b.bo; + } else + assert(0); + + drm_intel_bo_map(bo, 1); + memset(bo->virtual, 0, size); + drm_intel_bo_unmap(bo); + + intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, 0); +} + +static void* +intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i) +{ + drm_intel_bo *bo = NULL; + if (i == 0) { + bo = gpgpu->printf_b.ibo; + } else if (i == 1) { + bo = gpgpu->printf_b.bo; + } else + assert(0); + + drm_intel_bo_map(bo, 1); + return bo->virtual; +} + +static void +intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu, uint32_t i) +{ + drm_intel_bo *bo = NULL; + if (i == 0) { + bo = gpgpu->printf_b.ibo; + } else if (i == 1) { + bo = gpgpu->printf_b.bo; + } else + assert(0); + + drm_intel_bo_unmap(bo); +} + +static void +intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i) +{ + if (i == 0) { + drm_intel_bo_unreference(gpgpu->printf_b.ibo); + gpgpu->printf_b.ibo = NULL; + } else if (i == 1) { + 
drm_intel_bo_unreference(gpgpu->printf_b.bo); + gpgpu->printf_b.bo = NULL; + } else + assert(0); +} + +static void +intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info, size_t * global_sz) +{ + gpgpu->printf_info = printf_info; + gpgpu->global_wk_sz[0] = global_sz[0]; + gpgpu->global_wk_sz[1] = global_sz[1]; + gpgpu->global_wk_sz[2] = global_sz[2]; +} + +static void* +intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu, size_t * global_sz) +{ + global_sz[0] = gpgpu->global_wk_sz[0]; + global_sz[1] = gpgpu->global_wk_sz[1]; + global_sz[2] = gpgpu->global_wk_sz[2]; + return gpgpu->printf_info; +} + LOCAL void intel_set_gpgpu_callbacks(int device_id) { @@ -1239,6 +1339,12 @@ intel_set_gpgpu_callbacks(int device_id) cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp; cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf; cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf; + cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf; + cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf; + cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr; + cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf; + cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info; + cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info; if (IS_HASWELL(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75; -- 2.7.4