From 74252fdc7b3e211bcbe22e4411f313e978f25def Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Wed, 18 Jun 2014 10:01:15 +0800 Subject: [PATCH] cl/driver: fix the incorrect handling of 1D array. According to the bspec, a 1D array should be treated as a 3D-like surface which has height 1, so we need to make sure the depth is the array_size. Thus the rt_view_extent's value should always be the same as the depth. According to the OpenCL spec, a 1D array is first of all a 1D image rather than a 2D image. Thus we should address the different lines by the slice_pitch rather than the image_row_pitch. Signed-off-by: Zhigang Gong Reviewed-by: He Junyan --- backend/src/ocl_stdlib.tmpl.h | 2 +- src/cl_mem.c | 46 ++++++++++++++++++++++++++----------------- src/intel/intel_gpgpu.c | 20 ++++++------------- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 3b0b49a..f22e30f 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -4795,7 +4795,7 @@ DECL_IMAGE_INFO_COMMON(image1d_array_t) INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) { GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_height(surface_id); + return __gen_ocl_get_image_depth(surface_id); } #undef EXPEND_READ_COORD diff --git a/src/cl_mem.c b/src/cl_mem.c index 0806d35..491993e 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -513,6 +513,7 @@ static const uint32_t tilex_w = 512; /* tileX width in bytes */ static const uint32_t tilex_h = 8; /* tileX height in number of rows */ static const uint32_t tiley_w = 128; /* tileY width in bytes */ static const uint32_t tiley_h = 32; /* tileY height in number of rows */ +static const uint32_t valign = 2; /* vertical alignment is 2. 
*/ cl_image_tiling_t cl_get_default_tiling(void) { @@ -551,7 +552,7 @@ _cl_mem_new_image(cl_context ctx, cl_int err = CL_SUCCESS; cl_mem mem = NULL; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; - size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h; + size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ @@ -579,21 +580,29 @@ _cl_mem_new_image(cl_context ctx, } while (0); if (UNLIKELY(w == 0)) DO_IMAGE_ERROR; - if (UNLIKELY(h == 0 && image_type != CL_MEM_OBJECT_IMAGE1D)) DO_IMAGE_ERROR; + if (UNLIKELY(h == 0 && (image_type != CL_MEM_OBJECT_IMAGE1D && + image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY))) + DO_IMAGE_ERROR; - if (image_type == CL_MEM_OBJECT_IMAGE1D) { + if (image_type == CL_MEM_OBJECT_IMAGE1D || + image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; - depth = 1; h = 1; + if (image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + depth = 1; + else if (data && slice_pitch == 0) + slice_pitch = pitch; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; + if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; + if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; + if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; tiling = CL_NO_TILE; - } else if (image_type == CL_MEM_OBJECT_IMAGE2D || - image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + } else if (image_type == CL_MEM_OBJECT_IMAGE2D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -606,12 +615,9 @@ _cl_mem_new_image(cl_context ctx, if (cl_driver_get_ver(ctx->drv) != 6) tiling = cl_get_default_tiling(); - if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) - tiling = CL_NO_TILE; - depth = 1; } else if (image_type == CL_MEM_OBJECT_IMAGE3D || - image_type == 
CL_MEM_OBJECT_IMAGE2D_ARRAY) { + image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -637,7 +643,12 @@ _cl_mem_new_image(cl_context ctx, /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; - aligned_h = h; + if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || + image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || + image_type == CL_MEM_OBJECT_IMAGE3D) + aligned_h = ALIGN(h, valign); + else + aligned_h = h; } else if (tiling == CL_TILE_X) { aligned_pitch = ALIGN(w * bpp, tilex_w); aligned_h = ALIGN(h, tilex_h); @@ -662,9 +673,12 @@ _cl_mem_new_image(cl_context ctx, goto error; cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch); - aligned_slice_pitch = (image_type == CL_MEM_OBJECT_IMAGE1D || image_type == CL_MEM_OBJECT_IMAGE2D - || image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) - ? 0 : aligned_pitch * ALIGN(h, 2); + if (image_type == CL_MEM_OBJECT_IMAGE1D || + image_type == CL_MEM_OBJECT_IMAGE2D || + image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) + aligned_slice_pitch = 0; + else + aligned_slice_pitch = aligned_pitch * ALIGN(h, 2); cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt, intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling, @@ -832,10 +846,6 @@ cl_mem_new_image(cl_context context, image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, errcode_ret); case CL_MEM_OBJECT_IMAGE1D_ARRAY: - return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, - image_desc->image_width, image_desc->image_array_size, image_desc->image_depth, - image_desc->image_row_pitch, image_desc->image_slice_pitch, - host_ptr, errcode_ret); case CL_MEM_OBJECT_IMAGE2D_ARRAY: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_array_size, diff --git a/src/intel/intel_gpgpu.c 
b/src/intel/intel_gpgpu.c index bb944c9..d868a2f 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -763,6 +763,7 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, memset(ss, 0, sizeof(*ss)); + ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = intel_get_surface_type(type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; @@ -772,13 +773,8 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, ss->ss1.base_addr = obj_bo->offset; ss->ss2.width = w - 1; - if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { - ss->ss2.height = 1; - ss->ss3.depth = h - 1; - } else { - ss->ss2.height = h - 1; - ss->ss3.depth = depth - 1; - } + ss->ss2.height = h - 1; + ss->ss3.depth = depth - 1; ss->ss4.not_str_buf.rt_view_extent = depth - 1; ss->ss4.not_str_buf.min_array_element = 0; ss->ss3.pitch = pitch - 1; @@ -814,6 +810,7 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; memset(ss, 0, sizeof(*ss)); + ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = intel_get_surface_type(type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; @@ -822,13 +819,8 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset; ss->ss2.width = w - 1; - if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { - ss->ss2.height = 1; - ss->ss3.depth = h - 1; - } else { - ss->ss2.height = h - 1; - ss->ss3.depth = depth - 1; - } + ss->ss2.height = h - 1; + ss->ss3.depth = depth - 1; ss->ss4.not_str_buf.rt_view_extent = depth - 1; ss->ss4.not_str_buf.min_array_element = 0; ss->ss3.pitch = pitch - 1; -- 2.7.4