From a3e1a0eb52b82cd98b27afa3180fe1f95caf0941 Mon Sep 17 00:00:00 2001 From: bsegovia Date: Fri, 14 Oct 2011 04:41:19 +0000 Subject: [PATCH] First test now passes with images (format still hard-coded and constant patching is still missing) --- kernels/test_copy_image.cl | 11 +-- src/CMakeLists.txt | 1 + src/cl_command_queue.c | 26 ++++-- src/cl_image.c | 95 ++++++++++++++++++++++ src/cl_image.h | 31 ++++++++ src/cl_kernel.c | 48 ----------- src/cl_mem.c | 74 +---------------- src/intel/intel_gpgpu.c | 193 +++++++++++++++++++++++++++++++++------------ src/intel/intel_gpgpu.h | 17 +++- src/intel/intel_structs.h | 57 +++++-------- 10 files changed, 328 insertions(+), 225 deletions(-) create mode 100644 src/cl_image.c create mode 100644 src/cl_image.h diff --git a/kernels/test_copy_image.cl b/kernels/test_copy_image.cl index 1ccc581..9693e86 100644 --- a/kernels/test_copy_image.cl +++ b/kernels/test_copy_image.cl @@ -5,10 +5,11 @@ __kernel void test_copy_image(__read_only image2d_t src, __global uchar4 *dst) { - const int x = (int) get_global_id(0); - const int y = (int) get_global_id(1); - const int id = x + y * get_image_width(src); - const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y))); - dst[id] = from; + const int x = (int) get_global_id(0); + const int y = (int) get_global_id(1); + //const int id = x + y * get_image_width(src); + const int id = x + y * 32; + const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y))); + dst[id] = from; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a9b3f2..d1fff6c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ SET(OPENCL_SRC cl_kernel.c cl_program.c cl_event.c + cl_image.c cl_mem.c cl_platform_id.c cl_device_id.c diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 3f2a8e7..1faec2b 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -118,8 +118,8 @@ cl_command_queue_bind_surface(cl_command_queue queue, /* Bind user defined surface */ for (i = 0; i < k->arg_info_n; ++i) { - if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER) - continue; +// if (k->arg_info[i].type != OCLRT_ARG_TYPE_BUFFER) +// continue; assert(k->arg_info[i].offset % SURFACE_SZ == 0); index = k->arg_info[i].offset / SURFACE_SZ; mem = (cl_mem) k->args[k->arg_info[i].arg_index]; @@ -127,7 +127,18 @@ cl_command_queue_bind_surface(cl_command_queue queue, CHECK_MEM(mem); bo = mem->bo; assert(bo); - gpgpu_bind_buf(gpgpu, index, bo, 0, bo->size, cc_llc_mlc); + if (mem->is_image) +#define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB + gpgpu_bind_image2D(gpgpu, + index, + bo, + I965_SURFACEFORMAT_R8G8B8A8_UINT, + mem->w, + mem->h, + 4, + cc_llc_mlc); + else + gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_mlc); } /* Allocate the constant surface (if any) */ @@ -135,7 +146,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, assert(k->const_bo_index != MAX_SURFACES - 1); gpgpu_bind_buf(gpgpu, k->const_bo_index, k->const_bo, - 0, k->const_bo->size, cc_llc_mlc); } @@ -147,7 +157,7 @@ cl_command_queue_bind_surface(cl_command_queue queue, index = k->patch.local_surf.offset / SURFACE_SZ; assert(index != MAX_SURFACES - 1); *local = drm_intel_bo_alloc(bufmgr, "CL local surface", sz, 64); - gpgpu_bind_buf(gpgpu, index, *local, 0, sz, cc_llc_mlc); + gpgpu_bind_buf(gpgpu, index, *local, sz, cc_llc_mlc); } else if (local) *local = NULL; @@ -162,7 +172,7 @@ cl_command_queue_bind_surface(cl_command_queue queue, index = k->patch.private_surf.offset / SURFACE_SZ; assert(index != MAX_SURFACES - 1); *priv = drm_intel_bo_alloc(bufmgr, "CL private surface", sz, 64); - gpgpu_bind_buf(gpgpu, index, *priv, 0, sz, cc_llc_mlc); + gpgpu_bind_buf(gpgpu, index, *priv, sz, cc_llc_mlc); } else if(priv) *priv = NULL; @@ -177,14 +187,14 @@ cl_command_queue_bind_surface(cl_command_queue queue, assert(index != MAX_SURFACES - 1); index = k->patch.scratch.offset / SURFACE_SZ; *scratch = drm_intel_bo_alloc(bufmgr, "CL scratch surface", sz, 64); - gpgpu_bind_buf(gpgpu, index, *scratch, 0, sz, cc_llc_mlc); + gpgpu_bind_buf(gpgpu, index, *scratch, sz, cc_llc_mlc); } else if (scratch) *scratch = NULL; /* Now bind a bo used for synchronization */ sync_bo = drm_intel_bo_alloc(bufmgr, "sync surface", 64, 64); - gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 0, 64, cc_llc_mlc); + gpgpu_bind_buf(gpgpu, MAX_SURFACES-1, sync_bo, 64, cc_llc_mlc); if (queue->last_batch != NULL) drm_intel_bo_unreference(queue->last_batch); queue->last_batch = sync_bo; diff --git a/src/cl_image.c b/src/cl_image.c new file mode 100644 index 0000000..7e6e700 --- /dev/null +++ b/src/cl_image.c @@ -0,0 +1,95 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +#include "cl_image.h" +#include "cl_utils.h" + +#include + +LOCAL cl_int +cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) +{ + assert(bpp); + + const uint32_t type = fmt->image_channel_data_type; + const uint32_t order = fmt->image_channel_order; + switch (type) { +#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE; + DECL_BPP(CL_SNORM_INT8, 1); break; + DECL_BPP(CL_SNORM_INT16, 2); break; + DECL_BPP(CL_UNORM_INT8, 1); break; + DECL_BPP(CL_UNORM_INT16, 2); break; + DECL_BPP(CL_UNORM_SHORT_565, 2); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_UNORM_SHORT_555, 2); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_UNORM_INT_101010, 4); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_SIGNED_INT8, 1); break; + DECL_BPP(CL_SIGNED_INT16, 2); break; + DECL_BPP(CL_SIGNED_INT32, 4); break; + DECL_BPP(CL_UNSIGNED_INT8, 1); break; + DECL_BPP(CL_UNSIGNED_INT16, 2); break; + DECL_BPP(CL_UNSIGNED_INT32, 4); break; + DECL_BPP(CL_HALF_FLOAT, 2); break; + DECL_BPP(CL_FLOAT, 4); break; +#undef DECL_BPP + default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + }; + + switch (order) { + case CL_R: break; + case CL_A: break; + case CL_RA: *bpp *= 2; break; + case CL_RG: *bpp *= 2; break; + case CL_Rx: *bpp *= 2; break; + case CL_INTENSITY: + case CL_LUMINANCE: + if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && + type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && + type != CL_HALF_FLOAT && type != CL_FLOAT) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + case CL_RGB: + case CL_RGBx: + if (type != CL_UNORM_SHORT_555 && + type != CL_UNORM_SHORT_565 && + type != CL_UNORM_INT_101010) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + case CL_RGBA: *bpp *= 4; break; + case CL_ARGB: + case CL_BGRA: + if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 && + type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + *bpp *= 4; + break; + default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + }; + + return CL_SUCCESS; +} + diff --git a/src/cl_image.h b/src/cl_image.h new file mode 100644 index 0000000..923bf1d --- /dev/null +++ b/src/cl_image.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +#ifndef __CL_IMAGE_H__ +#define __CL_IMAGE_H__ + +#include "cl_internals.h" +#include "CL/cl.h" +#include + +/* Compute the number of bytes per pixel if the format is supported */ +extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp); + +#endif /* __CL_IMAGE_H__ */ + diff --git a/src/cl_kernel.c b/src/cl_kernel.c index c199786..2d19b04 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -451,54 +451,6 @@ error: } #undef ASSOC_ITEM -typedef struct i965_sampler_state -{ - struct - { - uint32_t shadow_function:3; - uint32_t lod_bias:11; - uint32_t min_filter:3; - uint32_t mag_filter:3; - uint32_t mip_filter:2; - uint32_t base_level:5; - uint32_t min_mag_neq:1; - uint32_t lod_preclamp:1; - uint32_t default_color_mode:1; - uint32_t pad0:1; - uint32_t disable:1; - } ss0; - - struct - { - uint32_t r_wrap_mode:3; - uint32_t t_wrap_mode:3; - uint32_t s_wrap_mode:3; - uint32_t cube_control_mode:1; - uint32_t pad:2; - uint32_t max_lod:10; - uint32_t min_lod:10; - } ss1; - - - struct - { - uint32_t pad:5; - uint32_t default_color_pointer:27; - } ss2; - - struct - { - uint32_t non_normalized_coord:1; - uint32_t pad:12; - uint32_t address_round:6; - uint32_t max_aniso:3; - uint32_t chroma_key_mode:1; - uint32_t chroma_key_index:2; - uint32_t chroma_key_enable:1; - uint32_t monochrome_filter_width:3; - uint32_t monochrome_filter_height:3; - } ss3; -} i965_sampler_state_t; LOCAL int cl_kernel_setup(cl_kernel k, const char *ker) diff --git a/src/cl_mem.c b/src/cl_mem.c index 7151df7..f2fad25 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -18,6 +18,7 @@ */ #include "cl_mem.h" +#include "cl_image.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" @@ -123,77 +124,6 @@ error: goto exit; } -static cl_int -cl_mem_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) -{ - assert(bpp); - - const uint32_t type = fmt->image_channel_data_type; - const uint32_t order = fmt->image_channel_order; - switch (type) { -#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE; - DECL_BPP(CL_SNORM_INT8, 1); break; - DECL_BPP(CL_SNORM_INT16, 2); break; - DECL_BPP(CL_UNORM_INT8, 1); break; - DECL_BPP(CL_UNORM_INT16, 2); break; - DECL_BPP(CL_UNORM_SHORT_565, 2); - if (order != CL_RGBx && order != CL_RGB) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - break; - DECL_BPP(CL_UNORM_SHORT_555, 2); - if (order != CL_RGBx && order != CL_RGB) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - break; - DECL_BPP(CL_UNORM_INT_101010, 4); - if (order != CL_RGBx && order != CL_RGB) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - break; - DECL_BPP(CL_SIGNED_INT8, 1); break; - DECL_BPP(CL_SIGNED_INT16, 2); break; - DECL_BPP(CL_SIGNED_INT32, 4); break; - DECL_BPP(CL_UNSIGNED_INT8, 1); break; - DECL_BPP(CL_UNSIGNED_INT16, 2); break; - DECL_BPP(CL_UNSIGNED_INT32, 4); break; - DECL_BPP(CL_HALF_FLOAT, 2); break; - DECL_BPP(CL_FLOAT, 4); break; -#undef DECL_BPP - default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - }; - - switch (order) { - case CL_R: break; - case CL_A: break; - case CL_RA: *bpp *= 2; break; - case CL_RG: *bpp *= 2; break; - case CL_Rx: *bpp *= 2; break; - case CL_INTENSITY: - case CL_LUMINANCE: - if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && - type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && - type != CL_HALF_FLOAT && type != CL_FLOAT) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - break; - case CL_RGB: - case CL_RGBx: - if (type != CL_UNORM_SHORT_555 && - type != CL_UNORM_SHORT_565 && - type != CL_UNORM_INT_101010) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - break; - case CL_RGBA: *bpp *= 4; break; - case CL_ARGB: - case CL_BGRA: - if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 && - type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8) - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - *bpp *= 4; - break; - default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - }; - - return CL_SUCCESS; -} - LOCAL cl_mem cl_mem_new_image2D(cl_context ctx, cl_mem_flags flags, @@ -216,7 +146,7 @@ cl_mem_new_image2D(cl_context ctx, } /* Get the size of each pixel */ - if (UNLIKELY((err = cl_mem_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS)) + if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS)) goto error; /* See if the user parameters match */ diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index f9e7db1..9a8a63f 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -82,10 +82,10 @@ struct intel_gpgpu struct { uint32_t num_cs_entries; - uint32_t size_cs_entry; /* size of one entry in 512bit elements */ + uint32_t size_cs_entry; /* size of one entry in 512bit elements */ } urb; - uint32_t max_threads; /* max threads requested by the user */ + uint32_t max_threads; /* max threads requested by the user */ }; /* Be sure that the size is still valid */ @@ -467,68 +467,161 @@ gpgpu_state_init(intel_gpgpu_t *state, dri_bo_unreference(state->sampler_state_b.bo); bo = dri_bo_alloc(state->drv->bufmgr, "sample states", - //MAX_SAMPLERS * sizeof(struct i965_sampler_state), - MAX_SAMPLERS * 16, + MAX_SAMPLERS * sizeof(struct gen6_sampler_state), 32); assert(bo); state->sampler_state_b.bo = bo; memset(state->samplers, 0, sizeof(state->samplers)); } +static void +gpgpu_set_buf_reloc_gen6(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + heap->binding_table[index] = offsetof(surface_heap_t, surface) + + index * sizeof(gen6_surface_state_t); + dri_bo_emit_reloc(state->surface_heap_b.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + heap->binding_table[index] + + offsetof(gen6_surface_state_t, ss1), + obj_bo); +} + +static void +gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + heap->binding_table[index] = offsetof(surface_heap_t, surface) + + index * sizeof(gen7_surface_state_t); + dri_bo_emit_reloc(state->surface_heap_b.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0, + heap->binding_table[index] + + offsetof(gen7_surface_state_t, ss1), + obj_bo); +} + +static void +gpgpu_bind_buf_gen6(intel_gpgpu_t *state, + int32_t index, + dri_bo* obj_bo, + uint32_t size, + uint32_t cchint) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index]; + const uint32_t size_ss = ((size+0xf) >> 4) - 1; /* ceil(size/16) - 1 */ + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_BUFFER; + ss->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_FLOAT; + ss->ss1.base_addr = obj_bo->offset; + ss->ss2.width = size_ss & 0x7f; /* bits 6:0 of size_ss */ + ss->ss2.height = (size_ss >> 7) & 0x1fff; /* bits 19:7 of size_ss */ + ss->ss3.depth = size_ss >> 20; /* bits 26:20 of size_ss */ + ss->ss3.pitch = 0xf; /* sizeof(RGBA32) - 1 */; + ss->ss5.cache_control = cchint; + gpgpu_set_buf_reloc_gen6(state, index, obj_bo); +} + +static void +gpgpu_bind_buf_gen7(intel_gpgpu_t *state, + int32_t index, + dri_bo* obj_bo, + uint32_t size, + uint32_t cchint) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; + const uint32_t size_ss = size - 1; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_BUFFER; + ss->ss0.surface_format = I965_SURFACEFORMAT_RAW; + ss->ss1.base_addr = obj_bo->offset; + ss->ss2.width = size_ss & 0x7f; /* bits 6:0 of size_ss */ + ss->ss2.height = (size_ss & 0x1fff80) >> 7; /* bits 20:7 of size_ss */ + ss->ss3.depth = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */ + ss->ss5.surface_object_control_state = GEN7_CACHED_IN_LLC; + gpgpu_set_buf_reloc_gen7(state, index, obj_bo); +} + +static void +gpgpu_bind_image2D_gen6(intel_gpgpu_t *state, + int32_t index, + dri_bo* obj_bo, + uint32_t format, + int32_t w, + int32_t h, + int bpp, + uint32_t cchint) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index]; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + ss->ss1.base_addr = obj_bo->offset; + ss->ss2.width = w - 1; + ss->ss2.height = h - 1; + ss->ss3.pitch = w*bpp - 1; + ss->ss5.cache_control = cchint; + gpgpu_set_buf_reloc_gen6(state, index, obj_bo); +} + +static void +gpgpu_bind_image2D_gen7(intel_gpgpu_t *state, + int32_t index, + dri_bo* obj_bo, + uint32_t format, + int32_t w, + int32_t h, + int bpp, + uint32_t cchint) +{ + surface_heap_t *heap = state->surface_heap_b.bo->virtual; + gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + ss->ss1.base_addr = obj_bo->offset; + ss->ss2.width = w - 1; + ss->ss2.height = h - 1; + ss->ss3.pitch = w*bpp - 1; + //ss->ss5.cache_control = cchint; + gpgpu_set_buf_reloc_gen7(state, index, obj_bo); +} + LOCAL void gpgpu_bind_buf(intel_gpgpu_t *state, int32_t index, dri_bo* obj_bo, - uint32_t offset, uint32_t size, uint32_t cchint) { - assert(offset < MAX_SURFACES); - surface_heap_t *heap = state->surface_heap_b.bo->virtual; - - if(state->drv->gen_ver == 6) { - gen6_surface_state_t *ss = (gen6_surface_state_t *) heap->surface[index]; - const uint32_t size_ss = ((size+0xf) >> 4) - 1; /* ceil(size/16) - 1 */ - memset(ss, 0, sizeof(*ss)); - ss->ss0.surface_type = I965_SURFACE_BUFFER; - ss->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_FLOAT; - ss->ss1.base_addr = obj_bo->offset + offset; - ss->ss2.width = size_ss & 0x7f; /* bits 6:0 of size_ss */ - ss->ss2.height = (size_ss >> 7) & 0x1fff; /* bits 19:7 of size_ss */ - ss->ss3.depth = size_ss >> 20; /* bits 26:20 of size_ss */ - ss->ss3.pitch = 0xf; /* sizeof(RGBA32) - 1 */; - ss->ss5.cache_control = cchint; - heap->binding_table[index] = offsetof(surface_heap_t, surface) + - index * sizeof(gen6_surface_state_t); - dri_bo_emit_reloc(state->surface_heap_b.bo, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - offset, - heap->binding_table[index] + - offsetof(gen6_surface_state_t, ss1), - obj_bo); - } else if (state->drv->gen_ver == 7) { - gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; - const uint32_t size_ss = size - 1; - memset(ss, 0, sizeof(*ss)); - ss->ss0.surface_type = I965_SURFACE_BUFFER; - ss->ss0.surface_format = I965_SURFACEFORMAT_RAW; - ss->ss1.base_addr = obj_bo->offset + offset; - ss->ss2.width = size_ss & 0x7f; /* bits 6:0 of size_ss */ - ss->ss2.height = (size_ss & 0x1fff80) >> 7; /* bits 20:7 of size_ss */ - ss->ss3.depth = (size_ss & 0xffe00000) >> 20; /* bits 27:21 of size_ss */ - ss->ss5.surface_object_control_state = GEN7_CACHED_IN_LLC; - heap->binding_table[index] = offsetof(surface_heap_t, surface) + - index * sizeof(gen7_surface_state_t); - dri_bo_emit_reloc(state->surface_heap_b.bo, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - offset, - heap->binding_table[index] + - offsetof(gen7_surface_state_t, ss1), - obj_bo); - } + assert(index < MAX_SURFACES); + if(state->drv->gen_ver == 6) + gpgpu_bind_buf_gen6(state, index, obj_bo, size, cchint); + else if (state->drv->gen_ver == 7) + gpgpu_bind_buf_gen7(state, index, obj_bo, size, cchint); +} +LOCAL void +gpgpu_bind_image2D(intel_gpgpu_t *state, + int32_t index, + dri_bo* obj_bo, + uint32_t format, + int32_t w, + int32_t h, + int bpp, + uint32_t cchint) +{ + assert(index < MAX_SURFACES); + if(state->drv->gen_ver == 6) + gpgpu_bind_image2D_gen6(state, index, obj_bo, format, w, h, bpp, cchint); + else if (state->drv->gen_ver == 7) + gpgpu_bind_image2D_gen7(state, index, obj_bo, format, w, h, bpp, cchint); } static void diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h index 3ab7274..47f23a8 100644 --- a/src/intel/intel_gpgpu.h +++ b/src/intel/intel_gpgpu.h @@ -25,6 +25,9 @@ #include #include +#define MAX_SURFACES 128 +#define MAX_SAMPLERS 16 + enum gen6_cache_control { cc_gtt = 0x0, /* don't use L3, use GTT for LLC */ cc_mlc = 0x1, /* IVB: use L3, use GTT for LLC; SNB: UC */ @@ -32,9 +35,6 @@ enum gen6_cache_control { cc_llc_mlc = 0x3, }; -#define MAX_SURFACES 128 -#define MAX_SAMPLERS 16 - /* Use this structure to bind kernels in the gpgpu state */ typedef struct genx_gpgpu_kernel { const char *name; /* kernel name and bo name */ @@ -71,10 +71,19 @@ extern int32_t intel_gpgpu_version(intel_gpgpu_t*); extern void gpgpu_bind_buf(intel_gpgpu_t*, int32_t index, struct _drm_intel_bo* obj_bo, - uint32_t offset, uint32_t size, uint32_t cchint); +/* Set a 2d texture */ +extern void gpgpu_bind_image2D(intel_gpgpu_t *state, + int32_t index, + struct _drm_intel_bo* obj_bo, + uint32_t format, + int32_t w, + int32_t h, + int bpp, + uint32_t cchint); + /* Configure state, size in 512-bit units */ extern void gpgpu_state_init(intel_gpgpu_t*, uint32_t max_threads, uint32_t size_cs_entry); diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index 134b270..a455bdf 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -17,8 +17,8 @@ * Author: Benjamin Segovia */ -#ifndef __GENX_STRUCTS_H__ -#define __GENX_STRUCTS_H__ +#ifndef __INTEL_STRUCTS_H__ +#define __INTEL_STRUCTS_H__ #include @@ -70,13 +70,8 @@ typedef struct gen6_interface_descriptor uint32_t barrier_return_grf_offset:8; } desc5; - struct { - uint32_t reserved_mbz; - } desc6; - - struct { - uint32_t reserved_mbz; - } desc7; + uint32_t desc6; + uint32_t desc7; } gen6_interface_descriptor_t; typedef struct gen6_surface_state @@ -206,7 +201,6 @@ typedef struct gen7_surface_state uint32_t ss6; /* unused */ uint32_t ss7; /* unused */ - } gen7_surface_state_t; STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t)); @@ -275,8 +269,7 @@ typedef struct gen6_vfe_state_inline typedef struct gen6_pipe_control { - struct - { + struct { uint32_t length : BITFIELD_RANGE(0, 7); uint32_t reserved : BITFIELD_RANGE(8, 15); uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23); @@ -285,8 +278,7 @@ typedef struct gen6_pipe_control uint32_t instruction_type : BITFIELD_RANGE(29, 31); } dw0; - struct - { + struct { uint32_t depth_cache_flush_enable : BITFIELD_BIT(0); uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1); uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2); @@ -312,23 +304,20 @@ typedef struct gen6_pipe_control uint32_t reserved : BITFIELD_RANGE(23, 31); } dw1; - struct - { + struct { uint32_t reserved : BITFIELD_RANGE(0, 1); uint32_t destination_address_type : BITFIELD_BIT(2); uint32_t address : BITFIELD_RANGE(3, 31); } dw2; - struct - { + struct { uint64_t data; } qw0; } gen6_pipe_control_t; -#if 0 + typedef struct gen6_sampler_state { - struct - { + struct { uint32_t shadow_function:3; uint32_t lod_bias:11; uint32_t min_filter:3; @@ -342,8 +331,7 @@ typedef struct gen6_sampler_state uint32_t disable:1; } ss0; - struct - { + struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; @@ -353,15 +341,12 @@ typedef struct gen6_sampler_state uint32_t min_lod:10; } ss1; - - struct - { + struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; - struct - { + struct { uint32_t non_normalized_coord:1; uint32_t pad:12; uint32_t address_round:6; @@ -373,12 +358,10 @@ typedef struct gen6_sampler_state uint32_t monochrome_filter_height:3; } ss3; } gen6_sampler_state_t; -#endif typedef struct gen7_sampler_state { - struct - { + struct { uint32_t aniso_algorithm:1; uint32_t lod_bias:13; uint32_t min_filter:3; @@ -392,8 +375,7 @@ typedef struct gen7_sampler_state uint32_t disable:1; } ss0; - struct - { + struct { uint32_t cube_control_mode:1; uint32_t shadow_function:3; uint32_t pad:4; @@ -401,14 +383,12 @@ typedef struct gen7_sampler_state uint32_t min_lod:12; } ss1; - struct - { + struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; - struct - { + struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; @@ -427,4 +407,5 @@ typedef struct gen7_sampler_state #undef BITFIELD_BIT #undef BITFIELD_RANGE -#endif /* __GENX_STRUCTS_H__ */ +#endif /* __INTEL_STRUCTS_H__ */ + -- 2.7.4