From 6bc1455241ce68c15f1ed8d5ca7230fac83d040d Mon Sep 17 00:00:00 2001
From: Junyan He
Date: Wed, 7 May 2014 18:03:10 +0800
Subject: [PATCH] Make the surface typed write work for HSW

1.Modify the typed write for state write using GEN_SFID_DATAPORT_DATA_CACHE.
2.Add the channel select for surface state setting.
3.Correct the send message for setting slot in send description.

Signed-off-by: Junyan He
Reviewed-by: Yang Rong
---
v2 (review):
 - Gen75Encoder::UNTYPED_READ/UNTYPED_WRITE: the elemNum range assert used
   '||' and was therefore always true; use '&&' so it really guards the
   untypedRWMask[elemNum] lookup.
 - intel_gpgpu_bind_image_gen75: check index against GEN_MAX_SURFACES before
   the surface state is written instead of after.
 - Add short comments on the new functions; flag the atomic message type
   for confirmation.

 backend/src/backend/gen75_encoder.cpp | 123 ++++++++++++++++++++++++++++++++++
 backend/src/backend/gen75_encoder.hpp |   4 ++
 backend/src/backend/gen_encoder.hpp   |  14 ++--
 src/intel/intel_defines.h             |   7 ++
 src/intel/intel_driver.c              |   2 +-
 src/intel/intel_gpgpu.c               |  76 +++++++++++++++------
 src/intel/intel_gpgpu.h               |   2 +-
 src/intel/intel_structs.h             |  11 +++-
 8 files changed, 210 insertions(+), 29 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index ede9d55..d1a8542 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -27,8 +27,42 @@
 
 #include "backend/gen75_encoder.hpp"
 
+/* rgba mask handed to setDPUntypedRW, indexed by the element count (0 to 4) */
+static const uint32_t untypedRWMask[] = {
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
+  GEN_UNTYPED_ALPHA,
+  0
+};
+
 namespace gbe
 {
+  /*! Fill the instruction header (exec size, controls, flag, predicate, saturate) from this->curr */
+  void Gen75Encoder::setHeader(GenNativeInstruction *insn) {
+    if (this->curr.execWidth == 8)
+      insn->header.execution_size = GEN_WIDTH_8;
+    else if (this->curr.execWidth == 16)
+      insn->header.execution_size = GEN_WIDTH_16;
+    else if (this->curr.execWidth == 1)
+      insn->header.execution_size = GEN_WIDTH_1;
+    else if (this->curr.execWidth == 4)
+      insn->header.execution_size = GEN_WIDTH_4;
+    else
+      NOT_IMPLEMENTED;
+    insn->header.acc_wr_control = this->curr.accWrEnable;
+    insn->header.quarter_control = this->curr.quarterControl;
+    insn->bits1.ia1.nib_ctrl = this->curr.nibControl;
+    insn->header.mask_control = this->curr.noMask;
+    insn->bits2.ia1.flag_reg_nr = this->curr.flag;
+    insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
+    if (this->curr.predicate != GEN_PREDICATE_NONE) {
+      insn->header.predicate_control = this->curr.predicate;
+      insn->header.predicate_inverse = this->curr.inversePredicate;
+    }
+    insn->header.saturate = this->curr.saturate;
+  }
+
   void Gen75Encoder::setDPUntypedRW(GenNativeInstruction *insn,
                                     uint32_t bti,
                                     uint32_t rgba,
@@ -60,4 +94,93 @@ namespace gbe
     /* Always using the low 8 slots here. */
     insn->bits3.gen7_typed_rw.slot = 1;
   }
+
+  /*! Atomic send through GEN_SFID_DATAPORT1_DATA_CACHE; only SIMD8 and SIMD16 are handled */
+  void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    uint32_t msg_length = 0;
+    uint32_t response_length = 0;
+
+    if (this->curr.execWidth == 8) {
+      msg_length = srcNum;
+      response_length = 1;
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2 * srcNum;
+      response_length = 2;
+    } else
+      NOT_IMPLEMENTED;
+
+    this->setHeader(insn);
+    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+
+    const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA_CACHE;
+    setMessageDescriptor(insn, sfid, msg_length, response_length);
+    /* NOTE(review): a *typed* atomic message type next to the untyped read/write helpers -- confirm it is intended */
+    insn->bits3.gen7_atomic_op.msg_type = GEN75_P1_TYPED_ATOMIC_OP;
+    insn->bits3.gen7_atomic_op.bti = bti;
+    insn->bits3.gen7_atomic_op.return_data = 1;
+    insn->bits3.gen7_atomic_op.aop_type = function;
+
+    if (this->curr.execWidth == 8)
+      insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8;
+    else if (this->curr.execWidth == 16)
+      insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16;
+    else
+      NOT_SUPPORTED;
+  }
+
+  /*! Untyped read of elemNum (1 to 4) elements; only SIMD8 and SIMD16 are handled */
+  void Gen75Encoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4); /* elemNum indexes untypedRWMask[] below */
+    uint32_t msg_length = 0;
+    uint32_t response_length = 0;
+    if (this->curr.execWidth == 8) {
+      msg_length = 1;
+      response_length = elemNum;
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2;
+      response_length = 2 * elemNum;
+    } else
+      NOT_IMPLEMENTED;
+
+    this->setHeader(insn);
+    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+    setDPUntypedRW(insn,
+                   bti,
+                   untypedRWMask[elemNum],
+                   GEN75_P1_UNTYPED_READ,
+                   msg_length,
+                   response_length);
+  }
+
+  /*! Untyped write of elemNum (1 to 4) elements; only SIMD8 and SIMD16 are handled */
+  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+    assert(elemNum >= 1 && elemNum <= 4); /* elemNum indexes untypedRWMask[] below */
+    uint32_t msg_length = 0;
+    uint32_t response_length = 0;
+    this->setHeader(insn);
+    if (this->curr.execWidth == 8) {
+      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+      msg_length = 1 + elemNum;
+    } else if (this->curr.execWidth == 16) {
+      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+      msg_length = 2 * (1 + elemNum);
+    }
+    else
+      NOT_IMPLEMENTED;
+    this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+    setDPUntypedRW(insn,
+                   bti,
+                   untypedRWMask[elemNum],
+                   GEN75_P1_UNTYPED_SURFACE_WRITE,
+                   msg_length,
+                   response_length);
+  }
 } /* End of the name space. */
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index 53db3a7..a107202 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -34,6 +34,10 @@ namespace gbe
     Gen75Encoder(uint32_t simdWidth, uint32_t gen, uint32_t deviceID)
          : GenEncoder(simdWidth, gen, deviceID, 8) { };
 
+    virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+    virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void setHeader(GenNativeInstruction *insn);
     virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
                                 uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
     virtual void setTypedWriteMessage(GenNativeInstruction *insn, unsigned char bti,
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 31d7f4e..d44d323 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -161,15 +161,15 @@ namespace gbe
     /*! Wait instruction (used for the barrier) */
     void WAIT(void);
     /*! Atomic instructions */
-    void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
+    virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
     /*! Read 64-bits float/int arrays */
     void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Write 64-bits float/int arrays */
     void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
     /*! Untyped read (upto 4 channels) */
-    void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
-    void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
     /*! Byte gather (for unaligned bytes, shorts and ints) */
     void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
     /*! Byte scatter (for unaligned bytes, shorts and ints) */
@@ -193,9 +193,9 @@ namespace gbe
                 bool isLD);
 
     /*! TypedWrite instruction for texture */
-    void TYPED_WRITE(GenRegister header,
-                     bool header_present,
-                     unsigned char bti);
+    virtual void TYPED_WRITE(GenRegister header,
+                             bool header_present,
+                             unsigned char bti);
     /*! Extended math function (2 sources) */
     void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
     /*! Extended math function (1 source) */
@@ -207,6 +207,7 @@ namespace gbe
     ////////////////////////////////////////////////////////////////////////
     // Helper functions to encode
     ////////////////////////////////////////////////////////////////////////
+    virtual void setHeader(GenNativeInstruction *insn);
     virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
                                 uint32_t msg_type, uint32_t msg_length,
                                 uint32_t response_length);
@@ -216,7 +217,6 @@ namespace gbe
     void setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
                               unsigned msg_length, unsigned response_length,
                               bool header_present = false, bool end_of_thread = false);
-    void setHeader(GenNativeInstruction *insn);
     void setDst(GenNativeInstruction *insn, GenRegister dest);
     void setSrc0(GenNativeInstruction *insn, GenRegister reg);
     void setSrc1(GenNativeInstruction *insn, GenRegister reg);
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index e5015ec..5139e43 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -288,6 +288,13 @@
 #define I965_TILEWALK_XMAJOR 0
 #define I965_TILEWALK_YMAJOR 1
 
+#define I965_SURCHAN_SELECT_ZERO 0
+#define I965_SURCHAN_SELECT_ONE 1
+#define I965_SURCHAN_SELECT_RED 4
+#define I965_SURCHAN_SELECT_GREEN 5
+#define I965_SURCHAN_SELECT_BLUE 6
+#define I965_SURCHAN_SELECT_ALPHA 7
+
 #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \
                          IS_G4X(intel->device_id) ? 384 : 256)
 
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 7fd2bf3..ef97835 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -687,5 +687,5 @@ intel_setup_callbacks(void)
   cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
   cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
   cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
-  intel_set_gpgpu_callbacks();
+  intel_set_gpgpu_callbacks(intel_get_device_id());
 }
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index fbeef11..3d6fd30 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -629,6 +629,55 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
   gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
+
+  assert(index < GEN_MAX_SURFACES);
+}
+
+/* Bind an image on Haswell: fill surface state slot `index` of the surface heap,
+ * including the ss7 shader channel selects, and record the bo as bound. */
+static void
+intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
+                             uint32_t index,
+                             dri_bo* obj_bo,
+                             uint32_t obj_bo_offset,
+                             uint32_t format,
+                             cl_mem_object_type type,
+                             int32_t w,
+                             int32_t h,
+                             int32_t depth,
+                             int32_t pitch,
+                             int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+
+  assert(index < GEN_MAX_SURFACES); /* check before the surface state is written */
+  memset(ss, 0, sizeof(*ss));
+
+  ss->ss0.surface_type = intel_get_surface_type(type);
+  ss->ss0.surface_format = format;
+  ss->ss1.base_addr = obj_bo->offset;
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.depth = depth - 1;
+  ss->ss4.not_str_buf.rt_view_extent = depth - 1;
+  ss->ss4.not_str_buf.min_array_element = 0;
+  ss->ss3.pitch = pitch - 1;
+  ss->ss5.cache_control = cc_llc_l3;
+  ss->ss7.shader_r = I965_SURCHAN_SELECT_RED;
+  ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
+  ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
+  ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
+  if (tiling == GPGPU_TILE_X) {
+    ss->ss0.tiled_surface = 1;
+    ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+  } else if (tiling == GPGPU_TILE_Y) {
+    ss->ss0.tiled_surface = 1;
+    ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+  }
+  ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+  intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
+  gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
 }
 
 static void
@@ -668,23 +717,6 @@ intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint
 }
 
 static void
-intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
-                       uint32_t index,
-                       cl_buffer *obj_bo,
-                       uint32_t obj_bo_offset,
-                       uint32_t format,
-                       cl_mem_object_type type,
-                       int32_t w,
-                       int32_t h,
-                       int32_t depth,
-                       int32_t pitch,
-                       cl_gpgpu_tiling tiling)
-{
-  intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling);
-  assert(index < GEN_MAX_SURFACES);
-}
-
-static void
 intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
 {
   gen6_interface_descriptor_t *desc;
@@ -1053,12 +1085,11 @@ intel_gpgpu_event_get_exec_timestamp(intel_event_t *event,
 }
 
 LOCAL void
-intel_set_gpgpu_callbacks(void)
+intel_set_gpgpu_callbacks(int device_id)
 {
   cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
   cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
   cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
-  cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image;
   cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
   cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
   cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
@@ -1083,5 +1114,12 @@ intel_set_gpgpu_callbacks(void)
   cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
   cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
   cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+
+  if (IS_HASWELL(device_id))
+    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
+  else if (IS_IVYBRIDGE(device_id))
+    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
+  else
+    assert(0); /* no bind_image callback exists for any other device */
 }
 
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index 9918b35..d593ac7 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -28,7 +28,7 @@
 #include
 
 /* Set the gpgpu related call backs */
-extern void intel_set_gpgpu_callbacks(void);
+extern void intel_set_gpgpu_callbacks(int device_id);
 
 #endif /* __INTEL_GPGPU_H__ */
 
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index 36b5971..59a9810 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -234,7 +234,16 @@ typedef struct gen7_surface_state
   } ss5;
 
   uint32_t ss6; /* unused */
-  uint32_t ss7; /* unused */
+
+  struct {
+    uint32_t min_lod:12;
+    uint32_t pad0:4;
+    uint32_t shader_a:3;
+    uint32_t shader_b:3;
+    uint32_t shader_g:3;
+    uint32_t shader_r:3;
+    uint32_t pad1:4;
+  } ss7;
 } gen7_surface_state_t;
 
 STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
-- 
2.7.4