From e7c9adca5726a8c96de20ae7c5f21a30061db392 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 12 Dec 2017 12:05:03 -0800 Subject: [PATCH] intel/eu: Plumb header present bit to codegen helpers for HDC messages. This makes sure that the header-present bit of the message descriptor is in sync with the IR instruction fields, which gives the optimizer more control to avoid the overhead of setting up a message header when it's possible to do so. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_eu.h | 18 ++++++++++++------ src/intel/compiler/brw_eu_emit.c | 30 ++++++++++++++++++------------ src/intel/compiler/brw_fs_generator.cpp | 20 ++++++++++++++------ src/intel/compiler/brw_vec4_generator.cpp | 11 ++++++----- 4 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 2d0f56f..a5f28d8 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected); + bool response_expected, + bool header_present); void brw_untyped_surface_read(struct brw_codegen *p, @@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels); + unsigned num_channels, + bool header_present); void brw_typed_atomic(struct brw_codegen *p, @@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected); + bool response_expected, + bool header_present); void brw_typed_surface_read(struct brw_codegen *p, @@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels); + unsigned num_channels, + bool header_present); void brw_typed_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels); + unsigned num_channels, + bool header_present); void brw_byte_scattered_read(struct brw_codegen *p, @@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned bit_size); + unsigned bit_size, + bool header_present); void brw_memory_fence(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 14b1c59..44abede 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2883,7 +2883,8 @@ brw_untyped_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected) + bool response_expected, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? @@ -2901,7 +2902,7 @@ brw_untyped_atomic(struct brw_codegen *p, p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, brw_surface_payload_size(p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, true), - align1); + header_present); brw_set_dp_untyped_atomic_message( p, insn, atomic_op, response_expected); @@ -2984,7 +2985,8 @@ brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels) + unsigned num_channels, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? @@ -2996,7 +2998,7 @@ brw_untyped_surface_write(struct brw_codegen *p, WRITEMASK_X : WRITEMASK_XYZW; struct brw_inst *insn = brw_send_indirect_surface_message( p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, align1); + payload, surface, msg_length, 0, header_present); brw_set_dp_untyped_surface_write_message( p, insn, num_channels); @@ -3054,7 +3056,8 @@ brw_byte_scattered_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned bit_size) + unsigned bit_size, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; assert(devinfo->gen > 7 || devinfo->is_haswell); @@ -3063,7 +3066,7 @@ brw_byte_scattered_write(struct brw_codegen *p, struct brw_inst *insn = brw_send_indirect_surface_message( p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW), - payload, surface, msg_length, 0, true); + payload, surface, msg_length, 0, header_present); unsigned msg_control = brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; @@ -3119,7 +3122,8 @@ brw_typed_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected) { + bool response_expected, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : @@ -3131,7 +3135,7 @@ brw_typed_atomic(struct brw_codegen *p, p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, brw_surface_payload_size(p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, false), - true); + header_present); brw_set_dp_typed_atomic_message( p, insn, atomic_op, response_expected); @@ -3175,7 +3179,8 @@ brw_typed_surface_read(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels) + unsigned num_channels, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? @@ -3185,7 +3190,7 @@ brw_typed_surface_read(struct brw_codegen *p, p, sfid, dst, payload, surface, msg_length, brw_surface_payload_size(p, num_channels, devinfo->gen >= 8 || devinfo->is_haswell, false), - true); + header_present); brw_set_dp_typed_surface_read_message( p, insn, num_channels); @@ -3229,7 +3234,8 @@ brw_typed_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels) + unsigned num_channels, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? @@ -3241,7 +3247,7 @@ brw_typed_surface_write(struct brw_codegen *p, WRITEMASK_X : WRITEMASK_XYZW); struct brw_inst *insn = brw_send_indirect_surface_message( p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, true); + payload, surface, msg_length, 0, header_present); brw_set_dp_typed_surface_write_message( p, insn, num_channels); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 557b098..60944a9 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2118,10 +2118,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_UNTYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null()); + inst->mlen, !inst->dst.is_null(), + inst->header_size); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: + assert(!inst->header_size); assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, src[2].ud); @@ -2130,10 +2132,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_write(p, src[0], src[1], - inst->mlen, src[2].ud); + inst->mlen, src[2].ud, + inst->header_size); break; case SHADER_OPCODE_BYTE_SCATTERED_READ: + assert(!inst->header_size); assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_byte_scattered_read(p, dst, src[0], src[1], inst->mlen, src[2].ud); @@ -2142,24 +2146,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_BYTE_SCATTERED_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_byte_scattered_write(p, src[0], src[1], - inst->mlen, src[2].ud); + inst->mlen, src[2].ud, + inst->header_size); break; case SHADER_OPCODE_TYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_atomic(p, dst, src[0], src[1], - src[2].ud, inst->mlen, !inst->dst.is_null()); + src[2].ud, inst->mlen, !inst->dst.is_null(), + inst->header_size); break; case SHADER_OPCODE_TYPED_SURFACE_READ: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); + inst->mlen, src[2].ud, + inst->header_size); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud); + brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud, + inst->header_size); break; case SHADER_OPCODE_MEMORY_FENCE: diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 6fa6e35..ecf9ed0 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1869,10 +1869,11 @@ generate_code(struct brw_codegen *p, case SHADER_OPCODE_UNTYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, - !inst->dst.is_null()); + !inst->dst.is_null(), inst->header_size); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: + assert(!inst->header_size); assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, src[2].ud); @@ -1881,25 +1882,25 @@ generate_code(struct brw_codegen *p, case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_write(p, src[0], src[1], inst->mlen, - src[2].ud); + src[2].ud, inst->header_size); break; case SHADER_OPCODE_TYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, - !inst->dst.is_null()); + !inst->dst.is_null(), inst->header_size); break; case SHADER_OPCODE_TYPED_SURFACE_READ: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen, - src[2].ud); + src[2].ud, inst->header_size); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_surface_write(p, src[0], src[1], inst->mlen, - src[2].ud); + src[2].ud, inst->header_size); break; case SHADER_OPCODE_MEMORY_FENCE: -- 2.7.4