From 3cbc02e4693030d18a24602cf72e693b92e1a7a3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Aug 2018 11:01:24 -0500 Subject: [PATCH] intel: Use TXS for image_size when we have a typed surface Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_fs_generator.cpp | 23 +++++++++++--- src/intel/compiler/brw_fs_nir.cpp | 35 ++++++++++++++++++++++ .../compiler/brw_nir_lower_image_load_store.c | 15 ++++++++++ src/intel/compiler/brw_shader.cpp | 3 ++ 5 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 883616d..5295788 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -354,6 +354,8 @@ enum opcode { SHADER_OPCODE_SAMPLEINFO, SHADER_OPCODE_SAMPLEINFO_LOGICAL, + SHADER_OPCODE_IMAGE_SIZE, + /** * Combines multiple sources of size 1 into a larger virtual GRF. * For example, parameters for a send-from-GRF message. Or, updating diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index d40ce2c..cb402cd 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -958,6 +958,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src } break; case SHADER_OPCODE_TXS: + case SHADER_OPCODE_IMAGE_SIZE: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TXD: @@ -1126,10 +1127,19 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src } } - uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 || - inst->opcode == SHADER_OPCODE_TG4_OFFSET) - ? 
prog_data->binding_table.gather_texture_start - : prog_data->binding_table.texture_start; + uint32_t base_binding_table_index; + switch (inst->opcode) { + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + base_binding_table_index = prog_data->binding_table.gather_texture_start; + break; + case SHADER_OPCODE_IMAGE_SIZE: + base_binding_table_index = prog_data->binding_table.image_start; + break; + default: + base_binding_table_index = prog_data->binding_table.texture_start; + break; + } if (surface_index.file == BRW_IMMEDIATE_VALUE && sampler_index.file == BRW_IMMEDIATE_VALUE) { @@ -2114,6 +2124,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_SAMPLEINFO: generate_tex(inst, dst, src[0], src[1], src[2]); break; + + case SHADER_OPCODE_IMAGE_SIZE: + generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0)); + break; + case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: generate_ddx(inst, dst, src[0]); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index aaba0e2..2fef050 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3918,6 +3918,41 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_image_size: { + /* Unlike the [un]typed load and store opcodes, the TXS that this turns + * into will handle the binding table index for us in the generator. + */ + fs_reg image = retype(get_nir_src_imm(instr->src[0]), + BRW_REGISTER_TYPE_UD); + image = bld.emit_uniformize(image); + + /* Since the image size is always uniform, we can just emit a SIMD8 + * query instruction and splat the result out. 
+ */ + const fs_builder ubld = bld.exec_all().group(8, 0); + + /* The LOD also serves as the message payload */ + fs_reg lod = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.MOV(lod, brw_imm_ud(0)); + + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4); + fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE, tmp, lod, image); + inst->mlen = 1; + inst->size_written = 4 * REG_SIZE; + + for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) { + if (c == 2 && nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_CUBE) { + bld.emit(SHADER_OPCODE_INT_QUOTIENT, + offset(retype(dest, tmp.type), bld, c), + component(offset(tmp, ubld, c), 0), brw_imm_ud(6)); + } else { + bld.MOV(offset(retype(dest, tmp.type), bld, c), + component(offset(tmp, ubld, c), 0)); + } + } + break; + } + case nir_intrinsic_image_load_raw_intel: { const fs_reg image = get_nir_image_intrinsic_image(bld, instr); const fs_reg addr = retype(get_nir_src(instr->src[1]), diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c index 5eba9dd..e8e00e1 100644 --- a/src/intel/compiler/brw_nir_lower_image_load_store.c +++ b/src/intel/compiler/brw_nir_lower_image_load_store.c @@ -725,6 +725,21 @@ lower_image_size_instr(nir_builder *b, nir_intrinsic_instr *intrin) { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + /* For write-only images, we have an actual image surface so we fall back + * and let the back-end emit a TXS for this. + */ + if (var->data.image.access & ACCESS_NON_READABLE) + return false; + + /* If we have a matching typed format, then we have an actual image surface + * so we fall back and let the back-end emit a TXS for this. 
+ */ + const enum isl_format image_fmt = + isl_format_for_gl_format(var->data.image.format); + if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) + return false; b->cursor = nir_instr_remove(&intrin->instr); diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 27de7c4..798c799 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -267,6 +267,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_SAMPLEINFO_LOGICAL: return "sampleinfo_logical"; + case SHADER_OPCODE_IMAGE_SIZE: + return "image_size"; + case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; -- 2.7.4