From f8ea98e4ec4078924f79b0aadd2754726b835f5c Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 19 Feb 2016 23:18:33 -0500 Subject: [PATCH] st/mesa: add GL_ARB_shader_atomic_counter_ops support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilia Mirkin Reviewed-by: Nicolai Hähnle --- docs/relnotes/11.3.0.html | 1 + src/mesa/state_tracker/st_extensions.c | 4 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 60 +++++++++++++++++++++++++++--- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index 99445c2..c31296e 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_internalformat_query2 on i965
  • +
  • GL_ARB_shader_atomic_counter_ops on nvc0
  • GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp
  • GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 42d347c..3666ece 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -407,8 +407,10 @@ void st_init_limits(struct pipe_screen *screen, c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS); - if (c->MaxCombinedAtomicBuffers > 0) + if (c->MaxCombinedAtomicBuffers > 0) { extensions->ARB_shader_atomic_counters = GL_TRUE; + extensions->ARB_shader_atomic_counter_ops = GL_TRUE; + } c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers; c->ShaderStorageBufferOffsetAlignment = diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 26e463e..92cd775 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3158,8 +3158,8 @@ void glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) { const char *callee = ir->callee->function_name(); - ir_dereference *deref = static_cast( - ir->actual_parameters.get_head()); + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *deref = static_cast(param); ir_variable *location = deref->variable_referenced(); st_src_reg buffer( @@ -3188,17 +3188,56 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) if (!strcmp("__intrinsic_atomic_read", callee)) { inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); - inst->buffer = buffer; } else if (!strcmp("__intrinsic_atomic_increment", callee)) { inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, st_src_reg_for_int(1)); - inst->buffer = buffer; } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, st_src_reg_for_int(-1)); - inst->buffer = buffer; emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else if (!strcmp("__intrinsic_atomic_sub", callee)) { + opcode = TGSI_OPCODE_ATOMUADD; + st_src_reg res = get_temp(glsl_type::uvec4_type); + st_dst_reg dstres = st_dst_reg(res); + dstres.writemask = dst.writemask; + emit_asm(ir, TGSI_OPCODE_INEG, dstres, data); + data = res; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, offset, data, data2); } + + inst->buffer = buffer; } void @@ -3591,7 +3630,16 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) /* Filter out intrinsics */ if (!strcmp("__intrinsic_atomic_read", callee) || !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { + !strcmp("__intrinsic_atomic_predecrement", callee) || + !strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_sub", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { visit_atomic_counter_intrinsic(ir); return; } -- 2.7.4