From 161d3e81bef19ddd7870ee88d50479439f28e82b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 14 Feb 2017 22:15:16 -0800 Subject: [PATCH] nir: Combine the int and double [un]pack opcodes NIR is a typeless IR and the two opcodes, when considered bitwise, do exactly the same thing. There's no reason to have two versions. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/glsl_to_nir.cpp | 10 ++----- src/compiler/nir/nir_lower_alu_to_scalar.c | 3 +- src/compiler/nir/nir_lower_double_ops.c | 22 +++++++------- src/compiler/nir/nir_lower_double_packing.c | 46 ++++++++--------------------- src/compiler/nir/nir_opcodes.py | 21 ++++--------- src/compiler/nir/nir_opt_algebraic.py | 2 +- src/compiler/spirv/vtn_glsl450.c | 4 +-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 31 ++++++------------- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 8 ++--- 9 files changed, 48 insertions(+), 99 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 96d8164..00f20da 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1581,18 +1581,14 @@ nir_visitor::visit(ir_expression *ir) result = nir_unpack_half_2x16(&b, srcs[0]); break; case ir_unop_pack_double_2x32: - result = nir_pack_double_2x32(&b, srcs[0]); - break; - case ir_unop_unpack_double_2x32: - result = nir_unpack_double_2x32(&b, srcs[0]); - break; case ir_unop_pack_int_2x32: case ir_unop_pack_uint_2x32: - result = nir_pack_int_2x32(&b, srcs[0]); + result = nir_pack_64_2x32(&b, srcs[0]); break; + case ir_unop_unpack_double_2x32: case ir_unop_unpack_int_2x32: case ir_unop_unpack_uint_2x32: - result = nir_unpack_int_2x32(&b, srcs[0]); + result = nir_unpack_64_2x32(&b, srcs[0]); break; case ir_unop_bitfield_reverse: result = nir_bitfield_reverse(&b, srcs[0]); diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 8a967c5..080d980 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -188,8 +188,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) return true; } - case nir_op_unpack_double_2x32: - case nir_op_unpack_int_2x32: + case nir_op_unpack_64_2x32: return false; LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index fdd0f44..ad96313 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -41,22 +41,22 @@ static nir_ssa_def * set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp) { /* Split into bits 0-31 and 32-63 */ - nir_ssa_def *lo = nir_unpack_double_2x32_split_x(b, src); - nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src); + nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent * to 1023 */ nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi); /* recombine */ - return nir_pack_double_2x32_split(b, lo, new_hi); + return nir_pack_64_2x32_split(b, lo, new_hi); } static nir_ssa_def * get_exponent(nir_builder *b, nir_ssa_def *src) { /* get bits 32-63 */ - nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src); + nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src); /* extract bits 20-30 of the high word */ return nir_ubitfield_extract(b, hi, nir_imm_int(b, 20), nir_imm_int(b, 11)); @@ -67,7 +67,7 @@ get_exponent(nir_builder *b, nir_ssa_def *src) static nir_ssa_def * get_signed_inf(nir_builder *b, nir_ssa_def *zero) { - nir_ssa_def *zero_hi = nir_unpack_double_2x32_split_y(b, zero); + nir_ssa_def *zero_hi = nir_unpack_64_2x32_split_y(b, zero); /* The bit pattern for infinity is 0x7ff0000000000000, where the sign bit * is the highest bit. Only the sign bit can be non-zero in the passed in @@ -76,7 +76,7 @@ get_signed_inf(nir_builder *b, nir_ssa_def *zero) * bits and then pack it together with zero low 32 bits. */ nir_ssa_def *inf_hi = nir_ior(b, nir_imm_int(b, 0x7ff00000), zero_hi); - return nir_pack_double_2x32_split(b, nir_imm_int(b, 0), inf_hi); + return nir_pack_64_2x32_split(b, nir_imm_int(b, 0), inf_hi); } /* @@ -337,8 +337,8 @@ lower_trunc(nir_builder *b, nir_ssa_def *src) nir_imm_int(b, ~0), nir_isub(b, frac_bits, nir_imm_int(b, 32)))); - nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src); - nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src); + nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src); + nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src); return nir_bcsel(b, @@ -346,9 +346,9 @@ lower_trunc(nir_builder *b, nir_ssa_def *src) nir_imm_double(b, 0.0), nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)), src, - nir_pack_double_2x32_split(b, - nir_iand(b, mask_lo, src_lo), - nir_iand(b, mask_hi, src_hi)))); + nir_pack_64_2x32_split(b, + nir_iand(b, mask_lo, src_lo), + nir_iand(b, mask_hi, src_hi)))); } static nir_ssa_def * diff --git a/src/compiler/nir/nir_lower_double_packing.c b/src/compiler/nir/nir_lower_double_packing.c index 6bb01ff..61c4ea6 100644 --- a/src/compiler/nir/nir_lower_double_packing.c +++ b/src/compiler/nir/nir_lower_double_packing.c @@ -35,31 +35,17 @@ */ static nir_ssa_def * -lower_pack_double(nir_builder *b, nir_ssa_def *src) +lower_pack_64(nir_builder *b, nir_ssa_def *src) { - return nir_pack_double_2x32_split(b, nir_channel(b, src, 0), - nir_channel(b, src, 1)); + return nir_pack_64_2x32_split(b, nir_channel(b, src, 0), + nir_channel(b, src, 1)); } static nir_ssa_def * -lower_unpack_double(nir_builder *b, nir_ssa_def *src) +lower_unpack_64(nir_builder *b, nir_ssa_def *src) { - return nir_vec2(b, nir_unpack_double_2x32_split_x(b, src), - nir_unpack_double_2x32_split_y(b, src)); -} - -static nir_ssa_def * -lower_pack_int(nir_builder *b, nir_ssa_def *src) -{ - return nir_pack_int_2x32_split(b, nir_channel(b, src, 0), - nir_channel(b, src, 1)); -} - -static nir_ssa_def * -lower_unpack_int(nir_builder *b, nir_ssa_def *src) -{ - return nir_vec2(b, nir_unpack_int_2x32_split_x(b, src), - nir_unpack_int_2x32_split_y(b, src)); + return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), + nir_unpack_64_2x32_split_y(b, src)); } static void @@ -75,10 +61,8 @@ lower_double_pack_impl(nir_function_impl *impl) nir_alu_instr *alu_instr = (nir_alu_instr *) instr; - if (alu_instr->op != nir_op_pack_double_2x32 && - alu_instr->op != nir_op_unpack_double_2x32 && - alu_instr->op != nir_op_pack_int_2x32 && - alu_instr->op != nir_op_unpack_int_2x32) + if (alu_instr->op != nir_op_pack_64_2x32 && + alu_instr->op != nir_op_unpack_64_2x32) continue; b.cursor = nir_before_instr(&alu_instr->instr); @@ -87,17 +71,11 @@ lower_double_pack_impl(nir_function_impl *impl) nir_ssa_def *dest; switch (alu_instr->op) { - case nir_op_pack_double_2x32: - dest = lower_pack_double(&b, src); - break; - case nir_op_unpack_double_2x32: - dest = lower_unpack_double(&b, src); - break; - case nir_op_pack_int_2x32: - dest = lower_pack_int(&b, src); + case nir_op_pack_64_2x32: + dest = lower_pack_64(&b, src); break; - case nir_op_unpack_int_2x32: - dest = lower_unpack_int(&b, src); + case nir_op_unpack_64_2x32: + dest = lower_unpack_64(&b, src); break; default: unreachable("Impossible opcode"); diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index ece673c..b116fcf 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -285,16 +285,10 @@ dst.x = (src0.x << 0) | (src0.w << 24); """) -unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32, +unop_horiz("pack_64_2x32", 1, tuint64, 2, tuint32, "dst.x = src0.x | ((uint64_t)src0.y << 32);") -unop_horiz("pack_int_2x32", 1, tint64, 2, tint32, - "dst.x = src0.x | ((int64_t)src0.y << 32);") - -unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64, - "dst.x = src0.x; dst.y = src0.x >> 32;") - -unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64, +unop_horiz("unpack_64_2x32", 2, tuint32, 1, tuint64, "dst.x = src0.x; dst.y = src0.x >> 32;") # Lowered floating point unpacking operations. @@ -305,10 +299,8 @@ unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32, unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32, "unpack_half_1x16((uint16_t)(src0.x >> 16))") -unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0") -unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32") -unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0") -unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32") +unop_convert("unpack_64_2x32_split_x", tuint32, tuint64, "src0") +unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, "src0 >> 32") # Bit operations, part of ARB_gpu_shader5. @@ -588,10 +580,7 @@ binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)") binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") -binop_convert("pack_double_2x32_split", tuint64, tuint32, "", - "src0 | ((uint64_t)src1 << 32)") - -binop_convert("pack_int_2x32_split", tuint64, tuint32, "", +binop_convert("pack_64_2x32_split", tuint64, tuint32, "", "src0 | ((uint64_t)src1 << 32)") # bfm implements the behavior of the first operation of the SM5 "bfi" assembly diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index aaad45a..8a6dd07 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -434,7 +434,7 @@ def fexp2i(exp, bits): if bits == 32: return ('ishl', ('iadd', exp, 127), 23) elif bits == 64: - return ('pack_double_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20)) + return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20)) else: assert False diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index dd38cc9..5d38431 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -452,13 +452,13 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; - case GLSLstd450PackDouble2x32: return nir_op_pack_double_2x32; + case GLSLstd450PackDouble2x32: return nir_op_pack_64_2x32; case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; - case GLSLstd450UnpackDouble2x32: return nir_op_unpack_double_2x32; + case GLSLstd450UnpackDouble2x32: return nir_op_unpack_64_2x32; default: unreachable("No NIR equivalent"); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 94f2751..91c14eb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1212,7 +1212,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; - case nir_op_pack_double_2x32_split: + case nir_op_pack_64_2x32_split: /* Optimize the common case where we are re-packing a double with * the result of a previous double unpack. In this case we can take the * 32-bit value to use in the re-pack from the original double and bypass @@ -1227,8 +1227,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) continue; const nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); - if (alu_parent->op == nir_op_unpack_double_2x32_split_x || - alu_parent->op == nir_op_unpack_double_2x32_split_y) + if (alu_parent->op == nir_op_unpack_64_2x32_split_x || + alu_parent->op == nir_op_unpack_64_2x32_split_y) continue; if (!alu_parent->src[0].src.is_ssa) @@ -1237,7 +1237,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) op[i] = get_nir_src(alu_parent->src[0].src); op[i] = offset(retype(op[i], BRW_REGISTER_TYPE_DF), bld, alu_parent->src[0].swizzle[channel]); - if (alu_parent->op == nir_op_unpack_double_2x32_split_y) + if (alu_parent->op == nir_op_unpack_64_2x32_split_y) op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 1); else op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 0); @@ -1245,18 +1245,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) bld.emit(FS_OPCODE_PACK, result, op[0], op[1]); break; - case nir_op_unpack_double_2x32_split_x: - case nir_op_unpack_double_2x32_split_y: { + case nir_op_unpack_64_2x32_split_x: + case nir_op_unpack_64_2x32_split_y: { /* Optimize the common case where we are unpacking from a double we have * previously packed. In this case we can just bypass the pack operation * and source directly from its arguments. */ - unsigned index = (instr->op == nir_op_unpack_double_2x32_split_x) ? 0 : 1; + unsigned index = (instr->op == nir_op_unpack_64_2x32_split_x) ? 0 : 1; if (instr->src[0].src.is_ssa) { nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr; if (parent_instr->type == nir_instr_type_alu) { nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); - if (alu_parent->op == nir_op_pack_double_2x32_split && + if (alu_parent->op == nir_op_pack_64_2x32_split && alu_parent->src[index].src.is_ssa) { op[0] = retype(get_nir_src(alu_parent->src[index].src), BRW_REGISTER_TYPE_UD); @@ -1268,20 +1268,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) } } - if (instr->op == nir_op_unpack_double_2x32_split_x) - bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); - else - bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); - break; - } - - case nir_op_pack_int_2x32_split: - bld.emit(FS_OPCODE_PACK, result, op[0], op[1]); - break; - - case nir_op_unpack_int_2x32_split_x: - case nir_op_unpack_int_2x32_split_y: { - if (instr->op == nir_op_unpack_int_2x32_split_x) + if (instr->op == nir_op_unpack_64_2x32_split_x) bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); else bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 2127415..a7f048a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1762,7 +1762,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; } - case nir_op_pack_double_2x32_split: { + case nir_op_pack_64_2x32_split: { dst_reg result = dst_reg(this, glsl_type::dvec4_type); dst_reg tmp = dst_reg(this, glsl_type::uvec4_type); emit(MOV(tmp, retype(op[0], BRW_REGISTER_TYPE_UD))); @@ -1773,9 +1773,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; } - case nir_op_unpack_double_2x32_split_x: - case nir_op_unpack_double_2x32_split_y: { - enum opcode oper = (instr->op == nir_op_unpack_double_2x32_split_x) ? + case nir_op_unpack_64_2x32_split_x: + case nir_op_unpack_64_2x32_split_y: { + enum opcode oper = (instr->op == nir_op_unpack_64_2x32_split_x) ? VEC4_OPCODE_PICK_LOW_32BIT : VEC4_OPCODE_PICK_HIGH_32BIT; dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); emit(MOV(tmp, op[0])); -- 2.7.4