From a1a2a8dfda7b9cac7e36bf8853d984a6009fc27e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Daniel=20Sch=C3=BCrmann?=
Date: Fri, 23 Feb 2018 13:54:58 +0100
Subject: [PATCH] nir: add AMD_gcn_shader extended instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniel Schürmann
Reviewed-by: Bas Nieuwenhuizen
---
Review notes (not part of the commit message; text between the "---"
separator and the first "diff --git" line is not applied):

 * The patch registers two opcodes through unop_horiz(). Both read the
   three components src0.x / src0.y / src0.z as 32-bit floats.
   cube_face_coord writes dst.x and dst.y; cube_face_index writes dst.x
   only.
 * Both constant expressions pick a face by comparing |x|, |y| and |z|
   with ">=" in six independent "if" statements (no "else"). When two
   magnitudes tie, more than one test is true and the assignment made by
   the last matching test is the one that remains.
 * cube_face_coord zeroes dst.x and dst.y before the tests, while
   cube_face_index has no such initial assignment. If none of the six
   tests holds (presumably only possible for NaN inputs), this expression
   never assigns dst.x. NOTE(review): confirm whether the code generated
   around the expression pre-initialises dst.
 * nir_lower_alu_to_scalar.c: the two new opcodes are added to the same
   case group as nir_op_vec2/3/4, i.e. the group the existing comment
   describes as not needing scalarization.

 src/compiler/nir/nir_lower_alu_to_scalar.c |  2 ++
 src/compiler/nir/nir_opcodes.py            | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 080d980..a0377dc 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -96,6 +96,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
    case nir_op_vec4:
    case nir_op_vec3:
    case nir_op_vec2:
+   case nir_op_cube_face_coord:
+   case nir_op_cube_face_index:
       /* We don't need to scalarize these ops, they're the ones generated to
        * group up outputs into a value that can be SSAed.
        */
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 97da4db..65d1320 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -352,6 +352,34 @@ for i in xrange(1, 5):
    for j in xrange(1, 5):
       unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
 
+
+# AMD_gcn_shader extended instructions
+unop_horiz("cube_face_coord", 2, tfloat32, 3, tfloat32, """
+dst.x = dst.y = 0.0;
+float absX = fabs(src0.x);
+float absY = fabs(src0.y);
+float absZ = fabs(src0.z);
+if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = -src0.z; }
+if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = src0.z; }
+if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.z; dst.y = src0.x; }
+if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = -src0.z; dst.y = src0.x; }
+if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = src0.x; }
+if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = -src0.x; }
+""")
+
+unop_horiz("cube_face_index", 1, tfloat32, 3, tfloat32, """
+float absX = fabs(src0.x);
+float absY = fabs(src0.y);
+float absZ = fabs(src0.z);
+if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0;
+if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1;
+if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2;
+if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3;
+if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
+if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
+""")
+
+
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
 
-- 
2.7.4