From 795646d8f863ec2200fa8b92c036b0897f2bdd2b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 9 Mar 2020 14:09:04 -0400 Subject: [PATCH] pan/bi: Generalize swizzles to avoid extracts We'd really rather not emit extracts. We are approaching a vector IR anyway, which is annoying but really necessary to handle I/O and fp16 correctly. So let's just go all the way and deal with swizzles and masks within reason; it'll still be somewhat saner in the long term. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_print.c | 16 +++++++--------- src/panfrost/bifrost/bifrost_compile.c | 6 ++++++ src/panfrost/bifrost/compiler.h | 15 ++++++--------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c index cd266a9..58d0d75 100644 --- a/src/panfrost/bifrost/bi_print.c +++ b/src/panfrost/bifrost/bi_print.c @@ -118,7 +118,6 @@ bi_class_name(enum bi_class cl) case BI_CONVERT: return "convert"; case BI_CSEL: return "csel"; case BI_DISCARD: return "discard"; - case BI_EXTRACT: return "extract"; case BI_FMA: return "fma"; case BI_FREXP: return "frexp"; case BI_LOAD: return "load"; @@ -209,17 +208,16 @@ bi_print_alu_type(nir_alu_type t, FILE *fp) } static void -bi_print_swizzle(bi_instruction *ins, FILE *fp) +bi_print_swizzle(bi_instruction *ins, unsigned src, FILE *fp) { - unsigned size = nir_alu_type_get_type_size(ins->dest_type); - unsigned count = 32 / size; - assert(size == 8 || size == 16); + unsigned size = MAX2(nir_alu_type_get_type_size(ins->dest_type), 8); + unsigned count = (size == 64) ? 
1 : (32 / size); fprintf(fp, "."); for (unsigned u = 0; u < count; ++u) { - assert(ins->swizzle[u] < size); - fputc("xyzw"[ins->swizzle[u]], fp); + assert(ins->swizzle[src][u] < 4); + fputc("xyzw"[ins->swizzle[src][u]], fp); } } @@ -318,8 +316,8 @@ bi_print_instruction(bi_instruction *ins, FILE *fp) bi_foreach_src(ins, s) { bi_print_src(fp, ins, s); - if (bi_is_src_swizzled(ins, s)) - bi_print_swizzle(ins, fp); + if (ins->src[s] && !(ins->src[s] & (BIR_INDEX_CONSTANT | BIR_INDEX_ZERO))) + bi_print_swizzle(ins, s, fp); bool is_convert = ins->type == BI_CONVERT && s == 0; bool is_branch = ins->type == BI_BRANCH && s < 2 && ins->branch.cond != BI_COND_ALWAYS; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 1c3a0f0..79164ac 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -106,6 +106,9 @@ bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr) .blend_location = nir_intrinsic_base(instr), .src = { bir_src_index(&instr->src[0]) + }, + .swizzle = { + { 0, 1, 2, 3 } } }; @@ -158,6 +161,9 @@ bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr) .src = { address.dest, bir_src_index(&instr->src[0]) + }, + .swizzle = { + { 0, 1, 2, 3 } } }; diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 14e0bc8..d0ca32a 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -56,7 +56,6 @@ enum bi_class { BI_CONVERT, BI_CSEL, BI_DISCARD, - BI_EXTRACT, BI_FMA, BI_FREXP, BI_LOAD, @@ -191,8 +190,7 @@ typedef struct { unsigned dest; unsigned src[BIR_SRC_COUNT]; - /* If one of the sources has BIR_INDEX_CONSTANT... Also, for - * BI_EXTRACT, the component index is stored here. 
*/ + /* If one of the sources has BIR_INDEX_CONSTANT */ union { uint64_t u64; uint32_t u32; @@ -218,12 +216,11 @@ typedef struct { /* Source types if required by the class */ nir_alu_type src_types[BIR_SRC_COUNT]; - /* If the source type is 8-bit or 16-bit such that SIMD is possible, and - * the class has BI_SWIZZLABLE, this is a swizzle for the input. Swizzles - * in practice only occur with one-source arguments (conversions, - * dedicated swizzle ops) and as component selection on two-sources - * where it is unambiguous which is which. Bounds are 32/type_size. */ - unsigned swizzle[4]; + /* If the source type is 8-bit or 16-bit such that SIMD is possible, + * and the class has BI_SWIZZLABLE, this is a swizzle in the usual + * sense. On non-SIMD instructions, it can be used for component + * selection, so we don't have to special case extraction. */ + uint8_t swizzle[BIR_SRC_COUNT][NIR_MAX_VEC_COMPONENTS]; /* A class-specific op from which the actual opcode can be derived * (along with the above information) */ -- 2.7.4