From b25c42d8aecad16a4c2d94c6ba76d14ec55aac73 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 22 Jul 2022 14:32:41 -0400 Subject: [PATCH] pan/va: Split out compare instructions The different combine modes form different instructions from each other and in particular from the two-source version on Bifrost. Model them as such so we can represent the relevant Valhall-specific lowering/optimizations accurately in the compiler. This requires updating the unit tests to use the new names since there's not much point keeping around the aliases. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/ISA.xml | 114 +++++++++++++++------ .../bifrost/valhall/test/assembler-cases.txt | 24 ++++- .../bifrost/valhall/test/test-lower-constants.cpp | 96 ++++++++--------- src/panfrost/bifrost/valhall/test/test-packing.cpp | 26 ++--- src/panfrost/bifrost/valhall/va_lower_isel.c | 43 +++++++- src/panfrost/bifrost/valhall/valhall.py | 2 +- 6 files changed, 200 insertions(+), 105 deletions(-) diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index 8b0e2b8..13fba76 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -2161,54 +2161,109 @@ - + - Evaluates the given condition, do a logical and/or with the condition in + Evaluates the given condition, do a logical or with the condition in the result source, and return in the given result type (integer one, integer minus one, or floating-point one). The third source is useful for chaining together conditions without intermediate bitwise arithmetic; - when this is not desired, tie it to zero and use the OR combine mode (do - not set the `.and` modifier). + when this is not desired, tie it to zero. 
+ + + + + + + A + B + C + - The sequence modifier `.seq` is used to construct 64-bit compares in 2 - `ICMP.u32` instructions, in conjunction with the `u1` result type on the - low half, the `m1` result type on the high half, and the result of the low - half comparison passed as the third source. For comparisons other than - 64-bit, do not set the `.seq` modifier and do not use the `u1` result - type. + + + Evaluates the given condition, do a logical and with the condition in + the result source, and return in the given result type (integer + one, integer minus one, or floating-point one). The third source is useful + for chaining together conditions without intermediate bitwise arithmetic. - - - + + + - - A B C - + - Evaluates the given condition, do a logical and/or with the condition in + Evaluates the given condition, do a logical or with the condition in the result source, and return in the given result type (integer one, integer minus one, or floating-point one). The third source is useful for chaining together conditions without intermediate bitwise arithmetic; - when this is not desired, tie it to zero and use the OR combine mode (do - not set the `.and` modifier). + when this is not desired, tie it to zero. + + + + + + A + B + C + + + + + Evaluates the given condition, do a logical and with the condition in + the result source, and return in the given result type (integer + one, integer minus one, or floating-point one). The third source is useful + for chaining together conditions without intermediate bitwise arithmetic. - - + + - A B C - + + + Evaluates the given condition, do a logical or with the condition in + the result source, and return in the given result type (integer + one, integer minus one, or floating-point one). The third source is useful + for chaining together conditions without intermediate bitwise arithmetic. 
+ + + + + + + A + B + C + + + + + Evaluates the given condition, do a logical and with the condition in + the result source, and return in the given result type (integer + one, integer minus one, or floating-point one). The third source is useful + for chaining together conditions without intermediate bitwise arithmetic. + + + + + + + A + B + C + + + Evaluates the given condition, do a logical and/or with the condition in the result source, and return in the given result type (integer @@ -2217,20 +2272,15 @@ when this is not desired, tie it to zero and use the OR combine mode (do not set the `.and` modifier). - The sequence modifier `.seq` is used to construct signed 64-bit compares + Used to construct signed 64-bit compares in 1 `ICMP.u32` and 1 `ICMP.s32` instruction, in conjunction with the `u1` result type on the low half, the `m1` result type on the high half, and - the result of the low half comparison passed as the third source. For - comparisons other than 64-bit, do not set the `.seq` modifier and do not - use the `u1` result type. + the result of the low half comparison passed as the third source. 
- - - + + - - A B C diff --git a/src/panfrost/bifrost/valhall/test/assembler-cases.txt b/src/panfrost/bifrost/valhall/test/assembler-cases.txt index 59a4d7c..972e3e5 100644 --- a/src/panfrost/bifrost/valhall/test/assembler-cases.txt +++ b/src/panfrost/bifrost/valhall/test/assembler-cases.txt @@ -30,7 +30,7 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0 80 7c 47 20 00 c0 a3 01 SHADDX.u64 r0, u0, ^r60.w0, shift:0x4 40 00 00 38 08 44 61 78 STORE.i128.slot0.end @r4:r5:r6:r7, ^r0, offset:0 00 00 00 00 00 c0 00 78 NOP.end -40 c4 c0 9c 01 c1 f0 00 ICMP.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0 +40 c4 c0 9c 01 c1 f0 00 ICMP_OR.u32.gt.m1 r1, ^r0, 0x1000000.b3, 0x0 42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, ^r2, offset:0 00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0 00 00 00 30 00 c7 90 00 S8_TO_S32 r7, r0.b3 @@ -87,10 +87,10 @@ f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.end @r0:r1, blend_descriptor_0.w0, 42 14 13 12 ad c2 12 01 IADD_IMM.v4i8 r2, ^r2, #0xAD121314 42 14 00 13 00 c2 11 01 IADD_IMM.v2i16 r2, ^r2, #0x130014 42 ab 4b 00 00 c2 10 01 IADD_IMM.i32 r2, ^r2, #0x4BAB -43 42 c0 84 11 c2 f9 00 ICMP.v2s16.gt.m1 r2, ^r3.h10, ^r2.h10, 0x0 -43 42 c0 90 01 c2 f5 00 FCMP.v2f16.gt.m1 r2, ^r3.h10, ^r2.h00, 0x0 +43 42 c0 84 11 c2 f9 00 ICMP_OR.v2s16.gt.m1 r2, ^r3.h10, ^r2.h10, 0x0 +43 42 c0 90 01 c2 f5 00 FCMP_OR.v2f16.gt.m1 r2, ^r3.h10, ^r2.h00, 0x0 42 00 07 00 20 c2 90 00 V2S16_TO_V2F16 r2, ^r2 -00 c0 c0 00 43 c1 f2 00 ICMP.v4u8.ne.i1 r1, r0.b0000, 0x0, 0x0 +00 c0 c0 00 43 c1 f2 00 ICMP_OR.v4u8.ne.i1 r1, r0.b0000, 0x0, 0x0 41 03 00 00 00 c0 1f 50 BRANCHZ.reconverge ^r1, offset:3 00 03 00 00 20 c0 1f 50 BRANCHZ.reconverge r0.h0, offset:3 00 03 00 00 40 c0 1f 50 BRANCHZ.reconverge r0.h1, offset:3 @@ -99,7 +99,7 @@ c0 00 00 00 00 c0 10 01 IADD_IMM.i32 r0, 0x0, #0x0 c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1 80 00 27 20 00 c2 a3 01 SHADDX.u64 r2, u0, r0.w0, shift:0x2 40 c9 00 10 00 c0 a0 00 IADD.u32 r0, ^r0, 0x7060504.b0 -00 82 c0 80 03 c1 
f0 00 ICMP.u32.ne.m1 r1, r0, u2, 0x0 +00 82 c0 80 03 c1 f0 00 ICMP_OR.u32.ne.m1 r1, r0, u2, 0x0 04 00 00 00 00 c5 91 00 MOV.i32 r5, r4 04 00 00 00 00 c6 91 00 MOV.i32 r6, r4 04 00 00 00 00 c7 91 08 MOV.i32.wait0 r7, r4 @@ -223,3 +223,17 @@ c0 f1 00 00 10 c1 2f 08 BRANCHZI.eq.absolute.wait0 0x0, blend_descriptor_0.w1 82 00 80 15 b4 80 38 49 VAR_TEX_SINGLE.slot0.skip.sample_store.f.32.2d.zero.wait @r0:r1:r2:r3, u2, u0 82 20 80 15 b4 80 38 09 VAR_TEX_SINGLE.slot0.skip.sample_store.f.32.2d.computed.wait0 @r0:r1:r2:r3, u2, u0 82 20 80 1d 84 80 38 41 VAR_TEX_SINGLE.slot0.skip.sample_store.s.32.2d.computed.wait0126 @r0, u2, u0 +40 c0 c0 80 03 c0 f0 10 ICMP_OR.u32.ne.m1.wait1 r0, ^r0, 0x0, 0x0 +42 43 40 01 01 c0 f8 00 ICMP_AND.s32.gt.i1 r0, ^r2, ^r3, ^r0 +42 c0 c0 c2 03 c0 f0 10 ICMP_MULTI.u32.ne.u1.wait1 r0, ^r2, 0x0, 0x0 +44 46 c0 c2 01 c2 f0 00 ICMP_MULTI.u32.gt.u1 r2, ^r4, ^r6, 0x0 +45 47 42 82 01 c2 f0 00 ICMP_MULTI.u32.gt.m1 r2, ^r5, ^r7, ^r2 +43 c0 40 82 03 c0 f0 00 ICMP_MULTI.u32.ne.m1 r0, ^r3, 0x0, ^r0 +40 42 c0 c2 01 c0 f0 00 ICMP_MULTI.u32.gt.u1 r0, ^r0, ^r2, 0x0 +41 43 40 82 01 c4 f8 00 ICMP_MULTI.s32.gt.m1 r4, ^r1, ^r3, ^r0 +40 c0 c0 a8 03 c0 f5 10 FCMP_OR.v2f16.ne.m1.wait1 r0, ^r0, 0x0, 0x0 +41 41 40 ad 01 c0 f5 00 FCMP_AND.v2f16.gt.m1 r0, ^r1, ^r1.h11, ^r0 +40 c0 c0 a8 03 c0 f5 10 FCMP_OR.v2f16.ne.m1.wait1 r0, ^r0, 0x0, 0x0 +41 41 40 ad 01 c0 f5 00 FCMP_AND.v2f16.gt.m1 r0, ^r1, ^r1.h11, ^r0 +c4 c0 40 10 71 c0 b6 00 LSHIFT_AND.v4i8 r0, 0x1000000.b3333, 0x0.b00, ^r0 +40 00 13 00 80 c0 90 00 V2U8_TO_V2F16 r0, ^r0.b02 diff --git a/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp b/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp index 8438ebd..2d98a8f 100644 --- a/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp +++ b/src/panfrost/bifrost/valhall/test/test-lower-constants.cpp @@ -114,70 +114,70 @@ TEST_F(LowerConstants, Int8InInt32) TEST_F(LowerConstants, ZeroExtendForUnsigned) { - CASE(bi_icmp_u32_to(b, bi_register(0), 
bi_register(0), - bi_imm_u32(0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(1), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFF), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(1), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF), + bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, SignExtendPositiveForSigned) { - CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0x7F), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(2), 3), BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0x7FFF), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(2), 1), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, SignExtendNegativeForSigned) { 
- CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFFF8), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_byte(va_lut(23), 0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFAFC), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_half(va_lut(3), 1), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, DontZeroExtendForSigned) { - CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFF), BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFF), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_s32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), - BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), + bi_iadd_imm_i32(b, 
va_lut(0), 0xFFFF), bi_register(0), + BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, DontZeroExtendNegative) { - CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFFF8), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), - BI_CMPF_LT, BI_RESULT_TYPE_I1)); - - CASE(bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_imm_u32(0xFFFFFAFC), BI_CMPF_LT, BI_RESULT_TYPE_I1), - bi_icmp_u32_to(b, bi_register(0), bi_register(0), - bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), - BI_CMPF_LT, BI_RESULT_TYPE_I1)); + CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0), + BI_CMPF_LT, BI_RESULT_TYPE_I1)); + + CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1), + bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), + bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0), + BI_CMPF_LT, BI_RESULT_TYPE_I1)); } TEST_F(LowerConstants, HandleTrickyNegativesFP16) diff --git a/src/panfrost/bifrost/valhall/test/test-packing.cpp b/src/panfrost/bifrost/valhall/test/test-packing.cpp index 7d44bac..932be2a 100644 --- a/src/panfrost/bifrost/valhall/test/test-packing.cpp +++ b/src/panfrost/bifrost/valhall/test/test-packing.cpp @@ -147,19 +147,17 @@ TEST_F(ValhallPacking, FaddImm) { } TEST_F(ValhallPacking, Comparions) { - bi_instr *I = - bi_icmp_v2s16_to(b, bi_register(2), + CASE(bi_icmp_or_v2s16_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(3), true, false)), bi_discard(bi_swz_16(bi_register(2), true, false)), - BI_CMPF_GT, - BI_RESULT_TYPE_M1); - I->src[2] = zero; // TODO: model in the IR + zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), + 0x00f9c21184c04243); - CASE(I, 0x00f9c21184c04243); - - I->op = 
BI_OPCODE_FCMP_V2F16; - I->src[1] = bi_discard(bi_swz_16(bi_register(2), false, false)); - CASE(I, 0x00f5c20190c04243); + CASE(bi_fcmp_or_v2f16_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(3), true, false)), + bi_discard(bi_swz_16(bi_register(2), false, false)), + zero, BI_CMPF_GT, BI_RESULT_TYPE_M1), + 0x00f5c20190c04243); } TEST_F(ValhallPacking, Conversions) { @@ -307,11 +305,9 @@ TEST_F(ValhallPacking, Convert16To32) { } TEST_F(ValhallPacking, Swizzle8) { - bi_instr *I = bi_icmp_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), - zero, BI_CMPF_NE, BI_RESULT_TYPE_I1); - I->src[2] = zero; // TODO: model in the IR - - CASE(I, 0x00f2c14300c0c000); + CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), + zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1), + 0x00f2c14300c0c000); } TEST_F(ValhallPacking, FauPage1) { diff --git a/src/panfrost/bifrost/valhall/va_lower_isel.c b/src/panfrost/bifrost/valhall/va_lower_isel.c index a43ddc5..3ea18a9 100644 --- a/src/panfrost/bifrost/valhall/va_lower_isel.c +++ b/src/panfrost/bifrost/valhall/va_lower_isel.c @@ -41,30 +41,65 @@ va_lower_isel(bi_instr *I) I->src[1] = bi_zero(); break; - /* Extra source in Valhall not yet modeled in the Bifrost IR */ case BI_OPCODE_ICMP_I32: - I->op = BI_OPCODE_ICMP_U32; + I->op = BI_OPCODE_ICMP_OR_U32; I->src[2] = bi_zero(); break; case BI_OPCODE_ICMP_V2I16: - I->op = BI_OPCODE_ICMP_V2U16; + I->op = BI_OPCODE_ICMP_OR_V2U16; I->src[2] = bi_zero(); break; case BI_OPCODE_ICMP_V4I8: - I->op = BI_OPCODE_ICMP_V4U8; + I->op = BI_OPCODE_ICMP_OR_V4U8; I->src[2] = bi_zero(); break; case BI_OPCODE_ICMP_U32: + I->op = BI_OPCODE_ICMP_OR_U32; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_ICMP_V2U16: + I->op = BI_OPCODE_ICMP_OR_V2U16; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_ICMP_V4U8: + I->op = BI_OPCODE_ICMP_OR_V4U8; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_ICMP_S32: + I->op = BI_OPCODE_ICMP_OR_S32; + 
I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_ICMP_V2S16: + I->op = BI_OPCODE_ICMP_OR_V2S16; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_ICMP_V4S8: + I->op = BI_OPCODE_ICMP_OR_V4S8; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_FCMP_F32: + I->op = BI_OPCODE_FCMP_OR_F32; + I->src[2] = bi_zero(); + I->nr_srcs = 3; + break; + case BI_OPCODE_FCMP_V2F16: + I->op = BI_OPCODE_FCMP_OR_V2F16; I->src[2] = bi_zero(); break; diff --git a/src/panfrost/bifrost/valhall/valhall.py b/src/panfrost/bifrost/valhall/valhall.py index 5e3497f..44ca20e 100644 --- a/src/panfrost/bifrost/valhall/valhall.py +++ b/src/panfrost/bifrost/valhall/valhall.py @@ -200,7 +200,7 @@ class Instruction: self.secondary_mask = 0xF if opcode2 is not None else 0x0 if "left" in [x.name for x in self.modifiers]: self.secondary_mask |= 0x100 - if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes): + if len(srcs) == 3 and (srcs[1].widen or srcs[1].lanes or srcs[1].swizzle): self.secondary_mask &= ~0xC # conflicts if opcode == 0x90: # XXX: XMLify this, but disambiguates sign of conversions -- 2.7.4