From 347d2be7bef3a49b7dbe19ff1f964c1c3fb2999f Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 12 Dec 2022 14:25:32 +0900 Subject: [PATCH] [AArch64] Add Neon int instructions to isAssociativeAndCommutative Differential Revision: https://reviews.llvm.org/D139810 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 23 +++++++ llvm/test/CodeGen/AArch64/machine-combiner.ll | 69 +++++++++++++++++++ llvm/test/CodeGen/AArch64/reduce-shuffle.ll | 96 +++++++++++++-------------- 3 files changed, 140 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index a22a67a..11ab7d0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4997,6 +4997,29 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, case AArch64::EORXrr: case AArch64::EONWrr: case AArch64::EONXrr: + // -- Advanced SIMD instructions -- + // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL + // in the Advanced SIMD instruction set. + case AArch64::ADDv8i8: + case AArch64::ADDv16i8: + case AArch64::ADDv4i16: + case AArch64::ADDv8i16: + case AArch64::ADDv2i32: + case AArch64::ADDv4i32: + case AArch64::ADDv1i64: + case AArch64::ADDv2i64: + case AArch64::MULv8i8: + case AArch64::MULv16i8: + case AArch64::MULv4i16: + case AArch64::MULv8i16: + case AArch64::MULv2i32: + case AArch64::MULv4i32: + case AArch64::ANDv8i8: + case AArch64::ANDv16i8: + case AArch64::ORRv8i8: + case AArch64::ORRv16i8: + case AArch64::EORv8i8: + case AArch64::EORv16i8: return true; default: diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index f5fcdda..10d602a 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -532,6 +532,75 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, ret <2 x double> %t2 } +; Verify that vector integer arithmetic operations are reassociated. + +define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) { +; CHECK-LABEL: reassociate_muls_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mul v1.2s, v3.2s, v2.2s +; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ret + %t0 = mul <2 x i32> %x0, %x1 + %t1 = mul <2 x i32> %x2, %t0 + %t2 = mul <2 x i32> %x3, %t1 + ret <2 x i32> %t2 +} + +define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) { +; CHECK-LABEL: reassociate_adds_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: add v1.2d, v3.2d, v2.2d +; CHECK-NEXT: add v0.2d, v1.2d, v0.2d +; CHECK-NEXT: ret + %t0 = add <2 x i64> %x0, %x1 + %t1 = add <2 x i64> %x2, %t0 + %t2 = add <2 x i64> %x3, %t1 + ret <2 x i64> %t2 +} + +; Verify that vector bitwise operations are reassociated. + +define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) { +; CHECK-LABEL: reassociate_ands_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v1.16b, v2.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %t0 = or <16 x i8> %x0, %x1 + %t1 = and <16 x i8> %t0, %x2 + %t2 = and <16 x i8> %t1, %x3 + ret <16 x i8> %t2 +} + +define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) { +; CHECK-LABEL: reassociate_ors_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: orr v1.8b, v2.8b, v3.8b +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %t0 = xor <4 x i16> %x0, %x1 + %t1 = or <4 x i16> %t0, %x2 + %t2 = or <4 x i16> %t1, %x3 + ret <4 x i16> %t2 +} + +define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) { +; CHECK-LABEL: reassociate_xors_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: eor v1.16b, v2.16b, v3.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %t0 = and <4 x i32> %x0, %x1 + %t1 = xor <4 x i32> %t0, %x2 + %t2 = xor <4 x i32> %t1, %x3 + ret <4 x i32> %t2 +} + ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016 ; Verify that reassociation is not happening needlessly or wrongly. diff --git a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll index 797f372..b3d1388 100644 --- a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll @@ -41,48 +41,48 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur ; CHECK-NEXT: uzp2 v5.4s, v3.4s, v2.4s ; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #12 ; CHECK-NEXT: zip1 v17.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v7.16b, v3.16b +; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s ; CHECK-NEXT: zip2 v6.4s, v1.4s, v0.4s ; CHECK-NEXT: zip2 v18.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s -; CHECK-NEXT: ext v19.16b, v1.16b, v17.16b, #8 -; CHECK-NEXT: mov v1.s[3], v0.s[2] -; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s -; CHECK-NEXT: mov v7.16b, v3.16b -; CHECK-NEXT: ext v16.16b, v2.16b, v16.16b, #12 ; CHECK-NEXT: mov v7.s[0], v2.s[1] +; CHECK-NEXT: ext v16.16b, v2.16b, v16.16b, #12 +; CHECK-NEXT: ext v19.16b, v1.16b, v17.16b, #8 +; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s ; CHECK-NEXT: mov v2.s[1], v3.s[0] +; CHECK-NEXT: mov v1.s[3], v0.s[2] +; CHECK-NEXT: mov v7.d[1], v17.d[1] ; CHECK-NEXT: mov v5.d[1], v6.d[1] +; CHECK-NEXT: mov v2.d[1], v19.d[1] ; CHECK-NEXT: mov v18.d[1], v1.d[1] ; CHECK-NEXT: mov v16.d[1], v6.d[1] ; CHECK-NEXT: mov v4.d[1], v1.d[1] -; CHECK-NEXT: mov v7.d[1], v17.d[1] -; CHECK-NEXT: mov v2.d[1], v19.d[1] +; CHECK-NEXT: add v0.4s, v7.4s, v2.4s ; CHECK-NEXT: add v1.4s, v5.4s, v18.4s +; CHECK-NEXT: rev64 v5.4s, v0.4s ; CHECK-NEXT: sub v3.4s, v4.4s, v16.4s ; CHECK-NEXT: rev64 v4.4s, v1.4s -; CHECK-NEXT: add v0.4s, v7.4s, v2.4s ; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s -; CHECK-NEXT: rev64 v5.4s, v0.4s -; CHECK-NEXT: mov v4.d[1], v1.d[1] +; CHECK-NEXT: mov v5.d[1], v0.d[1] ; CHECK-NEXT: add v6.4s, v3.4s, v2.4s ; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s -; CHECK-NEXT: mov v5.d[1], v0.d[1] -; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s +; CHECK-NEXT: mov v4.d[1], v1.d[1] ; CHECK-NEXT: rev64 v7.4s, v2.4s ; CHECK-NEXT: rev64 v3.4s, v6.4s -; CHECK-NEXT: rev64 v4.4s, v0.4s ; CHECK-NEXT: add v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sub v7.4s, v2.4s, v7.4s ; CHECK-NEXT: addp v5.4s, v1.4s, v6.4s ; CHECK-NEXT: addp v2.4s, v0.4s, v2.4s ; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s +; CHECK-NEXT: rev64 v4.4s, v0.4s ; CHECK-NEXT: rev64 v6.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: zip1 v16.4s, v5.4s, v5.4s ; CHECK-NEXT: ext v17.16b, v2.16b, v7.16b, #4 ; CHECK-NEXT: ext v18.16b, v5.16b, v3.16b, #4 -; CHECK-NEXT: ext v4.16b, v0.16b, v2.16b, #8 +; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s +; CHECK-NEXT: ext v4.16b, v0.16b, v2.16b, #8 ; CHECK-NEXT: ext v6.16b, v1.16b, v5.16b, #4 ; CHECK-NEXT: trn2 v1.4s, v16.4s, v1.4s ; CHECK-NEXT: zip2 v16.4s, v17.4s, v2.4s @@ -91,41 +91,41 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur ; CHECK-NEXT: ext v6.16b, v6.16b, v6.16b, #4 ; CHECK-NEXT: ext v16.16b, v7.16b, v16.16b, #12 ; CHECK-NEXT: ext v17.16b, v3.16b, v17.16b, #12 -; CHECK-NEXT: mov v0.s[2], v2.s[1] -; CHECK-NEXT: uzp2 v4.4s, v4.4s, v18.4s ; CHECK-NEXT: mov v3.s[2], v5.s[3] ; CHECK-NEXT: mov v7.s[2], v2.s[3] -; CHECK-NEXT: sub v18.4s, v1.4s, v6.4s -; CHECK-NEXT: mov v6.s[0], v5.s[1] -; CHECK-NEXT: sub v19.4s, v0.4s, v4.4s +; CHECK-NEXT: mov v0.s[2], v2.s[1] +; CHECK-NEXT: uzp2 v4.4s, v4.4s, v18.4s ; CHECK-NEXT: sub v20.4s, v3.4s, v17.4s ; CHECK-NEXT: sub v21.4s, v7.4s, v16.4s -; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: mov v3.s[1], v5.s[2] ; CHECK-NEXT: mov v7.s[1], v2.s[2] -; CHECK-NEXT: add v1.4s, v1.4s, v6.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-NEXT: sub v18.4s, v1.4s, v6.4s +; CHECK-NEXT: mov v6.s[0], v5.s[1] +; CHECK-NEXT: sub v19.4s, v0.4s, v4.4s +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: add v2.4s, v3.4s, v17.4s ; CHECK-NEXT: add v3.4s, v7.4s, v16.4s -; CHECK-NEXT: mov v1.d[1], v18.d[1] -; CHECK-NEXT: mov v0.d[1], v19.d[1] +; CHECK-NEXT: add v1.4s, v1.4s, v6.4s ; CHECK-NEXT: mov v3.d[1], v21.d[1] ; CHECK-NEXT: mov v2.d[1], v20.d[1] -; CHECK-NEXT: cmlt v4.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 +; CHECK-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-NEXT: mov v1.d[1], v18.d[1] +; CHECK-NEXT: mov v0.d[1], v19.d[1] ; CHECK-NEXT: cmlt v6.8h, v3.8h, #0 ; CHECK-NEXT: cmlt v7.8h, v2.8h, #0 +; CHECK-NEXT: cmlt v4.8h, v1.8h, #0 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s ; CHECK-NEXT: add v2.4s, v7.4s, v2.4s +; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 ; CHECK-NEXT: add v1.4s, v4.4s, v1.4s -; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b ; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b ; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b ; CHECK-NEXT: add v2.4s, v2.4s, v3.4s +; CHECK-NEXT: add v0.4s, v5.4s, v0.4s +; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: lsr w9, w8, #16 @@ -321,25 +321,25 @@ define i32 @v2(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur ; CHECK-NEXT: ext v0.16b, v4.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v16.16b, v3.16b, #8 ; CHECK-NEXT: add v1.4s, v5.4s, v1.4s -; CHECK-NEXT: sub v5.4s, v6.4s, v17.4s +; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s ; CHECK-NEXT: ext v0.16b, v0.16b, v4.16b, #4 ; CHECK-NEXT: ext v3.16b, v3.16b, v16.16b, #4 -; CHECK-NEXT: cmlt v6.8h, v5.8h, #0 -; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s -; CHECK-NEXT: add v4.4s, v6.4s, v5.4s -; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: sub v5.4s, v6.4s, v17.4s ; CHECK-NEXT: cmlt v7.8h, v2.8h, #0 ; CHECK-NEXT: cmlt v17.8h, v1.8h, #0 -; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b -; CHECK-NEXT: cmlt v4.8h, v0.8h, #0 +; CHECK-NEXT: cmlt v6.8h, v5.8h, #0 ; CHECK-NEXT: add v1.4s, v17.4s, v1.4s ; CHECK-NEXT: add v2.4s, v7.4s, v2.4s -; CHECK-NEXT: add v0.4s, v4.4s, v0.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v4.4s, v6.4s, v5.4s ; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b ; CHECK-NEXT: eor v1.16b, v1.16b, v17.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v4.16b +; CHECK-NEXT: cmlt v3.8h, v0.8h, #0 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-NEXT: eor v2.16b, v4.16b, v6.16b +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 @@ -545,17 +545,17 @@ define i32 @v3(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocaptur ; CHECK-NEXT: cmlt v6.8h, v3.8h, #0 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: cmlt v2.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v7.8h, v0.8h, #0 -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: cmlt v7.8h, v0.8h, #0 ; CHECK-NEXT: add v4.4s, v5.4s, v4.4s -; CHECK-NEXT: add v0.4s, v7.4s, v0.4s -; CHECK-NEXT: eor v4.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b ; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b ; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-NEXT: add v0.4s, v7.4s, v0.4s +; CHECK-NEXT: eor v2.16b, v4.16b, v5.16b +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 -- 2.7.4