From: Igor Kirillov Date: Wed, 24 Nov 2021 17:23:24 +0000 (+0000) Subject: [AArch64][SVEIntrinsicOpts] Fix: predicated SVE mul/fmul are not commutative X-Git-Tag: upstream/15.0.7~24730 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=08d45e6f4da03836dc2380b7bae650ae80cfae35;p=platform%2Fupstream%2Fllvm.git [AArch64][SVEIntrinsicOpts] Fix: predicated SVE mul/fmul are not commutative We cannot swap multiplicand and multiplier because the SVE intrinsics are predicated. Imagine lanes in vectors having the following values: pg = 0 multiplicand = 1 (from dup) multiplier = 2 The resulting value should be 1, but if we swap multiplicand and multiplier it will become 2, which is incorrect. Differential Revision: https://reviews.llvm.org/D114577 --- diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ea21ad1..34015d2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -833,17 +833,12 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, return match(SplatValue, m_FPOne()) || match(SplatValue, m_One()); }; - // The OpMultiplier variable should always point to the dup (if any), so - // swap if necessary. - if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand)) - std::swap(OpMultiplier, OpMultiplicand); - if (IsUnitSplat(OpMultiplier)) { - // [f]mul pg (dupx 1) %n => %n + // [f]mul pg %n, (dupx 1) => %n OpMultiplicand->takeName(&II); return IC.replaceInstUsesWith(II, OpMultiplicand); } else if (IsUnitDup(OpMultiplier)) { - // [f]mul pg (dup pg 1) %n => %n + // [f]mul pg %n, (dup pg 1) => %n auto *DupInst = cast<IntrinsicInst>(OpMultiplier); auto *DupPg = DupInst->getOperand(1); // TODO: this is naive. 
The optimization is still valid if DupPg diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll index 9bc48d87..77c76ea 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll @@ -32,7 +32,8 @@ define @idempotent_fmul_f64( %pg, @idempotent_fmul_different_argument_order( %pg, %a) #0 { ; CHECK-LABEL: @idempotent_fmul_different_argument_order( -; CHECK-NEXT: ret [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( [[PG:%.*]], shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer), [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = call @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) ; Different argument order to the above tests. diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll index e07fa92..e6acd36 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll @@ -32,7 +32,8 @@ define @idempotent_mul_i64( %pg, @idempotent_mul_different_argument_order( %pg, %a) #0 { ; CHECK-LABEL: @idempotent_mul_different_argument_order( -; CHECK-NEXT: ret [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mul.nxv2i64( [[PG:%.*]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer), [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 1) ; Different argument order to the above tests.