From 99fc6ec34cc1b023a837830d266fbbd523a509c3 Mon Sep 17 00:00:00 2001 From: sgokhale Date: Wed, 5 Apr 2023 11:11:36 +0530 Subject: [PATCH] [AArch64][SVE][CodeGen] Generate fused mul+add/sub ops with one of add/sub operands as splat Currently, depending upon whether the add/sub instruction can synthesize the immediate directly, it is decided whether to generate mul+(add/sub immediate) or mov+mla/mad/msb/mls ops. If the add/sub can synthesize the immediate directly, then the fused ops won't get generated. This patch addresses this by giving the fused-op patterns a higher priority (via AddedComplexity). Specifically, the patch aims at a transformation similar to the one below: add ( mul, splat_vector(C)) -> MOV C MAD Differential Revision: https://reviews.llvm.org/D142656 --- llvm/lib/Target/AArch64/SVEInstrFormats.td | 10 ++++++---- llvm/test/CodeGen/AArch64/sve-int-arith.ll | 29 +++++++++++++++-------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 736d5b4..8256136 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3198,10 +3198,12 @@ multiclass sve_int_3op_p_mladdsub { def _UNDEF_S : PredThreeOpPseudo; def _UNDEF_D : PredThreeOpPseudo; - def : SVE_4_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + let AddedComplexity = 9 in { + def : SVE_4_Op_Pat(NAME # _UNDEF_B)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + } } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll index 99fbba5..6dcad12 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -586,8 +586,8 @@ define 
@muladd_i16_positiveAddend( %a, %a, %b @@ -612,8 +612,8 @@ define @muladd_i8_positiveAddend( %a, %a, %b @@ -625,8 +625,8 @@ define @muladd_i8_negativeAddend( %a, %a, %b @@ -744,13 +744,14 @@ define @mulsub_i8_negativeAddend( %a, %2 } +; TOFIX: Should generate msb for mul+sub in this case. Shuffling operand of sub generates the required msb instruction. define @multiple_fused_ops( %a, %b) ; CHECK-LABEL: multiple_fused_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #200 // =0xc8 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: add z2.h, z2.h, #200 // =0xc8 +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -771,15 +772,15 @@ define void @mad_in_loop(ptr %dst, ptr %src1, ptr %src2, i32 %n) { ; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: cntw x10 +; CHECK-NEXT: mov z0.s, #1 // =0x1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: whilelo p1.s, xzr, x9 ; CHECK-NEXT: .LBB70_2: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1, x8, lsl #2] -; CHECK-NEXT: ld1w { z1.s }, p1/z, [x2, x8, lsl #2] -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: add z0.s, z0.s, #1 // =0x1 -; CHECK-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1, x8, lsl #2] +; CHECK-NEXT: ld1w { z2.s }, p1/z, [x2, x8, lsl #2] +; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s +; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] ; CHECK-NEXT: add x8, x8, x10 ; CHECK-NEXT: whilelo p1.s, x8, x9 ; CHECK-NEXT: b.mi .LBB70_2 -- 2.7.4