From 99b007806420f8fc5cc9db7f4b71ba511fedb686 Mon Sep 17 00:00:00 2001
From: David Green
Date: Mon, 30 May 2022 10:47:44 +0100
Subject: [PATCH] [AArch64] Tests for showing MachineCombiner COPY patterns. NFC

---
Reviewer notes (annotation only; git am takes the commit message from above
the "---" line, so this paragraph is not part of the commit):

 * machine-combiner-copy.ll: fma_dup_f16 updates C[i] with A[i] * B over n
   half elements. The vector loop handles 16 elements per iteration as two
   <8 x half> halves; a scalar loop handles the elements the vector loop does
   not cover. %B is splatted twice by two identical
   insertelement/shufflevector pairs, one per fmul. In the recorded assembly
   the first fmla reads the splat register (v1.8h) while the second reads the
   lane directly (v0.h[0]).
 * machine-combiner-fmul-dup.mir: the first hunk adds an IR stub for
   @extracopy; the machine function itself is appended by the second hunk.

 llvm/test/CodeGen/AArch64/machine-combiner-copy.ll | 111 +++++++++++++++++++++
 .../CodeGen/AArch64/machine-combiner-fmul-dup.mir  |  72 +++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-combiner-copy.ll

diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
new file mode 100644
index 0000000..2776743
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -mattr=+fullfp16 -O3 | FileCheck %s
+
+define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef %B, ptr noalias nocapture noundef %C, i32 noundef %n) {
+; CHECK-LABEL: fma_dup_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    cbz w2, .LBB0_8
+; CHECK-NEXT:  // %bb.1: // %for.body.preheader
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    cmp w2, #15
+; CHECK-NEXT:    b.hi .LBB0_3
+; CHECK-NEXT:  // %bb.2:
+; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_3: // %vector.ph
+; CHECK-NEXT:    and x9, x8, #0xfffffff0
+; CHECK-NEXT:    add x10, x1, #16
+; CHECK-NEXT:    add x11, x0, #16
+; CHECK-NEXT:    mov x12, x9
+; CHECK-NEXT:    dup v1.8h, v0.h[0]
+; CHECK-NEXT:  .LBB0_4: // %vector.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldp q2, q3, [x11, #-16]
+; CHECK-NEXT:    subs x12, x12, #16
+; CHECK-NEXT:    add x11, x11, #32
+; CHECK-NEXT:    ldp q4, q5, [x10, #-16]
+; CHECK-NEXT:    fmla v4.8h, v2.8h, v1.8h
+; CHECK-NEXT:    fmla v5.8h, v3.8h, v0.h[0]
+; CHECK-NEXT:    stp q4, q5, [x10, #-16]
+; CHECK-NEXT:    add x10, x10, #32
+; CHECK-NEXT:    b.ne .LBB0_4
+; CHECK-NEXT:  // %bb.5: // %middle.block
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.eq .LBB0_8
+; CHECK-NEXT:  .LBB0_6: // %for.body.preheader1
+; CHECK-NEXT:    lsl x10, x9, #1
+; CHECK-NEXT:    sub x8, x8, x9
+; CHECK-NEXT:    add x9, x1, x10
+; CHECK-NEXT:    add x10, x0, x10
+; CHECK-NEXT:  .LBB0_7: // %for.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr h1, [x10], #2
+; CHECK-NEXT:    ldr h2, [x9]
+; CHECK-NEXT:    subs x8, x8, #1
+; CHECK-NEXT:    fmadd h1, h1, h0, h2
+; CHECK-NEXT:    str h1, [x9], #2
+; CHECK-NEXT:    b.ne .LBB0_7
+; CHECK-NEXT:  .LBB0_8: // %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %cmp6.not = icmp eq i32 %n, 0
+  br i1 %cmp6.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  %min.iters.check = icmp ult i32 %n, 16
+  br i1 %min.iters.check, label %for.body.preheader14, label %vector.ph
+
+vector.ph:                                        ; preds = %for.body.preheader
+  %n.vec = and i64 %wide.trip.count, 4294967280
+  %broadcast.splatinsert = insertelement <8 x half> poison, half %B, i64 0
+  %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> poison, <8 x i32> zeroinitializer
+  %broadcast.splatinsert10 = insertelement <8 x half> poison, half %B, i64 0 ; same splat of %B as above, built a second time for the second fmul
+  %broadcast.splat11 = shufflevector <8 x half> %broadcast.splatinsert10, <8 x half> poison, <8 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds half, ptr %A, i64 %index
+  %wide.load = load <8 x half>, ptr %0, align 2
+  %1 = getelementptr inbounds half, ptr %0, i64 8
+  %wide.load9 = load <8 x half>, ptr %1, align 2
+  %2 = fmul fast <8 x half> %wide.load, %broadcast.splat
+  %3 = fmul fast <8 x half> %wide.load9, %broadcast.splat11
+  %4 = getelementptr inbounds half, ptr %C, i64 %index
+  %wide.load12 = load <8 x half>, ptr %4, align 2
+  %5 = getelementptr inbounds half, ptr %4, i64 8
+  %wide.load13 = load <8 x half>, ptr %5, align 2
+  %6 = fadd fast <8 x half> %wide.load12, %2
+  %7 = fadd fast <8 x half> %wide.load13, %3
+  store <8 x half> %6, ptr %4, align 2
+  store <8 x half> %7, ptr %5, align 2
+  %index.next = add nuw i64 %index, 16
+  %8 = icmp eq i64 %index.next, %n.vec
+  br i1 %8, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader14
+
+for.body.preheader14:                             ; preds = %for.body.preheader, %middle.block
+  %indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader14, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader14 ]
+  %arrayidx = getelementptr inbounds half, ptr %A, i64 %indvars.iv
+  %9 = load half, ptr %arrayidx, align 2
+  %mul = fmul fast half %9, %B
+  %arrayidx2 = getelementptr inbounds half, ptr %C, i64 %indvars.iv
+  %10 = load half, ptr %arrayidx2, align 2
+  %add = fadd fast half %10, %mul
+  store half %add, ptr %arrayidx2, align 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
index 4de93fc..a2a2841 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
@@ -95,6 +95,10 @@
     br label %for.cond
   }
 
+  define void @extracopy(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
+    unreachable
+  }
+
   attributes #0 = { "target-cpu"="cortex-a57" }
 
 ...
@@ -545,3 +549,71 @@ body:             |
     B %bb.1
 
 ...
+---
+name:            extracopy # the DUP result reaches the FMUL through an extra COPY (%0)
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr64 }
+  - { id: 1, class: fpr64 }
+  - { id: 2, class: fpr64 }
+  - { id: 3, class: fpr64 }
+  - { id: 4, class: gpr64common }
+  - { id: 5, class: fpr64 }
+  - { id: 6, class: fpr64 }
+  - { id: 7, class: fpr128 }
+  - { id: 8, class: fpr128 }
+  - { id: 9, class: fpr64 }
+  - { id: 10, class: fpr64 }
+  - { id: 11, class: fpr64 }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+  - { reg: '$d2', virtual-reg: '%3' }
+  - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+  maxAlignment:    1
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: extracopy
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   liveins: $d0, $d1, $d2, $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:fpr64 = COPY $d2
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]]
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]]
+  ; CHECK-NEXT:   [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]]
+  ; CHECK-NEXT:   STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16)
+  ; CHECK-NEXT:   B %bb.1
+  bb.0:
+    liveins: $d0, $d1, $d2, $x0
+
+    %4:gpr64common = COPY $x0
+    %3:fpr64 = COPY $d2
+    %2:fpr64 = COPY $d1
+    %1:fpr64 = COPY $d0
+    %8:fpr128 = IMPLICIT_DEF
+    %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub ; put the $d0 value (%1) into the dsub subregister of the undefined %8
+    %6:fpr64 = COPY %3
+    %5:fpr64 = COPY %2
+    %11:fpr64 = DUPv2i32lane killed %7, 0 ; by-lane DUP with lane index 0
+    %0:fpr64 = COPY %11 ; the FMUL in bb.1 reads this COPY rather than %11 itself
+
+  bb.1:
+    %9:fpr64 = FMULv2f32 %5, %0
+    %10:fpr64 = FADDv2f32 killed %9, %6
+    STRDui killed %10, %4, 0 :: (store 8, align 16)
+    B %bb.1 ; bb.1 branches back to itself
+
+...
-- 
2.7.4