From cf0aa0b66ccec87b0ac14f632998074e507be950 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie
Date: Tue, 13 Jul 2021 21:15:30 -0500
Subject: [PATCH] [NFC][PowerPC] Added test to check register allocation for ACC registers

ACC registers are a combination of 4 consecutive vector registers and
therefore sometimes require special treatment for register allocation.

This patch only adds a test.
---
 llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll | 352 ++++++++++++++++++++++++
 1 file changed, 352 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll

diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
new file mode 100644
index 0000000..ace652d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
@@ -0,0 +1,352 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE
+
+%0 = type <{ double }>
+%1 = type <{ double }>
+
+define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unnamed_addr {
+; CHECK-LABEL: acc_regalloc:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: lwz r3, 0(r3)
+; CHECK-NEXT: lxv vs0, 0(0)
+; CHECK-NEXT: xxlxor vs2, vs2, vs2
+; CHECK-NEXT: xxlxor vs3, vs3, vs3
+; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
+; CHECK-NEXT: xxlxor v2, v2, v2
+; CHECK-NEXT: li r6, 1
+; CHECK-NEXT: li r4, 16
+; CHECK-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
+; CHECK-NEXT: extswsli r3, r3, 3
+; CHECK-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: xvmaddadp vs3, vs0, vs3
+; CHECK-NEXT: lxvdsx vs1, 0, r3
+; CHECK-NEXT: xvmaddadp vs2, vs1, vs2
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb9
+; CHECK-NEXT: #
+; CHECK-NEXT: addi r6, r6, 2
+; CHECK-NEXT: lxv vs5, -64(r5)
+; CHECK-NEXT: lxv vs6, -16(r5)
+; CHECK-NEXT: lxv vs4, 16(0)
+; CHECK-NEXT: xxlor v7, vs2, vs2
+; CHECK-NEXT: xxlxor v8, v8, v8
+; CHECK-NEXT: xxlxor v1, v1, v1
+; CHECK-NEXT: mulld r6, r6, r3
+; CHECK-NEXT: xvmaddadp v7, vs5, v2
+; CHECK-NEXT: xxlxor v6, v6, v6
+; CHECK-NEXT: xvmaddadp v8, vs6, v8
+; CHECK-NEXT: xvmaddadp v1, vs4, vs1
+; CHECK-NEXT: xvmuldp v0, vs4, v2
+; CHECK-NEXT: xvmaddadp v1, v2, v2
+; CHECK-NEXT: xvmaddadp v0, v2, v2
+; CHECK-NEXT: lxvdsx v4, r6, r4
+; CHECK-NEXT: xvmaddadp v6, vs5, v6
+; CHECK-NEXT: li r6, 0
+; CHECK-NEXT: xvmuldp v9, vs6, v4
+; CHECK-NEXT: xvmuldp v3, vs5, v4
+; CHECK-NEXT: xvmuldp v11, vs0, v4
+; CHECK-NEXT: vmr v10, v2
+; CHECK-NEXT: xvmuldp v5, v4, v2
+; CHECK-NEXT: vmr v4, v2
+; CHECK-NEXT: xxlor vs18, v8, v8
+; CHECK-NEXT: xxlor vs4, v2, v2
+; CHECK-NEXT: xxlor vs12, v10, v10
+; CHECK-NEXT: xxlor vs13, v11, v11
+; CHECK-NEXT: xxlor v10, vs3, vs3
+; CHECK-NEXT: xxlor vs8, v4, v4
+; CHECK-NEXT: xxlor vs9, v5, v5
+; CHECK-NEXT: xxlor vs10, v0, v0
+; CHECK-NEXT: xxlor vs11, v1, v1
+; CHECK-NEXT: xxmtacc acc2
+; CHECK-NEXT: xxlor vs19, v9, v9
+; CHECK-NEXT: vmr v8, v2
+; CHECK-NEXT: xxlor vs5, v3, v3
+; CHECK-NEXT: xxlor vs6, v6, v6
+; CHECK-NEXT: xxlor vs7, v7, v7
+;
CHECK-NEXT: xxlor vs14, v10, v10 +; CHECK-NEXT: xxlor vs15, v11, v11 +; CHECK-NEXT: xxlor vs16, v8, v8 +; CHECK-NEXT: xxlor vs17, v9, v9 +; CHECK-NEXT: xxmtacc acc1 +; CHECK-NEXT: xxmtacc acc3 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xxmtacc acc4 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0 +; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0 +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: xxmfacc acc2 +; CHECK-NEXT: xxmfacc acc3 +; CHECK-NEXT: xxmfacc acc4 +; CHECK-NEXT: stxv vs5, 0(r3) +; CHECK-NEXT: stxv vs13, 32(r3) +; CHECK-NEXT: stxv vs8, 16(0) +; CHECK-NEXT: stxv vs16, 48(0) +; CHECK-NEXT: b .LBB0_1 +; +; TRACKLIVE-LABEL: acc_regalloc: +; TRACKLIVE: # %bb.0: # %bb +; TRACKLIVE-NEXT: lwz r3, 0(r3) +; TRACKLIVE-NEXT: lxv vs0, 0(0) +; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2 +; TRACKLIVE-NEXT: xxlxor vs3, vs3, vs3 +; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: xxlxor v2, v2, v2 +; TRACKLIVE-NEXT: li r6, 1 +; TRACKLIVE-NEXT: li r4, 16 +; TRACKLIVE-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: extswsli r3, r3, 3 +; TRACKLIVE-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill +; TRACKLIVE-NEXT: xvmaddadp vs3, vs0, vs3 +; TRACKLIVE-NEXT: lxvdsx vs1, 0, r3 +; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2 +; TRACKLIVE-NEXT: .p2align 4 +; TRACKLIVE-NEXT: .LBB0_1: # %bb9 +; TRACKLIVE-NEXT: # +; TRACKLIVE-NEXT: addi r6, r6, 2 +; TRACKLIVE-NEXT: lxv vs4, 16(0) +; TRACKLIVE-NEXT: xxlxor v1, v1, v1 +; TRACKLIVE-NEXT: lxv vs6, -16(r5) +; TRACKLIVE-NEXT: lxv vs5, -64(r5) +; TRACKLIVE-NEXT: xxlxor v8, v8, v8 +; TRACKLIVE-NEXT: xxlor v7, vs2, vs2 +; TRACKLIVE-NEXT: xxlxor v6, v6, v6 +; TRACKLIVE-NEXT: mulld r6, r6, r3 +; TRACKLIVE-NEXT: vmr v10, v2 +; TRACKLIVE-NEXT: xxlor vs8, v10, v10 +; TRACKLIVE-NEXT: xvmaddadp v1, vs4, vs1 +; TRACKLIVE-NEXT: xvmuldp v0, vs4, v2 +; TRACKLIVE-NEXT: xvmaddadp v8, vs6, v8 +; TRACKLIVE-NEXT: xvmaddadp v7, vs5, v2 +; TRACKLIVE-NEXT: xvmaddadp v6, vs5, v6 +; TRACKLIVE-NEXT: xxlor vs4, v2, v2 +; TRACKLIVE-NEXT: lxvdsx v4, r6, r4 +; TRACKLIVE-NEXT: li r6, 0 +; TRACKLIVE-NEXT: xvmaddadp v1, v2, v2 +; TRACKLIVE-NEXT: xvmaddadp v0, v2, v2 +; TRACKLIVE-NEXT: xxlor vs18, v8, v8 +; TRACKLIVE-NEXT: vmr v8, v2 +; TRACKLIVE-NEXT: xxlor vs7, v7, v7 +; TRACKLIVE-NEXT: xxlor vs16, v8, v8 +; TRACKLIVE-NEXT: xvmuldp 
v3, vs5, v4 +; TRACKLIVE-NEXT: xvmuldp v5, vs0, v4 +; TRACKLIVE-NEXT: xvmuldp v9, vs6, v4 +; TRACKLIVE-NEXT: xvmuldp v11, v4, v2 +; TRACKLIVE-NEXT: vmr v4, v2 +; TRACKLIVE-NEXT: xxlor vs6, v6, v6 +; TRACKLIVE-NEXT: xxlor vs12, v4, v4 +; TRACKLIVE-NEXT: xxlor v4, vs3, vs3 +; TRACKLIVE-NEXT: xxlor vs10, v0, v0 +; TRACKLIVE-NEXT: xxlor vs11, v1, v1 +; TRACKLIVE-NEXT: xxlor vs14, v4, v4 +; TRACKLIVE-NEXT: xxlor vs5, v3, v3 +; TRACKLIVE-NEXT: xxlor vs9, v11, v11 +; TRACKLIVE-NEXT: xxlor vs13, v5, v5 +; TRACKLIVE-NEXT: xxlor vs15, v5, v5 +; TRACKLIVE-NEXT: xxlor vs19, v9, v9 +; TRACKLIVE-NEXT: xxlor vs17, v9, v9 +; TRACKLIVE-NEXT: xxmtacc acc1 +; TRACKLIVE-NEXT: xxmtacc acc2 +; TRACKLIVE-NEXT: xxmtacc acc3 +; TRACKLIVE-NEXT: xxmtacc acc4 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0 +; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0 +; TRACKLIVE-NEXT: xxmfacc acc1 +; TRACKLIVE-NEXT: xxmfacc acc2 +; TRACKLIVE-NEXT: xxmfacc acc3 +; TRACKLIVE-NEXT: xxmfacc acc4 +; TRACKLIVE-NEXT: stxv vs5, 0(r3) +; TRACKLIVE-NEXT: stxv vs13, 32(r3) +; TRACKLIVE-NEXT: stxv vs8, 16(0) +; TRACKLIVE-NEXT: stxv vs16, 48(0) +; TRACKLIVE-NEXT: b .LBB0_1 +bb: + %i = load i32, i32* %arg, align 4 + %i3 = sext i32 %i to i64 + %i4 = shl nsw i64 %i3, 3 + %i5 = bitcast [0 x %0]* %arg1 to i8* + %i6 = getelementptr i8, i8* %i5, i64 undef + %i7 = getelementptr [0 x %1], [0 x %1]* %arg2, i64 0, i64 -8 + %i8 = getelementptr i8, i8* %i6, i64 undef + br label %bb9 + +bb9: ; preds = %bb95, %bb + %i10 = phi i64 [ 1, %bb ], [ 0, %bb95 ] + %i11 = getelementptr %1, %1* null, i64 2 + %i12 = bitcast %1* %i11 to <2 x double>* + %i13 = load <2 x double>, <2 x double>* %i12, align 1 + %i14 = add nuw nsw i64 %i10, 2 + %i15 = getelementptr inbounds %1, %1* %i7, i64 undef + %i16 = bitcast %1* %i15 to <2 x double>* + %i17 = load <2 x double>, <2 x double>* %i16, align 1 + %i18 = load <2 x double>, <2 x double>* null, align 1 + %i19 = getelementptr %1, %1* %i15, i64 6 + %i20 = bitcast %1* %i19 to <2 x double>* + %i21 = load <2 x double>, <2 x double>* %i20, align 1 + %i22 = load i64, i64* undef, align 8 + %i23 = insertelement <2 x i64> poison, i64 %i22, i32 0 + %i24 = bitcast <2 x i64> %i23 to <2 x double> + %i25 = shufflevector <2 x double> %i24, <2 x double> undef, <2 x i32> zeroinitializer + %i26 = mul i64 %i14, %i4 + %i27 = getelementptr i8, i8* null, 
i64 %i26 + %i28 = getelementptr inbounds i8, i8* %i27, i64 0 + %i29 = getelementptr i8, i8* %i28, i64 16 + %i30 = bitcast i8* %i29 to i64* + %i31 = load i64, i64* %i30, align 8 + %i32 = insertelement <2 x i64> poison, i64 %i31, i32 0 + %i33 = bitcast <2 x i64> %i32 to <2 x double> + %i34 = shufflevector <2 x double> %i33, <2 x double> undef, <2 x i32> zeroinitializer + %i35 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %i25, <2 x double> zeroinitializer) + %i36 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i13, <2 x double> %i25, <2 x double> zeroinitializer) + %i37 = fmul contract <2 x double> %i13, zeroinitializer + %i38 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> %i35) + %i39 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i36) + %i40 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> zeroinitializer) + %i41 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i37) + %i42 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i18, <2 x double> zeroinitializer, <2 x double> zeroinitializer) + %i43 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i21, <2 x double> zeroinitializer, <2 x double> zeroinitializer) + %i44 = fmul contract <2 x double> %i17, %i34 + %i45 = fmul contract <2 x double> zeroinitializer, %i34 + %i46 = fmul contract <2 x double> %i18, %i34 + %i47 = fmul contract <2 x double> %i21, %i34 + %i48 = bitcast <2 x double> %i44 to <16 x i8> + %i49 = bitcast <2 x double> %i40 to <16 x i8> + %i50 = bitcast <2 x double> %i38 to <16 x i8> + %i51 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i48, <16 x i8> %i49, <16 x i8> %i50) + %i52 = bitcast <2 x double> %i45 to <16 x i8> + %i53 = bitcast <2 x double> %i41 to <16 x i8> + %i54 = bitcast <2 x double> %i39 to <16 x i8> + %i55 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i52, <16 x i8> %i53, <16 x i8> %i54) + %i56 = bitcast <2 x double> %i46 to <16 x i8> + %i57 = bitcast <2 x double> %i42 to <16 x i8> + %i58 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i56, <16 x i8> %i57, <16 x i8> %i56) + %i59 = bitcast <2 x double> %i47 to <16 x i8> + %i60 = bitcast <2 x double> %i43 to <16 x i8> + %i61 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i59, <16 x i8> %i60, <16 x i8> %i59) + %i62 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i51, <256 x i1> undef, <16 x i8> undef) + %i63 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i55, <256 x i1> undef, <16 x i8> undef) + %i64 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i58, <256 x i1> undef, <16 x i8> undef) + %i65 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i61, <256 x i1> undef, <16 x i8> undef) + %i66 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i62, <256 x i1> undef, <16 x i8> undef) + %i67 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i63, <256 x i1> undef, <16 x i8> undef) + %i68 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i64, <256 x i1> undef, <16 x i8> undef) + %i69 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i65, <256 x i1> undef, <16 x i8> 
undef) + %i70 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i66, <256 x i1> undef, <16 x i8> undef) + %i71 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i67, <256 x i1> undef, <16 x i8> undef) + %i72 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i68, <256 x i1> undef, <16 x i8> undef) + %i73 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i69, <256 x i1> undef, <16 x i8> undef) + %i74 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i70, <256 x i1> undef, <16 x i8> undef) + %i75 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i71, <256 x i1> undef, <16 x i8> undef) + %i76 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i72, <256 x i1> undef, <16 x i8> undef) + %i77 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i73, <256 x i1> undef, <16 x i8> undef) + %i78 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i74, <256 x i1> undef, <16 x i8> undef) + %i79 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i75, <256 x i1> undef, <16 x i8> undef) + %i80 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i76, <256 x i1> undef, <16 x i8> undef) + %i81 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i77, <256 x i1> undef, <16 x i8> undef) + br label %bb82 + +bb82: ; preds = %bb82, %bb9 + %i83 = phi <512 x i1> [ %i94, %bb82 ], [ %i81, %bb9 ] + %i84 = phi <512 x i1> [ %i93, %bb82 ], [ %i80, %bb9 ] + %i85 = phi <512 x i1> [ %i92, %bb82 ], [ %i79, %bb9 ] + %i86 = phi <512 x i1> [ %i91, %bb82 ], [ %i78, %bb9 ] + %i87 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i86, <256 x i1> undef, <16 x i8> undef) + %i88 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i85, <256 x i1> undef, <16 x i8> undef) + %i89 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i84, <256 x i1> undef, <16 x i8> undef) + %i90 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i83, <256 x i1> undef, <16 x i8> undef) + %i91 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i87, <256 x i1> undef, <16 x i8> undef) + %i92 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i88, <256 x i1> undef, <16 x i8> undef) + %i93 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i89, <256 x i1> undef, <16 x i8> undef) + %i94 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i90, <256 x i1> undef, <16 x i8> undef) + br i1 undef, label %bb95, label %bb82 + +bb95: ; preds = %bb82 + %i96 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i91) + %i97 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i96, 2 + %i98 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i92) + %i99 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i98, 3 + %i100 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i93) + %i101 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i100, 2 + %i102 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i94) + %i103 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i102, 3 + %i104 = getelementptr inbounds i8, i8* %i8, i64 undef + %i105 = bitcast i8* %i104 to <16 x i8>* + store <16 x i8> %i97, <16 x i8>* %i105, align 1 + %i106 = getelementptr i8, i8* %i104, i64 32 + %i107 = bitcast i8* %i106 to <16 x i8>* + store <16 x i8> %i101, <16 x i8>* %i107, align 1 + %i108 = 
getelementptr i8, i8* null, i64 16 + %i109 = bitcast i8* %i108 to <16 x i8>* + store <16 x i8> %i99, <16 x i8>* %i109, align 1 + %i110 = getelementptr i8, i8* null, i64 48 + %i111 = bitcast i8* %i110 to <16 x i8>* + store <16 x i8> %i103, <16 x i8>* %i111, align 1 + br label %bb9 +} + +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) + -- 2.7.4
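Background note (outside the patch): on Power10, each ACC register overlaps an
aligned group of four consecutive vector-scalar registers (acc0 = vs0-vs3,
acc1 = vs4-vs7, ..., acc7 = vs28-vs31). That is why the generated code above
copies values into vs4-vs19 with xxlor before priming acc1-acc4 with xxmtacc,
and why the register allocator needs the special treatment this test exercises.
The sketch below is a hypothetical, stand-alone illustration, not part of the
test; it uses the simpler llvm.ppc.mma.xvf32gerpp intrinsic rather than the
xvf64gerpp calls above, and shows the smallest IR that forces one accumulator,
and therefore four consecutive VSRs, to be allocated:

  ; One <512 x i1> value corresponds to one ACC register (four VSRs).
  declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)

  define void @single_acc(<512 x i1>* %ptr, <16 x i8> %a, <16 x i8> %b) {
  entry:
    ; Load the 512-bit accumulator from memory.
    %acc = load <512 x i1>, <512 x i1>* %ptr, align 64
    ; The rank-2 update accumulates in place, so an aligned group of four
    ; VSRs must stay live for the duration of the update.
    %upd = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc, <16 x i8> %a, <16 x i8> %b)
    store <512 x i1> %upd, <512 x i1>* %ptr, align 64
    ret void
  }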