[PowerPC] Add prefix load pattern for fpext to v2f64

author Amy Kwan <amy.kwan1@ibm.com>

Tue, 21 Sep 2021 15:49:33 +0000 (10:49 -0500)

committer Amy Kwan <amy.kwan1@ibm.com>

Tue, 21 Sep 2021 17:45:24 +0000 (12:45 -0500)
author Amy Kwan <amy.kwan1@ibm.com>
Tue, 21 Sep 2021 15:49:33 +0000 (10:49 -0500)
committer Amy Kwan <amy.kwan1@ibm.com>
Tue, 21 Sep 2021 17:45:24 +0000 (12:45 -0500)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td

index 0b5c9bc..05e23cd 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2790,6 +2790,10 @@ let Predicates = [PrefixInstrs] in {
    def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>;
    def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>;
    def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>;
+
+  // Prefixed fpext to v2f64: select PLFD when PPCldvsxlh has a PDForm address.
+  def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)),
+            (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>;
  }
  
  def InsertEltShift {
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll

index 15dc6bc..5fdd21e 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -108,3 +108,57 @@ entry:
    %add = fadd <2 x double> %1, %a
    ret <2 x double> %add
  }
+
+%0 = type <{ i32, i8, [1 x i8], i16, i32, i32, i8, [1 x i8], i16, i32, float, float, double, double, ppc_fp128, { float, float }, { float, float }, { double, double }, { double, double }, { ppc_fp128, ppc_fp128 }, [89856 x i8] }>
+@Glob1 = external dso_local unnamed_addr global [25 x %0], align 16 ; external array of 25 packed %0 records; test6 loads from a constant offset into it
+
+define dso_local i32 @test6() #0 {
+; CHECK-P10-LABEL: test6:
+; CHECK-P10:       # %bb.0: # %bb
+; CHECK-P10-NEXT:    plfd f0, Glob1@PCREL+562536(0), 1
+; CHECK-P10-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-NEXT:    xvcmpeqdp v2, vs1, vs0
+; CHECK-P10-NEXT:    xxswapd v3, v2
+; CHECK-P10-NEXT:    xxland vs0, v2, v3
+; CHECK-P10-NEXT:    mfvsrld r3, vs0
+; CHECK-P10-NEXT:    andi. r3, r3, 1
+; CHECK-P10-NEXT:    bc 4, gt, .LBB5_2
+; CHECK-P10-NEXT:  # %bb.1: # %bb8
+; CHECK-P10-NEXT:  .LBB5_2: # %bb7
+;
+; CHECK-P10-BE-LABEL: test6:
+; CHECK-P10-BE:       # %bb.0: # %bb
+; CHECK-P10-BE-NEXT:    addis r3, r2, Glob1@toc@ha
+; CHECK-P10-BE-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-P10-BE-NEXT:    addi r3, r3, Glob1@toc@l
+; CHECK-P10-BE-NEXT:    plfd f0, 562536(r3), 0
+; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-BE-NEXT:    xvcmpeqdp v2, vs1, vs0
+; CHECK-P10-BE-NEXT:    xxswapd v3, v2
+; CHECK-P10-BE-NEXT:    xxland vs0, v2, v3
+; CHECK-P10-BE-NEXT:    mffprd r3, f0
+; CHECK-P10-BE-NEXT:    andi. r3, r3, 1
+; CHECK-P10-BE-NEXT:    bc 4, gt, .LBB5_2
+; CHECK-P10-BE-NEXT:  # %bb.1: # %bb8
+; CHECK-P10-BE-NEXT:  .LBB5_2: # %bb7
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %i = load <2 x float>, <2 x float>* bitcast (i8* getelementptr inbounds ([25 x %0], [25 x %0]* @Glob1, i64 0, i64 6, i32 20, i64 22392) to <2 x float>*), align 8
+  %i2 = fpext <2 x float> %i to <2 x double> ; the fpext to v2f64 under test; the CHECK lines above expect the feeding load as plfd
+  %i3 = fcmp contract oeq <2 x double> zeroinitializer, %i2 ; lane-wise compare of the extended value against zero
+  %i4 = shufflevector <2 x i1> %i3, <2 x i1> poison, <2 x i32> <i32 1, i32 undef> ; bring lane 1 of the compare into lane 0
+  %i5 = and <2 x i1> %i3, %i4 ; lane 0 now holds (lane 0 AND lane 1)
+  %i6 = extractelement <2 x i1> %i5, i32 0 ; branch condition: both lanes compared equal to zero
+  br i1 %i6, label %bb8, label %bb7
+
+bb7:                                              ; preds = %bb1
+  unreachable
+
+bb8:                                              ; preds = %bb1
+  unreachable
+}
author	Amy Kwan <amy.kwan1@ibm.com>
	Tue, 21 Sep 2021 15:49:33 +0000 (10:49 -0500)
committer	Amy Kwan <amy.kwan1@ibm.com>
	Tue, 21 Sep 2021 17:45:24 +0000 (12:45 -0500)
llvm/lib/Target/PowerPC/PPCInstrPrefix.td		patch | blob | history
llvm/test/CodeGen/PowerPC/reduce_scalarization.ll		patch | blob | history