From af430944b3ba8ca55c4fd6b73f53c198c469ffee Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Thu, 26 May 2022 09:24:12 -0500 Subject: [PATCH] [PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+. This patch updates two patterns involving `scalar_to_vector` and `SCALAR_TO_VECTOR_PERMUTED` nodes to be safe for both 64-bit and 32-bit by pulling the patterns out of the 64-bit specific guard. These patterns are matched on POWER8 and above. Differential Revision: https://reviews.llvm.org/D125389 --- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17 ++-- .../CodeGen/PowerPC/aix_scalar_vector_permuted.ll | 29 +++--- .../CodeGen/PowerPC/canonical-merge-shuffles.ll | 3 +- llvm/test/CodeGen/PowerPC/float-vector-gather.ll | 18 ++-- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 3 +- llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll | 3 +- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 5 +- llvm/test/CodeGen/PowerPC/reduce_scalarization.ll | 57 +++++------ llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll | 113 +++++---------------- llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 25 ++--- 10 files changed, 93 insertions(+), 180 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1e87af5..6e56249 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3367,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)), def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)), (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>; + +defm : ScalToVecWPermute< + v4i32, (i32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; +defm : ScalToVecWPermute< + v4f32, (f32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; } // HasVSX, HasP8Vector, IsBigEndian // Big endian Power8 64Bit VSX subtarget. @@ -3381,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>; -defm : ScalToVecWPermute< - v4i32, (i32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; -defm : ScalToVecWPermute< - v4f32, (f32 (load ForceXForm:$src)), - (XXSLDWIs (LIWZX ForceXForm:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca ; ; P8-AIX-32-LABEL: testSplat4hi: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lwz r3, 0(r3) -; P8-AIX-32-NEXT: mtfprwz f0, r3 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX-32-NEXT: xxspltw v2, vs0, 1 ; P8-AIX-32-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll index 0b3e75e..7d97598 100644 --- a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll +++ b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll @@ -43,17 +43,13 @@ float* nocapture readonly %d) { ; CHECK-BE-AIX-32-LABEL: vector_gatherf: ; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry -; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]] -; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]] -; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]] -; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]] -; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]] -; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG0:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG1:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG2:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG3:[0-9]+]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[REG0]], v[[REG1]], v[[REG0]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[REG3]], v[[REG2]], v[[REG3]] +; CHECK-BE-AIX-32-NEXT: xxmrghd v[[REG0]], v[[REG3]], v[[REG0]] ; CHECK-BE-AIX-32-NEXT: blr entry: %0 = load float, float* %a, align 4 diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index 469a56d..699f5a8 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -560,8 +560,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) { ; ; P8-AIX32-LABEL: unadjusted_lxvwsx: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: lwz r3, 0(r3) -; P8-AIX32-NEXT: mtfprwz f0, r3 +; P8-AIX32-NEXT: lfiwzx f0, 0, r3 ; P8-AIX32-NEXT: xxspltw v2, vs0, 1 ; P8-AIX32-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll index 8160263..d332f54 100644 --- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -28,8 +28,7 @@ define <16 x i8> @test(i32* %s, i32* %t) { ; ; CHECK-AIX-32-LABEL: test: ; CHECK-AIX-32: # %bb.0: # %entry -; CHECK-AIX-32-NEXT: lwz r3, 0(r3) -; CHECK-AIX-32-NEXT: mtfprwz f0, r3 +; CHECK-AIX-32-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-NEXT: xxspltw v2, vs0, 1 ; CHECK-AIX-32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index f3959c3..d170dc7 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -66,9 +66,8 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; P9BE: lxsiwzx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] ; P9BE-32-LABEL: test32: -; P9BE-32: lwzx [[REG1:[0-9]+]] -; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]] -; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]] +; P9BE-32: lxsiwzx [[REG:[0-9]+]] +; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] entry: %idx.ext63 = sext i32 %i_pix2 to i64 %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63 diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll index d7883b5..5034778 100644 --- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll +++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll @@ -68,18 +68,15 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test2: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvsubsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 @@ -117,18 +114,15 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test3: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvaddsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 @@ -166,18 +160,15 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl ; ; AIX-32-LABEL: test4: ; AIX-32: # %bb.0: # %entry -; AIX-32-NEXT: lfs f0, 4(r3) -; AIX-32-NEXT: lfs f1, 0(r3) ; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0 -; AIX-32-NEXT: lfs f2, 4(r4) -; AIX-32-NEXT: xscvdpspn v2, f0 -; AIX-32-NEXT: lfs f0, 0(r4) -; AIX-32-NEXT: lxvw4x v0, 0, r5 -; AIX-32-NEXT: xscvdpspn v3, f1 -; AIX-32-NEXT: xscvdpspn v4, f2 -; AIX-32-NEXT: xscvdpspn v5, f0 -; AIX-32-NEXT: vperm v2, v3, v2, v0 -; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: li r6, 4 +; AIX-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-32-NEXT: lxsiwzx v0, 0, r4 +; AIX-32-NEXT: lxsiwzx v2, r3, r6 +; AIX-32-NEXT: lxsiwzx v5, r4, r6 +; AIX-32-NEXT: lxvw4x v4, 0, r5 +; AIX-32-NEXT: vperm v2, v3, v2, v4 +; AIX-32-NEXT: vperm v3, v0, v5, v4 ; AIX-32-NEXT: xvmulsp vs0, v2, v3 ; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 ; AIX-32-NEXT: xscvspdpn f0, vs0 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index e0fd752..87e4c71 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -10,16 +10,16 @@ ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \ -; RUN: --check-prefixes=P9-AIX,P9-AIX-64 +; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-64 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P9-AIX,P9-AIX-32 +; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-32 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P8-AIX-64 +; RUN: --check-prefixes=AIX,P8-AIX-64 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ -; RUN: --check-prefixes=P8-AIX-32 +; RUN: --check-prefixes=AIX,P8-AIX-32 ; Function Attrs: norecurse nounwind readonly define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { @@ -422,9 +422,8 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec ; ; P8-AIX-32-LABEL: s2v_test_f1: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) ; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, 0, r3 ; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 ; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 ; P8-AIX-32-NEXT: blr @@ -466,33 +465,12 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f2: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f2: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f2: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f2: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f2: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -542,8 +520,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P9-AIX-32-LABEL: s2v_test_f3: ; P9-AIX-32: # %bb.0: # %entry ; P9-AIX-32-NEXT: slwi r4, r4, 2 -; P9-AIX-32-NEXT: lfsx f0, r3, r4 -; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P9-AIX-32-NEXT: vmrgow v2, v3, v2 ; P9-AIX-32-NEXT: blr ; @@ -557,8 +534,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P8-AIX-32-LABEL: s2v_test_f3: ; P8-AIX-32: # %bb.0: # %entry ; P8-AIX-32-NEXT: slwi r4, r4, 2 -; P8-AIX-32-NEXT: lfsx f0, r3, r4 -; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxsiwzx v3, r3, r4 ; P8-AIX-32-NEXT: vmrgow v2, v3, v2 ; P8-AIX-32-NEXT: blr entry: @@ -601,33 +577,12 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f4: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: addi r3, r3, 4 -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f4: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 4(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f4: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: addi r3, r3, 4 -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f4: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 4(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f4: +; AIX: # %bb.0: # %entry +; AIX-NEXT: addi r3, r3, 4 +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -663,31 +618,11 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr ; -; P9-AIX-64-LABEL: s2v_test_f5: -; P9-AIX-64: # %bb.0: # %entry -; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P9-AIX-64-NEXT: vmrgow v2, v3, v2 -; P9-AIX-64-NEXT: blr -; -; P9-AIX-32-LABEL: s2v_test_f5: -; P9-AIX-32: # %bb.0: # %entry -; P9-AIX-32-NEXT: lfs f0, 0(r3) -; P9-AIX-32-NEXT: xscvdpspn v3, f0 -; P9-AIX-32-NEXT: vmrgow v2, v3, v2 -; P9-AIX-32-NEXT: blr -; -; P8-AIX-64-LABEL: s2v_test_f5: -; P8-AIX-64: # %bb.0: # %entry -; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 -; P8-AIX-64-NEXT: vmrgow v2, v3, v2 -; P8-AIX-64-NEXT: blr -; -; P8-AIX-32-LABEL: s2v_test_f5: -; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: lfs f0, 0(r3) -; P8-AIX-32-NEXT: xscvdpspn v3, f0 -; P8-AIX-32-NEXT: vmrgow v2, v3, v2 -; P8-AIX-32-NEXT: blr +; AIX-LABEL: s2v_test_f5: +; AIX: # %bb.0: # %entry +; AIX-NEXT: lxsiwzx v3, 0, r3 +; AIX-NEXT: vmrgow v2, v3, v2 +; AIX-NEXT: blr entry: %0 = load float, float* %ptr1, align 8 %vecins = insertelement <2 x float> %vec, float %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index 225367f..dd873aa 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -659,15 +659,14 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { ; ; AIX-P8-32-LABEL: testFloatImm2: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lfs f0, 0(r3) ; AIX-P8-32-NEXT: lwz r4, L..C8(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3 +; AIX-P8-32-NEXT: li r5, 4 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 4(r3) -; AIX-P8-32-NEXT: lwz r3, L..C9(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C9(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 ; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 ; AIX-P8-32-NEXT: blr entry: @@ -732,17 +731,15 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) { ; ; AIX-P8-32-LABEL: testFloatImm3: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lis r4, 4 -; AIX-P8-32-NEXT: lfsx f0, r3, r4 ; AIX-P8-32-NEXT: lwz r4, L..C10(r2) # %const.0 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lis r5, 4 +; AIX-P8-32-NEXT: lxsiwzx v3, r3, r5 ; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 -; AIX-P8-32-NEXT: lfs f0, 0(r3) -; AIX-P8-32-NEXT: lwz r3, L..C11(r2) # %const.1 +; AIX-P8-32-NEXT: lwz r4, L..C11(r2) # %const.1 ; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 -; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 -; AIX-P8-32-NEXT: xscvdpspn v3, f0 -; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r4 +; AIX-P8-32-NEXT: lxsiwzx v4, 0, r3 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 ; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 -- 2.7.4