From 8a58f21f5b6c228137a9b87906fe5b720c4d1dfb Mon Sep 17 00:00:00 2001 From: Kamau Bridgeman Date: Tue, 22 Dec 2020 12:04:57 -0500 Subject: [PATCH] [PowerPC][Power10] Exploit store rightmost vector element instructions Use the store rightmost vector element instructions to do vector element extraction and store. The rightmost vector element on little endian is the zeroth vector element; with these patterns, that element can be extracted and stored in one instruction for all vector types. Differential Revision: https://reviews.llvm.org/D89195 --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 25 +- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll | 261 ++++++++++++--------- .../CodeGen/PowerPC/store-rightmost-vector-elt.ll | 109 +++++++++ 3 files changed, 277 insertions(+), 118 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 54e9ada..e7fa2af 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2554,16 +2554,21 @@ let Predicates = [IsISA3_1, HasVSX] in { (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>; } -let AddedComplexity = 400, Predicates = [IsISA3_1] in { - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src), - (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src), - (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src), - (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; - def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src), - (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; -} +let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in { + // Store element 0 of a VSX register to memory + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst), 
+ (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst), + (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst), + (STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst), + (STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst), + (STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>; + def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst), + (STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>; + } class xxevalPattern <dag pattern, bits<8> imm> : Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll index 17617e9..9e8f8d07 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-O0 +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-O0 ; These test cases aims to test the builtins for the Power10 VSX vector ; instructions introduced in ISA 3.1. 
@@ -22,14 +22,6 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) { ; CHECK-NEXT: srwi r3, r3, 31 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: xvtlsbb cr0, v2 -; CHECK-O0-NEXT: mfocrf r3, 128 -; CHECK-O0-NEXT: srwi r3, r3, 31 -; CHECK-O0-NEXT: extsw r3, r3 -; CHECK-O0-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1) ret i32 %0 @@ -43,24 +35,22 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) { ; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31 ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: xvtlsbb cr0, v2 -; CHECK-O0-NEXT: mfocrf r3, 128 -; CHECK-O0-NEXT: rlwinm r3, r3, 3, 31, 31 -; CHECK-O0-NEXT: extsw r3, r3 -; CHECK-O0-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0) ret i32 %0 } define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_sc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stxvrbx v2, r6, r5 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_sc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, r6, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_sc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, r6, r5 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_sc: ; CHECK-O0: # %bb.0: # %entry @@ -79,10 +69,16 @@ entry: } define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_uc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stxvrbx v2, r6, r5 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_uc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, r6, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_uc: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: 
vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, r6, r5 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_uc: ; CHECK-O0: # %bb.0: # %entry @@ -101,11 +97,18 @@ entry: } define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ss: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 1 -; CHECK-NEXT: stxvrhx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ss: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 1 +; CHECK-LE-NEXT: stxvrhx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ss: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: stxsihx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ss: ; CHECK-O0: # %bb.0: # %entry @@ -125,11 +128,18 @@ entry: } define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_us: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 1 -; CHECK-NEXT: stxvrhx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_us: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 1 +; CHECK-LE-NEXT: stxvrhx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_us: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; CHECK-BE-NEXT: sldi r3, r5, 1 +; CHECK-BE-NEXT: stxsihx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_us: ; CHECK-O0: # %bb.0: # %entry @@ -149,11 +159,18 @@ entry: } define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_si: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 -; CHECK-NEXT: stxvrwx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_si: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 2 +; CHECK-LE-NEXT: stxvrwx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: 
vec_xst_trunc_si: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: stfiwx f0, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_si: ; CHECK-O0: # %bb.0: # %entry @@ -173,11 +190,18 @@ entry: } define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 -; CHECK-NEXT: stxvrwx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ui: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 2 +; CHECK-LE-NEXT: stxvrwx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ui: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: sldi r3, r5, 2 +; CHECK-BE-NEXT: stfiwx f0, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ui: ; CHECK-O0: # %bb.0: # %entry @@ -197,11 +221,17 @@ entry: } define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_sll: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 3 -; CHECK-NEXT: stxvrdx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_sll: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 3 +; CHECK-LE-NEXT: stxvrdx v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_sll: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r5, 3 +; CHECK-BE-NEXT: stxsdx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_sll: ; CHECK-O0: # %bb.0: # %entry @@ -219,11 +249,17 @@ entry: } define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { -; CHECK-LABEL: vec_xst_trunc_ull: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 3 -; CHECK-NEXT: stxvrdx v2, r6, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xst_trunc_ull: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r5, 3 +; CHECK-LE-NEXT: stxvrdx 
v2, r6, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xst_trunc_ull: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r5, 3 +; CHECK-BE-NEXT: stxsdx v2, r6, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xst_trunc_ull: ; CHECK-O0: # %bb.0: # %entry @@ -245,11 +281,6 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxvrbx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: lxvrbx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset %0 = load i8, i8* %add.ptr, align 1 @@ -264,12 +295,6 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea ; CHECK-NEXT: sldi r3, r3, 1 ; CHECK-NEXT: lxvrhx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_short: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lxvrhx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset %0 = load i16, i16* %add.ptr, align 2 @@ -284,12 +309,6 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read ; CHECK-NEXT: sldi r3, r3, 2 ; CHECK-NEXT: lxvrwx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_word: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lxvrwx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset %0 = load i32, i32* %add.ptr, align 4 @@ -304,12 +323,6 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lxvrdx v2, r4, r3 ; CHECK-NEXT: blr -; -; CHECK-O0-LABEL: vec_xl_zext_dw: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: lxvrdx v2, r4, r3 -; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %__pointer, 
i64 %__offset %0 = load i64, i64* %add.ptr, align 8 @@ -319,13 +332,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) { -; CHECK-LABEL: vec_xl_sext_b: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lbzx r3, r4, r3 -; CHECK-NEXT: extsb r3, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_b: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lbzx r3, r4, r3 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_b: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lbzx r3, r4, r3 +; CHECK-BE-NEXT: extsb r3, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_b: ; CHECK-O0: # %bb.0: # %entry @@ -343,13 +364,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) { -; CHECK-LABEL: vec_xl_sext_h: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r3, 1 -; CHECK-NEXT: lhax r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_h: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 1 +; CHECK-LE-NEXT: lhax r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_h: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 1 +; CHECK-BE-NEXT: lhax r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_h: ; CHECK-O0: # %bb.0: # %entry @@ -367,13 +396,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) { -; CHECK-LABEL: vec_xl_sext_w: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r3, 2 -; CHECK-NEXT: lwax r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; 
CHECK-LE-LABEL: vec_xl_sext_w: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: lwax r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_w: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: lwax r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_w: ; CHECK-O0: # %bb.0: # %entry @@ -391,13 +428,21 @@ entry: } define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) { -; CHECK-LABEL: vec_xl_sext_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: ldx r3, r4, r3 -; CHECK-NEXT: sradi r4, r3, 63 -; CHECK-NEXT: mtvsrdd v2, r4, r3 -; CHECK-NEXT: blr +; CHECK-LE-LABEL: vec_xl_sext_d: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: ldx r3, r4, r3 +; CHECK-LE-NEXT: sradi r4, r3, 63 +; CHECK-LE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: vec_xl_sext_d: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r3, r3, 3 +; CHECK-BE-NEXT: ldx r3, r4, r3 +; CHECK-BE-NEXT: sradi r4, r3, 63 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 +; CHECK-BE-NEXT: blr ; ; CHECK-O0-LABEL: vec_xl_sext_d: ; CHECK-O0: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll new file mode 100644 index 0000000..5fbcafec --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/store-rightmost-vector-elt.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-LE + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr 
-ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @test1(<4 x i32> %A, i32* %a) { +; CHECK-LE-LABEL: test1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrwx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %A, i32 0 + store i32 %vecext, i32* %a, align 4 + ret void +} + +define void @test2(<4 x float> %A, float* %a) { +; CHECK-LE-LABEL: test2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrwx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: stfiwx f0, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %A, i32 0 + store float %vecext, float* %a, align 4 + ret void +} + +define void @test3(<2 x double> %A, double* %a) { +; CHECK-LE-LABEL: test3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrdx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: stxsd v2, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <2 x double> %A, i32 0 + store double %vecext, double* %a, align 8 + ret void +} + +define void @test4(<2 x i64> %A, i64* %a) { +; CHECK-LE-LABEL: test4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrdx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: stxsd v2, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %A, i32 0 + store i64 %vecext, i64* %a, align 8 + ret void +} + +define void @test5(<8 x i16> %A, i16* %a) { +; CHECK-LE-LABEL: test5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrhx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 +; 
CHECK-BE-NEXT: stxsihx v2, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %A, i32 0 + store i16 %vecext, i16* %a, align 2 + ret void +} + +define void @test6(<16 x i8> %A, i8* %a) { +; CHECK-LE-LABEL: test6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: stxvrbx v2, 0, r5 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9 +; CHECK-BE-NEXT: stxsibx v2, 0, r5 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %A, i32 0 + store i8 %vecext, i8* %a, align 1 + ret void +} + -- 2.7.4