From 8737c74fab3aee833d85b7d235d2c47ebb4eed2e Mon Sep 17 00:00:00 2001 From: Kamau Bridgeman Date: Thu, 30 Sep 2021 11:36:54 -0500 Subject: [PATCH] [PowerPC][MMA] Allow MMA builtin types in pre-P10 compilation units This patch allows the use of __vector_quad and __vector_pair, PPC MMA builtin types, on all PowerPC 64-bit compilation units. When these types are made available, the builtins that use them automatically become available, so semantic checking for mma and pair vector memop __builtins is also expanded to ensure these builtin function calls are only allowed on Power10 and newer architectures. All related test cases are updated to ensure test coverage. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D109599 --- clang/include/clang/Sema/Sema.h | 3 +- clang/lib/AST/ASTContext.cpp | 5 +- clang/lib/Sema/Sema.cpp | 5 +- clang/lib/Sema/SemaChecking.cpp | 32 +- clang/test/AST/ast-dump-ppc-types.c | 21 +- clang/test/CodeGen/ppc-mma-types.c | 6 +- clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp | 6 +- clang/test/Sema/ppc-mma-builtins.c | 33 ++ clang/test/Sema/ppc-paired-vector-builtins.c | 28 ++ llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 424 +++++++++++++++++++++++++ 10 files changed, 531 insertions(+), 32 deletions(-) create mode 100644 clang/test/Sema/ppc-mma-builtins.c create mode 100644 clang/test/Sema/ppc-paired-vector-builtins.c diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a85e53a..0a68f6f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12705,7 +12705,8 @@ private: int ArgNum, unsigned ExpectedFieldNum, bool AllowName); bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall); - bool SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeDesc); + bool SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, + const char *TypeDesc); bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc); diff --git 
a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e2ebe737..d1fd3ce0 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1444,13 +1444,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, #include "clang/Basic/AArch64SVEACLETypes.def" } - if (Target.getTriple().isPPC64() && - Target.hasFeature("paired-vector-memops")) { - if (Target.hasFeature("mma")) { + if (Target.getTriple().isPPC64()) { #define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \ InitBuiltinType(Id##Ty, BuiltinType::Id); #include "clang/Basic/PPCTypes.def" - } #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \ InitBuiltinType(Id##Ty, BuiltinType::Id); #include "clang/Basic/PPCTypes.def" diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index d260a45..cf8dcbb 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -403,13 +403,10 @@ void Sema::Initialize() { #include "clang/Basic/AArch64SVEACLETypes.def" } - if (Context.getTargetInfo().getTriple().isPPC64() && - Context.getTargetInfo().hasFeature("paired-vector-memops")) { - if (Context.getTargetInfo().hasFeature("mma")) { + if (Context.getTargetInfo().getTriple().isPPC64()) { #define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \ addImplicitTypedef(#Name, Context.Id##Ty); #include "clang/Basic/PPCTypes.def" - } #define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \ addImplicitTypedef(#Name, Context.Id##Ty); #include "clang/Basic/PPCTypes.def" diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a6d26ac..0ee05c9 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3521,9 +3521,9 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case PPC::BI__builtin_ppc_store8r: return SemaFeatureCheck(*this, TheCall, "isa-v206-instructions", diag::err_ppc_builtin_only_on_arch, "7"); -#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ - case PPC::BI__builtin_##Name: \ - return SemaBuiltinPPCMMACall(TheCall, Types); 
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ + case PPC::BI__builtin_##Name: \ + return SemaBuiltinPPCMMACall(TheCall, BuiltinID, Types); #include "clang/Basic/BuiltinsPPC.def" } return SemaBuiltinConstantArgRange(TheCall, i, l, u); @@ -7481,11 +7481,35 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, /// Emit an error and return true on failure; return false on success. /// TypeStr is a string containing the type descriptor of the value returned by /// the builtin and the descriptors of the expected type of the arguments. -bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeStr) { +bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, + const char *TypeStr) { assert((TypeStr[0] != '\0') && "Invalid types in PPC MMA builtin declaration"); + switch (BuiltinID) { + default: + // This function is called in CheckPPCBuiltinFunctionCall where the + // BuiltinID is guaranteed to be an MMA or pair vector memop builtin, here + // we are isolating the pair vector memop builtins that can be used with mma + // off so the default case is every builtin that requires mma and paired + // vector memops. 
+ if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops", + diag::err_ppc_builtin_only_on_arch, "10") || + SemaFeatureCheck(*this, TheCall, "mma", + diag::err_ppc_builtin_only_on_arch, "10")) + return true; + break; + case PPC::BI__builtin_vsx_lxvp: + case PPC::BI__builtin_vsx_stxvp: + case PPC::BI__builtin_vsx_assemble_pair: + case PPC::BI__builtin_vsx_disassemble_pair: + if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops", + diag::err_ppc_builtin_only_on_arch, "10")) + return true; + break; + } + unsigned Mask = 0; unsigned ArgNum = 0; diff --git a/clang/test/AST/ast-dump-ppc-types.c b/clang/test/AST/ast-dump-ppc-types.c index 013f935..26ae544 100644 --- a/clang/test/AST/ast-dump-ppc-types.c +++ b/clang/test/AST/ast-dump-ppc-types.c @@ -1,13 +1,9 @@ -// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \ +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ // RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s -// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \ -// RUN: -target-feature -mma -ast-dump %s | FileCheck %s \ -// RUN: --check-prefix=CHECK-NO-MMA -// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \ -// RUN: -target-feature -paired-vector-memops -ast-dump %s | FileCheck %s \ -// RUN: --check-prefix=CHECK-NO-PAIRED // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \ -// RUN: -ast-dump %s | FileCheck %s --check-prefix=CHECK-PWR9 +// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr8 \ +// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck %s \ // RUN: --check-prefix=CHECK-X86_64 // RUN: %clang_cc1 -triple arm-unknown-unknown -ast-dump %s | FileCheck %s \ @@ -24,15 +20,6 @@ // CHECK: TypedefDecl {{.*}} implicit __vector_pair '__vector_pair' // CHECK-NEXT: 
-BuiltinType {{.*}} '__vector_pair' -// CHECK-NO-MMA-NOT: __vector_quad -// CHECK-NO-MMA: __vector_pair - -// CHECK-NO-PAIRED-NOT: __vector_quad -// CHECK-NO-PAIRED-NOT: __vector_pair - -// CHECK-PWR9-NOT: __vector_quad -// CHECK-PWR9-NOT: __vector_pair - // CHECK-X86_64-NOT: __vector_quad // CHECK-X86_64-NOT: __vector_pair diff --git a/clang/test/CodeGen/ppc-mma-types.c b/clang/test/CodeGen/ppc-mma-types.c index 777f5e5..bce930f 100644 --- a/clang/test/CodeGen/ppc-mma-types.c +++ b/clang/test/CodeGen/ppc-mma-types.c @@ -1,5 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future \ +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm -O3 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 \ +// RUN: -emit-llvm -O3 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 \ // RUN: -emit-llvm -O3 -o - %s | FileCheck %s // CHECK-LABEL: @test1( diff --git a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp index 228c6b0..74e50ce 100644 --- a/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp +++ b/clang/test/CodeGenCXX/ppc-mangle-mma-types.cpp @@ -1,4 +1,8 @@ -// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future %s \ +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr10 %s \ +// RUN: -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr9 %s \ +// RUN: -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu pwr8 %s \ // RUN: -emit-llvm -o - | FileCheck %s // CHECK: _Z2f1Pu13__vector_quad diff --git a/clang/test/Sema/ppc-mma-builtins.c b/clang/test/Sema/ppc-mma-builtins.c new file mode 100644 index 0000000..66cb542 --- /dev/null +++ b/clang/test/Sema/ppc-mma-builtins.c @@ -0,0 +1,33 @@ 
+// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ +// RUN: -target-feature -mma -fsyntax-only %s -verify + +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_pair res; + __builtin_vsx_assemble_pair(&res, vc, vc); +} + +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); +} + +void test3(const __vector_pair *vpp, signed long offset, const __vector_pair *vp2) { + __vector_pair vp = __builtin_vsx_lxvp(offset, vpp); + __builtin_vsx_stxvp(vp, offset, vp2); +} + +void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} + *((__vector_quad *)resp) = vq; +} + +void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} + *((__vector_quad *)resp) = vq; +} + + diff --git a/clang/test/Sema/ppc-paired-vector-builtins.c b/clang/test/Sema/ppc-paired-vector-builtins.c new file mode 100644 index 0000000..67010909 --- /dev/null +++ b/clang/test/Sema/ppc-paired-vector-builtins.c @@ -0,0 +1,28 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ +// RUN: -target-feature -paired-vector-memops -fsyntax-only %s -verify +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \ +// RUN: -fsyntax-only %s -verify + +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned 
char *resp) { + __vector_pair res; + __builtin_vsx_assemble_pair(&res, vc, vc); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} +} + +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} +} + +void test3(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) { + __vector_pair vp = __builtin_vsx_lxvp(offset, vpp); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} + __builtin_vsx_stxvp(vp, offset, vp2); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} +} + +void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __builtin_mma_xxmtacc(&vq); // expected-error {{this builtin is only valid on POWER10 or later CPUs}} + *((__vector_quad *)resp) = vq; +} + + diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll index 6e3ea1b..ee97843 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -5,6 +5,18 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=BE-PAIRED +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=LE-PWR9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=LE-PWR8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names 
-mtriple=powerpc64-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=BE-PWR9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=BE-PWR8 @f = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 16 @g = common dso_local local_unnamed_addr global <256 x i1> zeroinitializer, align 16 @@ -35,6 +47,78 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs3, 176(r3) ; BE-PAIRED-NEXT: stxv vs2, 160(r3) ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testLdSt: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r3, r2, f@toc@ha +; LE-PWR9-NEXT: addi r3, r3, f@toc@l +; LE-PWR9-NEXT: lxv vs1, 96(r3) +; LE-PWR9-NEXT: lxv vs0, 64(r3) +; LE-PWR9-NEXT: lxv vs2, 112(r3) +; LE-PWR9-NEXT: stxv vs1, 160(r3) +; LE-PWR9-NEXT: lxv vs1, 80(r3) +; LE-PWR9-NEXT: stxv vs2, 176(r3) +; LE-PWR9-NEXT: stxv vs0, 128(r3) +; LE-PWR9-NEXT: stxv vs1, 144(r3) +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testLdSt: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r3, r2, f@toc@ha +; LE-PWR8-NEXT: li r4, 96 +; LE-PWR8-NEXT: li r5, 112 +; LE-PWR8-NEXT: addi r3, r3, f@toc@l +; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; LE-PWR8-NEXT: li r4, 64 +; LE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; LE-PWR8-NEXT: li r5, 80 +; LE-PWR8-NEXT: lxvd2x vs2, r3, r4 +; LE-PWR8-NEXT: lxvd2x vs3, r3, r5 +; LE-PWR8-NEXT: li r4, 176 +; LE-PWR8-NEXT: li r5, 160 +; LE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; LE-PWR8-NEXT: li r4, 144 +; LE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; LE-PWR8-NEXT: li r5, 128 +; LE-PWR8-NEXT: stxvd2x vs3, r3, r4 +; LE-PWR8-NEXT: stxvd2x vs2, r3, r5 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testLdSt: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r3, r2, f@toc@ha +; BE-PWR9-NEXT: addi r3, r3, f@toc@l +; BE-PWR9-NEXT: lxv vs1, 96(r3) +; BE-PWR9-NEXT: lxv vs0, 64(r3) +; BE-PWR9-NEXT: lxv vs2, 112(r3) +; BE-PWR9-NEXT: stxv vs1, 
160(r3) +; BE-PWR9-NEXT: lxv vs1, 80(r3) +; BE-PWR9-NEXT: stxv vs2, 176(r3) +; BE-PWR9-NEXT: stxv vs0, 128(r3) +; BE-PWR9-NEXT: stxv vs1, 144(r3) +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testLdSt: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: addis r3, r2, f@toc@ha +; BE-PWR8-NEXT: li r4, 96 +; BE-PWR8-NEXT: li r5, 112 +; BE-PWR8-NEXT: addi r3, r3, f@toc@l +; BE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; BE-PWR8-NEXT: li r4, 64 +; BE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; BE-PWR8-NEXT: li r5, 80 +; BE-PWR8-NEXT: lxvd2x vs2, r3, r4 +; BE-PWR8-NEXT: lxvd2x vs3, r3, r5 +; BE-PWR8-NEXT: li r4, 176 +; BE-PWR8-NEXT: li r5, 160 +; BE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; BE-PWR8-NEXT: li r4, 144 +; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; BE-PWR8-NEXT: li r5, 128 +; BE-PWR8-NEXT: stxvd2x vs3, r3, r4 +; BE-PWR8-NEXT: stxvd2x vs2, r3, r5 +; BE-PWR8-NEXT: blr entry: %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 1 %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64 @@ -78,6 +162,84 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs3, 48(r3) ; BE-PAIRED-NEXT: stxv vs2, 32(r3) ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testXLdSt: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r5, r2, f@toc@ha +; LE-PWR9-NEXT: sldi r3, r3, 6 +; LE-PWR9-NEXT: addi r5, r5, f@toc@l +; LE-PWR9-NEXT: add r6, r5, r3 +; LE-PWR9-NEXT: lxvx vs3, r5, r3 +; LE-PWR9-NEXT: sldi r3, r4, 6 +; LE-PWR9-NEXT: lxv vs0, 16(r6) +; LE-PWR9-NEXT: lxv vs1, 32(r6) +; LE-PWR9-NEXT: lxv vs2, 48(r6) +; LE-PWR9-NEXT: stxvx vs3, r5, r3 +; LE-PWR9-NEXT: add r3, r5, r3 +; LE-PWR9-NEXT: stxv vs2, 48(r3) +; LE-PWR9-NEXT: stxv vs1, 32(r3) +; LE-PWR9-NEXT: stxv vs0, 16(r3) +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testXLdSt: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r5, r2, f@toc@ha +; LE-PWR8-NEXT: sldi r3, r3, 6 +; LE-PWR8-NEXT: li r6, 48 +; LE-PWR8-NEXT: li r8, 16 +; LE-PWR8-NEXT: li r9, 32 +; LE-PWR8-NEXT: addi r5, r5, f@toc@l +; LE-PWR8-NEXT: add r7, r5, r3 +; 
LE-PWR8-NEXT: lxvd2x vs0, r5, r3 +; LE-PWR8-NEXT: sldi r3, r4, 6 +; LE-PWR8-NEXT: lxvd2x vs1, r7, r6 +; LE-PWR8-NEXT: lxvd2x vs2, r7, r8 +; LE-PWR8-NEXT: add r4, r5, r3 +; LE-PWR8-NEXT: lxvd2x vs3, r7, r9 +; LE-PWR8-NEXT: stxvd2x vs0, r5, r3 +; LE-PWR8-NEXT: stxvd2x vs1, r4, r6 +; LE-PWR8-NEXT: stxvd2x vs3, r4, r9 +; LE-PWR8-NEXT: stxvd2x vs2, r4, r8 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testXLdSt: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r5, r2, f@toc@ha +; BE-PWR9-NEXT: sldi r3, r3, 6 +; BE-PWR9-NEXT: addi r5, r5, f@toc@l +; BE-PWR9-NEXT: add r6, r5, r3 +; BE-PWR9-NEXT: lxvx vs3, r5, r3 +; BE-PWR9-NEXT: sldi r3, r4, 6 +; BE-PWR9-NEXT: lxv vs0, 16(r6) +; BE-PWR9-NEXT: lxv vs1, 32(r6) +; BE-PWR9-NEXT: lxv vs2, 48(r6) +; BE-PWR9-NEXT: stxvx vs3, r5, r3 +; BE-PWR9-NEXT: add r3, r5, r3 +; BE-PWR9-NEXT: stxv vs2, 48(r3) +; BE-PWR9-NEXT: stxv vs1, 32(r3) +; BE-PWR9-NEXT: stxv vs0, 16(r3) +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testXLdSt: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: addis r5, r2, f@toc@ha +; BE-PWR8-NEXT: sldi r3, r3, 6 +; BE-PWR8-NEXT: li r6, 32 +; BE-PWR8-NEXT: li r7, 48 +; BE-PWR8-NEXT: li r9, 16 +; BE-PWR8-NEXT: addi r5, r5, f@toc@l +; BE-PWR8-NEXT: add r8, r5, r3 +; BE-PWR8-NEXT: lxvd2x vs2, r5, r3 +; BE-PWR8-NEXT: sldi r3, r4, 6 +; BE-PWR8-NEXT: lxvd2x vs0, r8, r6 +; BE-PWR8-NEXT: lxvd2x vs1, r8, r7 +; BE-PWR8-NEXT: add r4, r5, r3 +; BE-PWR8-NEXT: lxvd2x vs3, r8, r9 +; BE-PWR8-NEXT: stxvd2x vs2, r5, r3 +; BE-PWR8-NEXT: stxvd2x vs1, r4, r7 +; BE-PWR8-NEXT: stxvd2x vs0, r4, r6 +; BE-PWR8-NEXT: stxvd2x vs3, r4, r9 +; BE-PWR8-NEXT: blr entry: %arrayidx = getelementptr inbounds <512 x i1>, <512 x i1>* @f, i64 %SrcIdx %0 = load <512 x i1>, <512 x i1>* %arrayidx, align 64 @@ -112,6 +274,94 @@ define dso_local void @testUnalignedLdSt() { ; BE-PAIRED-NEXT: pstxv vs3, 67(r3), 0 ; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0 ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testUnalignedLdSt: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r3, r2, 
f@toc@ha +; LE-PWR9-NEXT: li r4, 11 +; LE-PWR9-NEXT: addi r3, r3, f@toc@l +; LE-PWR9-NEXT: lxvx vs0, r3, r4 +; LE-PWR9-NEXT: li r4, 27 +; LE-PWR9-NEXT: lxvx vs1, r3, r4 +; LE-PWR9-NEXT: li r4, 43 +; LE-PWR9-NEXT: lxvx vs2, r3, r4 +; LE-PWR9-NEXT: li r4, 59 +; LE-PWR9-NEXT: lxvx vs3, r3, r4 +; LE-PWR9-NEXT: li r4, 67 +; LE-PWR9-NEXT: stxvx vs3, r3, r4 +; LE-PWR9-NEXT: li r4, 51 +; LE-PWR9-NEXT: stxvx vs2, r3, r4 +; LE-PWR9-NEXT: li r4, 35 +; LE-PWR9-NEXT: stxvx vs1, r3, r4 +; LE-PWR9-NEXT: li r4, 19 +; LE-PWR9-NEXT: stxvx vs0, r3, r4 +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testUnalignedLdSt: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r3, r2, f@toc@ha +; LE-PWR8-NEXT: li r4, 59 +; LE-PWR8-NEXT: li r5, 43 +; LE-PWR8-NEXT: addi r3, r3, f@toc@l +; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; LE-PWR8-NEXT: li r4, 11 +; LE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; LE-PWR8-NEXT: li r5, 27 +; LE-PWR8-NEXT: lxvd2x vs2, r3, r4 +; LE-PWR8-NEXT: lxvd2x vs3, r3, r5 +; LE-PWR8-NEXT: li r4, 51 +; LE-PWR8-NEXT: li r5, 67 +; LE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; LE-PWR8-NEXT: li r4, 35 +; LE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; LE-PWR8-NEXT: li r5, 19 +; LE-PWR8-NEXT: stxvd2x vs3, r3, r4 +; LE-PWR8-NEXT: stxvd2x vs2, r3, r5 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testUnalignedLdSt: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r3, r2, f@toc@ha +; BE-PWR9-NEXT: li r4, 11 +; BE-PWR9-NEXT: addi r3, r3, f@toc@l +; BE-PWR9-NEXT: lxvx vs0, r3, r4 +; BE-PWR9-NEXT: li r4, 27 +; BE-PWR9-NEXT: lxvx vs1, r3, r4 +; BE-PWR9-NEXT: li r4, 43 +; BE-PWR9-NEXT: lxvx vs2, r3, r4 +; BE-PWR9-NEXT: li r4, 59 +; BE-PWR9-NEXT: lxvx vs3, r3, r4 +; BE-PWR9-NEXT: li r4, 67 +; BE-PWR9-NEXT: stxvx vs3, r3, r4 +; BE-PWR9-NEXT: li r4, 51 +; BE-PWR9-NEXT: stxvx vs2, r3, r4 +; BE-PWR9-NEXT: li r4, 35 +; BE-PWR9-NEXT: stxvx vs1, r3, r4 +; BE-PWR9-NEXT: li r4, 19 +; BE-PWR9-NEXT: stxvx vs0, r3, r4 +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testUnalignedLdSt: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: addis r3, r2, 
f@toc@ha +; BE-PWR8-NEXT: li r4, 43 +; BE-PWR8-NEXT: li r5, 59 +; BE-PWR8-NEXT: addi r3, r3, f@toc@l +; BE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; BE-PWR8-NEXT: li r4, 11 +; BE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; BE-PWR8-NEXT: li r5, 27 +; BE-PWR8-NEXT: lxvd2x vs2, r3, r4 +; BE-PWR8-NEXT: lxvd2x vs3, r3, r5 +; BE-PWR8-NEXT: li r4, 67 +; BE-PWR8-NEXT: li r5, 51 +; BE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; BE-PWR8-NEXT: li r4, 35 +; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; BE-PWR8-NEXT: li r5, 19 +; BE-PWR8-NEXT: stxvd2x vs3, r3, r4 +; BE-PWR8-NEXT: stxvd2x vs2, r3, r5 +; BE-PWR8-NEXT: blr entry: %0 = bitcast <512 x i1>* @f to i8* %add.ptr = getelementptr inbounds i8, i8* %0, i64 11 @@ -141,6 +391,54 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv v3, 80(r3) ; BE-PAIRED-NEXT: stxv v2, 64(r3) ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testLdStPair: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r3, r2, g@toc@ha +; LE-PWR9-NEXT: addi r3, r3, g@toc@l +; LE-PWR9-NEXT: lxv vs0, 32(r3) +; LE-PWR9-NEXT: lxv vs1, 48(r3) +; LE-PWR9-NEXT: stxv vs1, 80(r3) +; LE-PWR9-NEXT: stxv vs0, 64(r3) +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testLdStPair: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r3, r2, g@toc@ha +; LE-PWR8-NEXT: li r4, 32 +; LE-PWR8-NEXT: li r5, 48 +; LE-PWR8-NEXT: addi r3, r3, g@toc@l +; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; LE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; LE-PWR8-NEXT: li r4, 80 +; LE-PWR8-NEXT: li r5, 64 +; LE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; LE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testLdStPair: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r3, r2, g@toc@ha +; BE-PWR9-NEXT: addi r3, r3, g@toc@l +; BE-PWR9-NEXT: lxv vs0, 32(r3) +; BE-PWR9-NEXT: lxv vs1, 48(r3) +; BE-PWR9-NEXT: stxv vs1, 80(r3) +; BE-PWR9-NEXT: stxv vs0, 64(r3) +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testLdStPair: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: addis r3, r2, g@toc@ha +; BE-PWR8-NEXT: li r4, 32 +; 
BE-PWR8-NEXT: li r5, 48 +; BE-PWR8-NEXT: addi r3, r3, g@toc@l +; BE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; BE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; BE-PWR8-NEXT: li r4, 80 +; BE-PWR8-NEXT: li r5, 64 +; BE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 +; BE-PWR8-NEXT: blr entry: %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 1 %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64 @@ -176,6 +474,64 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxvx v2, r5, r3 ; BE-PAIRED-NEXT: stxv v3, 16(r4) ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testXLdStPair: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r5, r2, g@toc@ha +; LE-PWR9-NEXT: sldi r3, r3, 5 +; LE-PWR9-NEXT: sldi r4, r4, 5 +; LE-PWR9-NEXT: addi r5, r5, g@toc@l +; LE-PWR9-NEXT: add r6, r5, r3 +; LE-PWR9-NEXT: lxvx vs1, r5, r3 +; LE-PWR9-NEXT: lxv vs0, 16(r6) +; LE-PWR9-NEXT: add r6, r5, r4 +; LE-PWR9-NEXT: stxvx vs1, r5, r4 +; LE-PWR9-NEXT: stxv vs0, 16(r6) +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testXLdStPair: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r5, r2, g@toc@ha +; LE-PWR8-NEXT: sldi r3, r3, 5 +; LE-PWR8-NEXT: li r7, 16 +; LE-PWR8-NEXT: addi r5, r5, g@toc@l +; LE-PWR8-NEXT: add r6, r5, r3 +; LE-PWR8-NEXT: lxvd2x vs1, r5, r3 +; LE-PWR8-NEXT: sldi r3, r4, 5 +; LE-PWR8-NEXT: lxvd2x vs0, r6, r7 +; LE-PWR8-NEXT: add r4, r5, r3 +; LE-PWR8-NEXT: stxvd2x vs1, r5, r3 +; LE-PWR8-NEXT: stxvd2x vs0, r4, r7 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testXLdStPair: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r5, r2, g@toc@ha +; BE-PWR9-NEXT: sldi r3, r3, 5 +; BE-PWR9-NEXT: sldi r4, r4, 5 +; BE-PWR9-NEXT: addi r5, r5, g@toc@l +; BE-PWR9-NEXT: add r6, r5, r3 +; BE-PWR9-NEXT: lxvx vs1, r5, r3 +; BE-PWR9-NEXT: lxv vs0, 16(r6) +; BE-PWR9-NEXT: add r6, r5, r4 +; BE-PWR9-NEXT: stxvx vs1, r5, r4 +; BE-PWR9-NEXT: stxv vs0, 16(r6) +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testXLdStPair: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: 
addis r5, r2, g@toc@ha +; BE-PWR8-NEXT: sldi r3, r3, 5 +; BE-PWR8-NEXT: li r7, 16 +; BE-PWR8-NEXT: addi r5, r5, g@toc@l +; BE-PWR8-NEXT: add r6, r5, r3 +; BE-PWR8-NEXT: lxvd2x vs0, r5, r3 +; BE-PWR8-NEXT: sldi r3, r4, 5 +; BE-PWR8-NEXT: lxvd2x vs1, r6, r7 +; BE-PWR8-NEXT: add r4, r5, r3 +; BE-PWR8-NEXT: stxvd2x vs0, r5, r3 +; BE-PWR8-NEXT: stxvd2x vs1, r4, r7 +; BE-PWR8-NEXT: blr entry: %arrayidx = getelementptr inbounds <256 x i1>, <256 x i1>* @g, i64 %SrcIdx %0 = load <256 x i1>, <256 x i1>* %arrayidx, align 64 @@ -202,6 +558,74 @@ define dso_local void @testUnalignedLdStPair() { ; BE-PAIRED-NEXT: pstxv v3, 35(r3), 0 ; BE-PAIRED-NEXT: pstxv v2, 19(r3), 0 ; BE-PAIRED-NEXT: blr +; +; LE-PWR9-LABEL: testUnalignedLdStPair: +; LE-PWR9: # %bb.0: # %entry +; LE-PWR9-NEXT: addis r3, r2, g@toc@ha +; LE-PWR9-NEXT: li r6, 19 +; LE-PWR9-NEXT: li r4, 11 +; LE-PWR9-NEXT: li r5, 35 +; LE-PWR9-NEXT: li r7, 27 +; LE-PWR9-NEXT: addi r3, r3, g@toc@l +; LE-PWR9-NEXT: lxvx vs0, r3, r6 +; LE-PWR9-NEXT: ldx r4, r3, r4 +; LE-PWR9-NEXT: ldx r5, r3, r5 +; LE-PWR9-NEXT: stdx r4, r3, r6 +; LE-PWR9-NEXT: stxvx vs0, r3, r7 +; LE-PWR9-NEXT: li r7, 43 +; LE-PWR9-NEXT: stdx r5, r3, r7 +; LE-PWR9-NEXT: blr +; +; LE-PWR8-LABEL: testUnalignedLdStPair: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: addis r3, r2, g@toc@ha +; LE-PWR8-NEXT: li r4, 19 +; LE-PWR8-NEXT: li r5, 11 +; LE-PWR8-NEXT: li r6, 35 +; LE-PWR8-NEXT: li r7, 43 +; LE-PWR8-NEXT: li r8, 27 +; LE-PWR8-NEXT: addi r3, r3, g@toc@l +; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; LE-PWR8-NEXT: ldx r5, r3, r5 +; LE-PWR8-NEXT: ldx r6, r3, r6 +; LE-PWR8-NEXT: stdx r6, r3, r7 +; LE-PWR8-NEXT: stdx r5, r3, r4 +; LE-PWR8-NEXT: stxvd2x vs0, r3, r8 +; LE-PWR8-NEXT: blr +; +; BE-PWR9-LABEL: testUnalignedLdStPair: +; BE-PWR9: # %bb.0: # %entry +; BE-PWR9-NEXT: addis r3, r2, g@toc@ha +; BE-PWR9-NEXT: li r6, 19 +; BE-PWR9-NEXT: li r4, 11 +; BE-PWR9-NEXT: li r5, 35 +; BE-PWR9-NEXT: li r7, 27 +; BE-PWR9-NEXT: addi r3, r3, g@toc@l +; BE-PWR9-NEXT: lxvx vs0, r3, r6 
+; BE-PWR9-NEXT: ldx r4, r3, r4 +; BE-PWR9-NEXT: ldx r5, r3, r5 +; BE-PWR9-NEXT: stdx r4, r3, r6 +; BE-PWR9-NEXT: stxvx vs0, r3, r7 +; BE-PWR9-NEXT: li r7, 43 +; BE-PWR9-NEXT: stdx r5, r3, r7 +; BE-PWR9-NEXT: blr +; +; BE-PWR8-LABEL: testUnalignedLdStPair: +; BE-PWR8: # %bb.0: # %entry +; BE-PWR8-NEXT: addis r3, r2, g@toc@ha +; BE-PWR8-NEXT: li r4, 19 +; BE-PWR8-NEXT: li r5, 11 +; BE-PWR8-NEXT: li r6, 35 +; BE-PWR8-NEXT: li r7, 27 +; BE-PWR8-NEXT: addi r3, r3, g@toc@l +; BE-PWR8-NEXT: lxvd2x vs0, r3, r4 +; BE-PWR8-NEXT: ldx r5, r3, r5 +; BE-PWR8-NEXT: ldx r6, r3, r6 +; BE-PWR8-NEXT: stxvd2x vs0, r3, r7 +; BE-PWR8-NEXT: li r7, 43 +; BE-PWR8-NEXT: stdx r5, r3, r4 +; BE-PWR8-NEXT: stdx r6, r3, r7 +; BE-PWR8-NEXT: blr entry: %0 = bitcast <256 x i1>* @g to i8* %add.ptr = getelementptr inbounds i8, i8* %0, i64 11 -- 2.7.4