HasP8Crypto = true;
} else if (Feature == "+direct-move") {
HasDirectMove = true;
+ } else if (Feature == "+qpx") {
+ HasQPX = true;
} else if (Feature == "+htm") {
HasHTM = true;
} else if (Feature == "+float128") {
}
// ABI options.
- if (ABI == "elfv1")
+ if (ABI == "elfv1" || ABI == "elfv1-qpx")
Builder.defineMacro("_CALL_ELF", "1");
if (ABI == "elfv2")
Builder.defineMacro("_CALL_ELF", "2");
Builder.defineMacro("_ARCH_PWR10");
if (ArchDefs & ArchDefineA2)
Builder.defineMacro("_ARCH_A2");
+ if (ArchDefs & ArchDefineA2q) {
+ Builder.defineMacro("_ARCH_A2Q");
+ Builder.defineMacro("_ARCH_QP");
+ }
if (ArchDefs & ArchDefineE500)
Builder.defineMacro("__NO_LWSYNC__");
if (ArchDefs & ArchDefineFuture)
Builder.defineMacro("_ARCH_PWR_FUTURE");
+ if (getTriple().getVendor() == llvm::Triple::BGQ) {
+ Builder.defineMacro("__bg__");
+ Builder.defineMacro("__THW_BLUEGENE__");
+ Builder.defineMacro("__bgq__");
+ Builder.defineMacro("__TOS_BGQ__");
+ }
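+ // User code typically tests these macros; an illustrative sketch (not
+ // part of this patch):
+ //   #ifdef __bgq__
+ //   // Blue Gene/Q-specific code path (e.g., QPX kernels).
+ //   #endif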
+
if (HasAltivec) {
Builder.defineMacro("__VEC__", "10206");
Builder.defineMacro("__ALTIVEC__");
.Case("ppc64le", true)
.Default(false);
+ Features["qpx"] = (CPU == "a2q");
Features["power9-vector"] = (CPU == "pwr9");
Features["crypto"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
.Case("power8-vector", HasP8Vector)
.Case("crypto", HasP8Crypto)
.Case("direct-move", HasDirectMove)
+ .Case("qpx", HasQPX)
.Case("htm", HasHTM)
.Case("bpermd", HasBPERMD)
.Case("extdiv", HasExtDiv)
}
static constexpr llvm::StringLiteral ValidCPUNames[] = {
- {"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
- {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
- {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
- {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
- {"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"},
- {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"},
- {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"},
- {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"},
- {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"},
- {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"}, {"powerpc64le"},
- {"ppc64le"}, {"future"}};
+ {"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
+ {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
+ {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
+ {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
+ {"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"},
+ {"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"},
+ {"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"},
+ {"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"},
+ {"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"},
+ {"pwr10"}, {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"},
+ {"powerpc64le"}, {"ppc64le"}, {"future"}};
bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
ArchDefinePwr10 = 1 << 14,
ArchDefineFuture = 1 << 15,
ArchDefineA2 = 1 << 16,
+ ArchDefineA2q = 1 << 17,
ArchDefineE500 = 1 << 18
} ArchDefineTypes;
bool HasP8Vector = false;
bool HasP8Crypto = false;
bool HasDirectMove = false;
+ bool HasQPX = false;
bool HasHTM = false;
bool HasBPERMD = false;
bool HasExtDiv = false;
.Case("970", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr |
ArchDefinePpcsq)
.Case("a2", ArchDefineA2)
+ .Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q)
.Cases("power3", "pwr3", ArchDefinePpcgr)
.Cases("power4", "pwr4",
ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("970", "970")
.Case("G5", "g5")
.Case("a2", "a2")
+ .Case("a2q", "a2q")
.Case("e500", "e500")
.Case("e500mc", "e500mc")
.Case("e5500", "e5500")
if (T.isOSBinFormatELF()) {
switch (getToolChain().getArch()) {
case llvm::Triple::ppc64: {
+ // When targeting a processor that supports QPX, or if QPX is
+ // specifically enabled, default to using the ABI that supports QPX (so
+ // long as it is not specifically disabled).
+ bool HasQPX = false;
+ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
+ HasQPX = A->getValue() == StringRef("a2q");
+ HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX);
+ if (HasQPX) {
+ ABIName = "elfv1-qpx";
+ break;
+ }
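+ // Resulting defaults (exercised by the driver tests in this patch):
+ //   -mcpu=a2q           -> elfv1-qpx
+ //   -mcpu=a2 -mqpx      -> elfv1-qpx
+ //   -mcpu=a2q -mno-qpx  -> the usual default below (elfv1 or elfv2)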
+
if (T.isMusl() || (T.isOSFreeBSD() && T.getOSMajorVersion() >= 13))
ABIName = "elfv2";
else
// PPCPWR8: "-target-cpu" "pwr8"
// RUN: %clang -target powerpc64-unknown-linux-gnu \
+// RUN: -### -S %s -mcpu=a2q 2>&1 | FileCheck -check-prefix=PPCA2Q %s
+// PPCA2Q: clang
+// PPCA2Q: "-cc1"
+// PPCA2Q: "-target-cpu" "a2q"
+
+// RUN: %clang -target powerpc64-unknown-linux-gnu \
// RUN: -### -S %s -mcpu=630 2>&1 | FileCheck -check-prefix=PPC630 %s
// PPC630: clang
// PPC630: "-cc1"
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1 %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
+// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
+// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
+// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1 %s
+// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-BE %s
// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// CHECK-ELFv1: "-target-abi" "elfv1"
// CHECK-ELFv1-LE: "-mrelocation-model" "static"
// CHECK-ELFv1-LE: "-target-abi" "elfv1"
+// CHECK-ELFv1-QPX: "-mrelocation-model" "static"
+// CHECK-ELFv1-QPX: "-target-abi" "elfv1-qpx"
// CHECK-ELFv2: "-mrelocation-model" "static"
// CHECK-ELFv2: "-target-abi" "elfv2"
// CHECK-ELFv2-BE: "-mrelocation-model" "static"
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
+// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
+// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
+// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
+// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
+// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-PIC %s
// RUN: %clang -fPIC -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// CHECK-ELFv1-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
// CHECK-ELFv1-PIC: "-target-abi" "elfv1"
+// CHECK-ELFv1-QPX-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
+// CHECK-ELFv1-QPX-PIC: "-target-abi" "elfv1-qpx"
// CHECK-ELFv2-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
// CHECK-ELFv2-PIC: "-target-abi" "elfv2"
// PPC: error: unknown target CPU 'not-a-cpu'
// PPC: note: valid target CPU values are: generic, 440, 450, 601, 602, 603,
// PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750,
-// PPC-SAME: 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4,
+// PPC-SAME: 8548, 970, g5, a2, a2q, e500, e500mc, e5500, power3, pwr3, power4,
// PPC-SAME: pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x,
// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, powerpc64,
// PPC-SAME: ppc64, powerpc64le, ppc64le, future
// PPC64LE:#define __ppc64__ 1
// PPC64LE:#define __ppc__ 1
//
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu a2q -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCA2Q %s
+//
+// PPCA2Q:#define _ARCH_A2 1
+// PPCA2Q:#define _ARCH_A2Q 1
+// PPCA2Q:#define _ARCH_PPC 1
+// PPCA2Q:#define _ARCH_PPC64 1
+// PPCA2Q:#define _ARCH_QP 1
+//
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-bgq-linux -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCBGQ %s
+//
+// PPCBGQ:#define __THW_BLUEGENE__ 1
+// PPCBGQ:#define __TOS_BGQ__ 1
+// PPCBGQ:#define __bg__ 1
+// PPCBGQ:#define __bgq__ 1
+//
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu 630 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPC630 %s
//
// PPC630:#define _ARCH_630 1
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
- ``r``: A 32 or 64-bit integer register.
- ``b``: A 32 or 64-bit integer register, excluding ``R0`` (that is:
``R1-R31``).
-- ``f``: A 32 or 64-bit float register (``F0-F31``),
-- ``v``: For ``4 x f32`` or ``4 x f64`` types, a 128-bit altivec vector
- register (``V0-V31``).
+- ``f``: A 32 or 64-bit float register (``F0-F31``), or when QPX is enabled, a
+ 128 or 256-bit QPX register (``Q0-Q31``; aliases the ``F`` registers).
+- ``v``: For ``4 x f32`` or ``4 x f64`` types, when QPX is enabled, a
+ 128 or 256-bit QPX register (``Q0-Q31``), otherwise a 128-bit
+ altivec vector register (``V0-V31``); see the example after this list.
+
+ .. FIXME: Is it a bug that ``v`` accepts QPX registers? This constraint
+    is presumably supposed to use only the Altivec vector registers.
- ``y``: Condition register (``CR0-CR7``).
- ``wc``: An individual CR bit in a CR register.
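
For example, with QPX enabled a ``4 x f64`` value can be kept in a QPX
register through the ``v`` constraint. A minimal sketch (the
``vector_size`` typedef and the GNU inline-asm usage are assumptions of
this example, not part of the patch)::

  typedef double v4f64 __attribute__((vector_size(32)));

  v4f64 qadd(v4f64 a, v4f64 b) {
    v4f64 r;
    asm("qvfadd %0, %1, %2" : "=v"(r) : "v"(a), "v"(b));
    return r;
  }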
Apple,
PC,
SCEI,
+ BGP,
+ BGQ,
Freescale,
IBM,
ImaginationTechnologies,
Minix,
RTEMS,
NaCl, // Native Client
+ CNK, // BG/P Compute-Node Kernel
AIX,
CUDA, // NVIDIA CUDA
NVCL, // NVIDIA OpenCL
}
//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsics.
+//
+
+let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
+ /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
+ class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+ list<LLVMType> param_types,
+ list<IntrinsicProperty> properties>
+ : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
+ Intrinsic<ret_types, param_types, properties>;
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Class Definitions.
+//
+
+/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
+/// vector and returns one. These intrinsics have no side effects.
+class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+
+/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+
+/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
+/// and returns a v4f64.
+class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
+/// and returns a v4f64 permutation.
+class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
+/// and stores a v4f64.
+class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
+ : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+ [], [llvm_v4f64_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
+ // Add Instructions
+ def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
+ def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
+ def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
+ def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
+
+ // Estimate Instructions
+ def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
+ def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
+ def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
+ def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
+
+ // Multiply Instructions
+ def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
+ def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
+ def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
+ def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
+
+ // Multiply-add instructions
+ def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
+ def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
+ def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
+ def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
+ def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
+ def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
+ def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
+ def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
+ def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
+ def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
+ def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
+ def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
+ def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
+ def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
+ def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
+ def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
+
+ // Select Instruction
+ def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
+
+ // Permute Instruction
+ def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
+
+ // Convert and Round Instructions
+ def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
+ def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
+ def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
+ def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
+ def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
+ def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
+ def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
+ def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
+ def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
+ def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
+ def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
+ def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
+ def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
+ def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
+ def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
+ def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
+ def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
+
+ // Move Instructions
+ def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
+ def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
+ def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
+ def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
+
+ // Compare Instructions
+ def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
+ def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
+ def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
+ def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
+
+ // Load instructions
+ def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
+ def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
+ def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
+ def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
+
+ def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
+ def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
+ def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
+ def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
+ def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
+ def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
+ def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
+ def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
+
+ def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
+ def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
+ def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
+ def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
+
+ // Store instructions
+ def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
+ def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
+ def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
+ def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
+
+ def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
+ def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
+ def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
+ def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
+ def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
+ def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
+
+ // Logical and permutation formation
+ def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
+ [llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
+ [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
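+
+// For illustration (a sketch, not part of this file): the GCCBuiltin
+// bindings above expose these intrinsics in C as __builtin_qpx_* calls
+// when QPX is enabled, e.g.:
+//   typedef double qv4d __attribute__((vector_size(32))); // assumed typedef
+//   qv4d madd(qv4d a, qv4d b, qv4d c) {
+//     return __builtin_qpx_qvfmadd(a, b, c); // -> llvm.ppc.qpx.qvfmadd
+//   }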
+
+//===----------------------------------------------------------------------===//
// PowerPC HTM Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
case AMD: return "amd";
case Apple: return "apple";
+ case BGP: return "bgp";
+ case BGQ: return "bgq";
case CSR: return "csr";
case Freescale: return "fsl";
case IBM: return "ibm";
case AMDHSA: return "amdhsa";
case AMDPAL: return "amdpal";
case Ananas: return "ananas";
+ case CNK: return "cnk";
case CUDA: return "cuda";
case CloudABI: return "cloudabi";
case Contiki: return "contiki";
.Case("apple", Triple::Apple)
.Case("pc", Triple::PC)
.Case("scei", Triple::SCEI)
+ .Case("bgp", Triple::BGP)
+ .Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
.Case("ibm", Triple::IBM)
.Case("img", Triple::ImaginationTechnologies)
.StartsWith("minix", Triple::Minix)
.StartsWith("rtems", Triple::RTEMS)
.StartsWith("nacl", Triple::NaCl)
+ .StartsWith("cnk", Triple::CNK)
.StartsWith("aix", Triple::AIX)
.StartsWith("cuda", Triple::CUDA)
.StartsWith("nvcl", Triple::NVCL)
Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
}
+ void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
} else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
+ } else if (Name.startswith_lower("q") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = QFRegs[IntVal];
} else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
PPCMacroFusion.cpp
PPCMIPeephole.cpp
PPCRegisterInfo.cpp
+ PPCQPXLoadSplat.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCTargetObjectFile.cpp
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, QFRegs);
+}
+
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
// Read the instruction in the proper endianness.
uint64_t Inst = ReadFunc(Bytes.data());
- if (STI.getFeatureBits()[PPC::FeatureSPE]) {
+ if (STI.getFeatureBits()[PPC::FeatureQPX]) {
+ DecodeStatus result =
+ decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+ if (result != MCDisassembler::Fail)
+ return result;
+ } else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
DecodeStatus result =
- decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
+ decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
}
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
const char *RegName = getRegisterName(RegNo);
+ if (RegName[0] == 'q' /* QPX */) {
+ // The system toolchain on the BG/Q does not understand QPX register names
+ // in .cfi_* directives, so print the name of the floating-point
+ // subregister instead.
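+ // For example, "q31" is printed as "f31".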
+ std::string RN(RegName);
+
+ RN[0] = 'f';
+ OS << RN;
+
+ return;
+ }
+
OS << RegName;
}
static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
static const MCPhysReg RRegsNoR0[32] = \
PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
static const MCPhysReg XRegsNoX0[32] = \
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCBranchCoalescingPass();
+ FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
void initializePPCReduceCRLogicalsPass(PassRegistry&);
void initializePPCBSelPass(PassRegistry&);
void initializePPCBranchCoalescingPass(PassRegistry&);
+ void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
"Enable PPC 4xx instructions">;
def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
"Enable PPC 6xx instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions",
+ [FeatureFPU]>;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD",
"POPCNTD_Fast",
"Enable the popcnt[dw] instructions">;
-// Note that for the a2 processor models we should not use popcnt[dw] by
+// Note that for the a2/a2q processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
+ FeatureMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
+ PPC::QBRCRegClass.contains(Reg) ||
+ PPC::QFRCRegClass.contains(Reg) ||
+ PPC::QSRCRegClass.contains(Reg) ||
PPC::VFRCRegClass.contains(Reg) ||
PPC::VRRCRegClass.contains(Reg) ||
PPC::VSFRCRegClass.contains(Reg) ||
CCIfType<[f64], CCAssignToReg<[F1]>>,
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2]>>>
CCIfSubtarget<"hasP9Vector()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ // QPX vectors are returned in QF1 and QF2.
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfType<[f128],
CCIfSubtarget<"hasP9Vector()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
+ // QPX vectors that are stored in double precision need 32-byte alignment.
+ CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
// Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
// put vector arguments in vector registers before putting them on the stack.
let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
+ // QPX vectors mirror the scalar FP convention.
+ CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
- if (Subtarget->hasSPE())
+ if (Subtarget->hasQPX() || Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
SelectCCOp = PPC::SELECT_CC_F16;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
}
}
+ if (Subtarget.hasQPX()) {
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+ setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+ setOperationAction(ISD::AND , MVT::v4i1, Legal);
+ setOperationAction(ISD::OR , MVT::v4i1, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+ setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
+ setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+ addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+ // These need to set FE_INEXACT, and so cannot be vectorized here.
+ setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ }
+
+ // TODO: Handle constrained floating-point operations of v4f64
+ }
+
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
- if (Subtarget.hasAltivec())
- getMaxByValAlign(Ty, Alignment, Align(16));
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
return Alignment.value();
}
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::VABSD: return "PPCISD::VABSD";
+ case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
+ case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
+ case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
+ case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
+ case PPCISD::QBFLT: return "PPCISD::QBFLT";
+ case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+ if (Subtarget.hasQPX())
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
return VT.changeVectorElementTypeToInteger();
}
return false;
}
- // PowerPC doesn't have preinc load/store instructions for vectors
- if (VT.isVector())
- return false;
+ // PowerPC doesn't have preinc load/store instructions for vectors (except
+ // for QPX, which does have preinc r+r forms).
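+ // (For example, a pre-inc v4f64 load is selected to QVLFDUX below.)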
+ if (VT.isVector()) {
+ if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+ return false;
+ } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
+ }
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
// Common code will reject creating a pre-inc form if the base pointer
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
+
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
Alignment = Align(16);
+ // QPX vector types stored in double precision are padded to a 32-byte
+ // boundary.
+ else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+ Alignment = Align(32);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
-static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
- unsigned PtrByteSize, unsigned LinkageSize,
- unsigned ParamAreaSize, unsigned &ArgOffset,
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs) {
+ unsigned &AvailableVRs, bool HasQPX) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+ // QPX registers overlap with the scalar FP registers.
+ (HasQPX && (ArgVT == MVT::v4f32 ||
+ ArgVT == MVT::v4f64 ||
+ ArgVT == MVT::v4i1)))
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
RC = &PPC::VRRCRegClass;
break;
case MVT::v4f32:
- RC = &PPC::VRRCRegClass;
+ RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v4f64:
+ RC = &PPC::QFRCRegClass;
+ break;
+ case MVT::v4i1:
+ RC = &PPC::QBRCRegClass;
+ break;
}
SDValue ArgValue;
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
+ const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs))
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
HasParameterArea = true;
}
unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
- // These can be scalar arguments or elements of a vector array type
- // passed directly. The latter are used to implement ELFv2 homogenous
- // vector aggregates.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ if (!Subtarget.hasQPX()) {
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++VR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
+ break;
+ } // not QPX
+
+ assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+ LLVM_FALLTHROUGH;
+
+ case MVT::v4f64:
+ case MVT::v4i1:
+ // QPX vectors are treated like their scalar floating-point subregisters
+ // (except that they're larger).
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
+ if (QFPR_idx != Num_QFPR_Regs) {
+ const TargetRegisterClass *RC;
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+ case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+ default: RC = &PPC::QBRCRegClass; break;
+ }
+
+ unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++VR_idx;
+ ++QFPR_idx;
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
}
if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += 16;
+ ArgOffset += Sz;
break;
}
for (const ISD::OutputArg& Param : Outs) {
if (Param.Flags.isNest()) continue;
- if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
- LinkageSize, ParamAreaSize, NumBytes,
- AvailableFPRs, AvailableVRs))
+ if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
return true;
}
return false;
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
+ const unsigned NumQFPRs = NumFPRs;
// On ELFv2, we can avoid allocating the parameter area if all the arguments
// can be passed to the callee in registers.
for (unsigned i = 0; i != NumOps; ++i) {
if (Outs[i].Flags.isNest()) continue;
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
- PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytesTmp, AvailableFPRs, AvailableVRs))
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
HasParameterArea = true;
}
}
continue;
break;
case MVT::v4f32:
- if (++NumVRsUsed <= NumVRs)
- continue;
+ // When using QPX, this is handled like an FP register; otherwise, it
+ // is an Altivec register.
+ if (Subtarget.hasQPX()) {
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ } else {
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ }
break;
case MVT::f32:
case MVT::f64:
+ case MVT::v4f64: // QPX
+ case MVT::v4i1: // QPX
if (++NumFPRsUsed <= NumFPRs)
continue;
break;
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (!IsFastCall)
ArgOffset += 16;
break;
+ } // not QPX
+
+ assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+
+ LLVM_FALLTHROUGH;
+ case MVT::v4f64:
+ case MVT::v4i1: {
+ bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+ if (CFlags.IsVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store =
+ DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
+ MemOpChains.push_back(Store);
+ if (QFPR_idx != NumQFPRs) {
+ SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
+ PtrOff, MachinePointerInfo());
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+ }
+ ArgOffset += (IsF32 ? 16 : 32);
+ for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load =
+ DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs QPX params go into registers or on the stack.
+ if (QFPR_idx != NumQFPRs) {
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+ } else {
+ if (IsFastCall)
+ ComputePtrOff();
+
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, CFlags.IsTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (IsFastCall)
+ ArgOffset += (IsF32 ? 16 : 32);
+ }
+
+ if (!IsFastCall)
+ ArgOffset += (IsF32 ? 16 : 32);
+ break;
+ }
}
}
const PPCSubtarget &Subtarget =
static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX support is not supported on AIX.");
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX is not supported on AIX.");
if (Subtarget.hasAltivec())
report_fatal_error("Altivec support is unimplemented on AIX.");
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorLoad(Op, DAG);
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 loads");
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(1).getValueType().isVector())
+ return LowerVectorStore(Op, DAG);
+
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
"Custom lowering only for i1 stores");
if (Op.getValueType() == MVT::f128)
return Op;
+ if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+ if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+ return SDValue();
+
+ SDValue Value = Op.getOperand(0);
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
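+    // For example, V = -1.0 gives 0.5*(-1.0) + 0.5 = 0.0, and V = 1.0 gives 1.0.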
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ if (Op.getValueType() != MVT::v4f64)
+ Value = DAG.getNode(ISD::FP_ROUND, dl,
+ Op.getValueType(), Value,
+ DAG.getIntPtrConstant(1, dl));
+ return Value;
+ }
+
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+ // We first build an i32 vector, load it into a QPX register,
+ // then convert it to a floating-point vector and compare it
+ // to a zero vector to get the boolean result.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ assert(BVN->getNumOperands() == 4 &&
+ "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+ bool IsConst = true;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).isUndef()) continue;
+ if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+ IsConst = false;
+ break;
+ }
+ }
+
+ if (IsConst) {
+ Constant *One =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+ Constant *NegOne =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+ Constant *CV[4];
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).isUndef())
+ CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+ else if (isNullConstant(BVN->getOperand(i)))
+ CV[i] = NegOne;
+ else
+ CV[i] = One;
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
+
+ SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
+ SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
+ return DAG.getMemIntrinsicNode(
+ PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).isUndef()) continue;
+
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+ if (StoreSize > 4) {
+ Stores.push_back(
+ DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset), MVT::i32));
+ } else {
+ SDValue StoreValue = BVN->getOperand(i);
+ if (StoreSize < 4)
+ StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
+ PtrInfo.getWithOffset(Offset)));
+ }
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty())
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Now load from v4i32 into the QPX register; this will extend it to
+ // v4i64 but not yet convert it to floating point. Nevertheless, this
+ // is typed as v4f64 because the QPX register integer states are not
+ // is typed as v4f64 because the QPX register integer states are not
+ // explicitly represented.
+
+ SDValue Ops[] = {StoreChain,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
+ FIdx};
+ SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
+
+ SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+ LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
+ LoadedVect);
+
+ SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
+
+ return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+ }
+
+ // All other QPX vectors are handled by generic code.
+ if (Subtarget.hasQPX())
+ return SDValue();
+
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
}
}
+ if (Subtarget.hasQPX()) {
+ if (VT.getVectorNumElements() != 4)
+ return SDValue();
+
+ if (V2.isUndef()) V2 = V1;
+
+ int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+ if (AlignIdx != -1) {
+ return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+ DAG.getConstant(AlignIdx, dl, MVT::i32));
+ } else if (SVOp->isSplat()) {
+ int SplatIdx = SVOp->getSplatIndex();
+ if (SplatIdx >= 4) {
+ std::swap(V1, V2);
+ SplatIdx -= 4;
+ }
+
+ return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+ DAG.getConstant(SplatIdx, dl, MVT::i32));
+ }
+
+ // Lower this into a qvgpci/qvfperm pair.
+
+ // Compute the qvgpci literal
+ unsigned idx = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ int m = SVOp->getMaskElt(i);
+ unsigned mm = m >= 0 ? (unsigned) m : i;
+ idx |= mm << (3-i)*3;
+ }
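+    // For example, the mask <1,0,3,2> gives idx = (1<<9)|(0<<6)|(3<<3)|2 = 538.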
+
+ SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+ DAG.getConstant(idx, dl, MVT::i32));
+ return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+ }
+
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
return Op;
}
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDNode *N = Op.getNode();
+
+ assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
+ "Unknown extract_vector_elt type");
+
+ SDValue Value = N->getOperand(0);
+
+ // The first part of this is like the store lowering except that we don't
+ // need to track the chain.
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue StoreChain = DAG.getEntryNode();
+ SDValue Ops[] = {StoreChain,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
+ Value, FIdx};
+ SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Extract the value requested.
+ unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ SDValue IntVal =
+ DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
+
+ if (!Subtarget.useCRBits())
+ return IntVal;
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+}
+
+/// Lowering for QPX v4i1 loads
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+
+ if (Op.getValueType() == MVT::v4f64 ||
+ Op.getValueType() == MVT::v4f32) {
+ EVT MemVT = LN->getMemoryVT();
+ unsigned Alignment = LN->getAlignment();
+
+ // If this load is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Op.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SDValue Vals[4], LoadChains[4];
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Load;
+ if (ScalarVT != ScalarMemVT)
+ Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
+ BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx * Stride),
+ ScalarMemVT, MinAlign(Alignment, Idx * Stride),
+ LN->getMemOperand()->getFlags(), LN->getAAInfo());
+ else
+ Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx * Stride),
+ MinAlign(Alignment, Idx * Stride),
+ LN->getMemOperand()->getFlags(), LN->getAAInfo());
+
+ if (Idx == 0 && LN->isIndexed()) {
+ assert(LN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector load");
+ Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+ LN->getAddressingMode());
+ }
+
+ Vals[Idx] = Load;
+ LoadChains[Idx] = Load.getValue(1);
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
+
+ if (LN->isIndexed()) {
+ SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ SDValue RetOps[] = { Value, TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
+ assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+ // To lower v4i1 from a byte array, we load the byte elements of the
+ // vector and then reuse the BUILD_VECTOR logic.
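+  // Each v4i1 element occupies one byte in memory, so four i8 extending
+  // loads at offsets 0..3 feed the BUILD_VECTOR.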
+
+ SDValue VectElmts[4], VectElmtChains[4];
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ VectElmts[i] = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
+ LN->getPointerInfo().getWithOffset(i), MVT::i8,
+ /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
+ VectElmtChains[i] = VectElmts[i].getValue(1);
+ }
+
+ LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+ SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
+
+ SDValue RVals[] = { Value, LoadChain };
+ return DAG.getMergeValues(RVals, dl);
+}
+
+/// Custom lowering for QPX vector stores: scalarize under-aligned
+/// v4f64/v4f32 stores and expand v4i1 stores to their in-memory byte
+/// representation.
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SDValue Value = SN->getValue();
+
+ if (Value.getValueType() == MVT::v4f64 ||
+ Value.getValueType() == MVT::v4f32) {
+ EVT MemVT = SN->getMemoryVT();
+ unsigned Alignment = SN->getAlignment();
+
+ // If this store is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Value.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
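+    // For example, a misaligned store of a v4f64 value to v4f32 memory
+    // (ScalarVT f64, ScalarMemVT f32) becomes four f32 truncating stores at
+    // byte offsets 0, 4, 8 and 12.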
+ SDValue Stores[4];
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getVectorIdxConstant(Idx, dl));
+ SDValue Store;
+ if (ScalarVT != ScalarMemVT)
+ Store =
+ DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx * Stride),
+ ScalarMemVT, MinAlign(Alignment, Idx * Stride),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+ else
+ Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx * Stride),
+ MinAlign(Alignment, Idx * Stride),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+
+ if (Idx == 0 && SN->isIndexed()) {
+ assert(SN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector store");
+ Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+ SN->getAddressingMode());
+ }
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ Stores[Idx] = Store;
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ if (SN->isIndexed()) {
+ SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ return TF;
+ }
+
+ assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+ assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ops[] = {StoreChain,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
+ Value, FIdx};
+ SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Move data into the byte array.
+ SDValue Loads[4], LoadChains[4];
+ for (unsigned i = 0; i < 4; ++i) {
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset));
+ LoadChains[i] = Loads[i].getValue(1);
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+ SDValue Stores[4];
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ Stores[i] = DAG.getTruncStore(
+ StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
+ MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
+ SN->getAAInfo());
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ return StoreChain;
+}
+
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
} else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
+ MI.getOpcode() == PPC::SELECT_CC_QFRC ||
+ MI.getOpcode() == PPC::SELECT_CC_QSRC ||
+ MI.getOpcode() == PPC::SELECT_CC_QBRC ||
MI.getOpcode() == PPC::SELECT_CC_VRRC ||
MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
+ MI.getOpcode() == PPC::SELECT_QFRC ||
+ MI.getOpcode() == PPC::SELECT_QSRC ||
+ MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_SPE ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_F16 ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_SPE ||
+ MI.getOpcode() == PPC::SELECT_QFRC ||
+ MI.getOpcode() == PPC::SELECT_QSRC ||
+ MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_VSFRC ||
MI.getOpcode() == PPC::SELECT_VSSRC ||
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
+ Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+ Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
!Subtarget.hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v4f32))) &&
+ VT == MVT::v4f32)) ||
+ (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+ LD->getAlign() >= ScalarABIAlignment)) &&
LD->getAlign() < ABIAlignment) {
- // This is a type-legal unaligned Altivec load.
+ // This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
// optimization later.
Intrinsic::ID Intr, IntrLD, IntrPerm;
MVT PermCntlTy, PermTy, LDTy;
- Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
- : Intrinsic::ppc_altivec_lvsl;
- IntrLD = Intrinsic::ppc_altivec_lvx;
- IntrPerm = Intrinsic::ppc_altivec_vperm;
- PermCntlTy = MVT::v16i8;
- PermTy = MVT::v4i32;
- LDTy = MVT::v4i32;
+ if (Subtarget.hasAltivec()) {
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
+ } else {
+ Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+ Intrinsic::ppc_qpx_qvlpcls;
+ IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+ Intrinsic::ppc_qpx_qvlfs;
+ IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+ PermCntlTy = MVT::v4f64;
+ PermTy = MVT::v4f64;
+ LDTy = MemVT.getSimpleVT();
+ }
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != PermTy)
- Perm = Subtarget.hasAltivec()
- ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
- : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
- DAG.getTargetConstant(1, dl, MVT::i64));
+ Perm = Subtarget.hasAltivec() ?
+ DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+ DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+ DAG.getTargetConstant(1, dl, MVT::i64));
// second argument is 1 because this rounding
// is always exact.
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
- if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
+ if ((IID == Intr ||
+ IID == Intrinsic::ppc_qpx_qvlpcld ||
+ IID == Intrinsic::ppc_qpx_qvlpcls) &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
- int Bits = 4 /* 16 byte alignment */;
+ int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
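+      // That is, the low Bits bits of the addend must be known zero, so the
+      // offset is a multiple of the permute-control granularity (32 bytes
+      // for qvlpcld, 16 bytes otherwise).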
if (DAG.MaskedValueIsZero(Add->getOperand(1),
APInt::getAllOnesValue(Bits /* alignment */)
UE = BasePtr->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- IID) {
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
}
break;
case 'v':
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
if (Subtarget.hasAltivec())
return std::make_pair(0U, &PPC::VRRCRegClass);
break;
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_vsx_lxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
Info.flags = MachineMemOperand::MOLoad;
return true;
}
+ case Intrinsic::ppc_qpx_qvlfda:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ case Intrinsic::ppc_qpx_qvlfiwaa:
+ case Intrinsic::ppc_qpx_qvlfiwza: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = Align(1);
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
Info.flags = MachineMemOperand::MOStore;
return true;
}
+ case Intrinsic::ppc_qpx_qvstfda:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ case Intrinsic::ppc_qpx_qvstfiwa: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = Align(1);
+ Info.flags = MachineMemOperand::MOStore;
+ return true;
+ }
default:
break;
}
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
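+    // A 32-byte-aligned memcpy can profit from a single v4f64 copy, but a
+    // memset must be at least 64 bytes (two quad-vector stores) before the
+    // splat materialization pays for itself.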
+ if (Subtarget.hasQPX() && Op.size() >= 32 &&
+ (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
if (Subtarget.hasAltivec() && Op.size() >= 16 &&
if (VT == MVT::v2i64)
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
- if (Subtarget.hasVSX())
+ if (Subtarget.hasVSX() || Subtarget.hasQPX())
return true;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
switch (Opc) {
case PPCISD::FNMSUB:
- if (!Op.hasOneUse() || !isTypeLegal(VT))
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
break;
const TargetOptions &Options = getTargetMachine().Options;
bool LegalOps = !DCI.isBeforeLegalizeOps();
SDLoc Loc(N);
- if (!isOperationLegal(ISD::FMA, VT))
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
return SDValue();
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
VABSD,
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.
let FRA = 0;
}
+// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
let Inst{31} = 0;
}
+// Used for QPX
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+ let FRC = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
let Inst{23-31} = xo;
}
+// Z23-Form (used by QPX)
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<12> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6-10} = FRT;
+ let Inst{11-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
case PPC::XVMULDP:
case PPC::XVMULSP:
case PPC::XSMULSP:
+ // QPX Add:
+ case PPC::QVFADD:
+ case PPC::QVFADDS:
+ case PPC::QVFADDSs:
+ // QPX Multiply:
+ case PPC::QVFMUL:
+ case PPC::QVFMULS:
+ case PPC::QVFMULSs:
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// Fixed point:
// Multiply:
case PPC::MULHD:
{PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
{PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
{PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
- {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}};
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+ {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
+ {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
case PPC::LI8:
case PPC::LIS:
case PPC::LIS8:
+ case PPC::QVGPCI:
case PPC::ADDIStocHA:
case PPC::ADDIStocHA8:
case PPC::ADDItocL:
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
+ else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMR;
+ else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRs;
+ else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
OpcodeIndex = SOK_VectorFloat4Spill;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_VRSaveSpill;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat8Spill;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat4Spill;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadBitSpill;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SpillToVSR;
} else {
SOK_VectorFloat8Spill,
SOK_VectorFloat4Spill,
SOK_VRSaveSpill,
+ SOK_QuadFloat8Spill,
+ SOK_QuadFloat4Spill,
+ SOK_QuadBitSpill,
SOK_SpillToVSR,
SOK_SPESpill,
SOK_LastOpcodeSpill // This must be last on the enum.
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
- PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD, PPC::EVLDD \
+ PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
+ PPC::SPILLTOVSR_LD, PPC::EVLDD \
}
#define Pwr9LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
- PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD \
+ PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
+ PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
}
#define Pwr8StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
- PPC::SPILLTOVSR_ST, PPC::EVSTDD \
+ PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
+ PPC::EVSTDD \
}
#define Pwr9StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
- PPC::SPILL_VRSAVE, PPC::SPILLTOVSR_ST \
+ PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
+ PPC::SPILLTOVSR_ST \
}
// Initialize arrays for load and store spill opcodes on supported subtargets.
}
static bool isSameClassPhysRegCopy(unsigned Opcode) {
- unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
- PPC::VOR, PPC::XXLOR, PPC::XXLORf,
- PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR,
- PPC::EVOR, -1U};
+ unsigned CopyOpcodes[] =
+ { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+ PPC::CROR, PPC::EVOR, -1U };
for (int i = 0; CopyOpcodes[i] != -1U; i++)
if (Opcode == CopyOpcodes[i])
return true;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
+def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
+ [SDNPHasChain, SDNPMayLoad]>;
+
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
--- /dev/null
+//===- PPCInstrQPX.td - The PowerPC QPX Extension ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM, 2011 (and later updates).
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegQFRCAsmOperand : AsmOperandClass {
+ let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> {
+ let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+def PPCRegQSRCAsmOperand : AsmOperandClass {
+ let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> {
+ let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+def PPCRegQBRCAsmOperand : AsmOperandClass {
+ let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> {
+ let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - An AForm_1 intrinsic definition.
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - An AForm_1 intrinsic definition (simple instructions).
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - An AForm_2 intrinsic definition.
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - An AForm_3 intrinsic definition.
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - An AForm_4a intrinsic definition.
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - An XForm_18 intrinsic definition.
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - An XForm_19 intrinsic definition.
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val,
+ node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def fround_inexact : PatFrag<(ops node:$val), (fpround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>;
+
+def fround_exact : PatFrag<(ops node:$val), (fpround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>;
+
+let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
+ def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"Subtarget->hasQPX()">;
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+ // Add Instructions
+ let isCommutable = 1 in {
+ def QVFADD : AForm_2<4, 21,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+ def QVFADDSs : AForm_2<0, 21,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+ }
+ def QVFSUB : AForm_2<4, 20,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+ def QVFSUBSs : AForm_2<0, 20,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+ // Estimate Instructions
+ def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfre $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+ def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+ let isCodeGenOnly = 1 in
+ def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfres $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+ def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+ def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+ let isCodeGenOnly = 1 in
+ def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+ // Multiply Instructions
+ let isCommutable = 1 in {
+ def QVFMUL : AForm_3<4, 25,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+ def QVFMULSs : AForm_3<0, 25,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+ "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+ }
+ def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+ def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+ // Multiply-add instructions
+ def QVFMADD : AForm_1<4, 29,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
+ "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+ def QVFMADDSs : AForm_1<0, 29,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
+ "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+ def QVFNMADD : AForm_1<4, 31,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
+ "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+ def QVFNMADDSs : AForm_1<0, 31,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
+ "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ v4f32:$FRB)))]>;
+ def QVFMSUB : AForm_1<4, 28,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
+ "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+ def QVFMSUBSs : AForm_1<0, 28,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
+ "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB)))]>;
+ def QVFNMSUB : AForm_1<4, 30,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
+ "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB))))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+ def QVFNMSUBSs : AForm_1<0, 30,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
+ "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB))))]>;
+ def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
+ def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
+ def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+ def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+ def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+ def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+ def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+ def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+ // Select Instruction
+ let isCodeGenOnly = 1 in
+ def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+ def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+ (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (vselect v4i1:$FRA,
+ v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+ (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (vselect v4i1:$FRA,
+ v4f32:$FRC, v4f32:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+ (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4i1:$FRT, (vselect v4i1:$FRA,
+ v4i1:$FRC, v4i1:$FRB))]>;
+
+ // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+ // instruction selection into a branch sequence.
+ def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+
+ // Convert and Round Instructions
+ def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+ let isCodeGenOnly = 1 in
+ def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+ def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+ def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+ def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+ def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+ def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+ def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+ def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+ let isCodeGenOnly = 1 in
+ def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+ def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+ def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+ let isCodeGenOnly = 1 in
+ def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+ def QVFRSPs : XForm_19<4, 12,
+ (outs qsrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+ def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+ def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fround v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround v4f32:$FRB))]>;
+
+ def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+ def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+ // Move Instructions
+ def QVFMR : XForm_19<4, 72,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+ let isCodeGenOnly = 1 in {
+ def QVFMRs : XForm_19<4, 72,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+ def QVFMRb : XForm_19<4, 72,
+ (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+ }
+ def QVFNEG : XForm_19<4, 40,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNEGs : XForm_19<4, 40,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+ def QVFABS : XForm_19<4, 264,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFABSs : XForm_19<4, 264,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+ def QVFNABS : XForm_19<4, 136,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNABSs : XForm_19<4, 136,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+ def QVFCPSGN : XForm_18<4, 8,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCPSGNs : XForm_18<4, 8,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+ def QVALIGNI : Z23Form_1<4, 5,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIs : Z23Form_1<4, 5,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIb : Z23Form_1<4, 5,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+ (i32 imm:$idx)))]>;
+
+ def QVESPLATI : Z23Form_2<4, 37,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIs : Z23Form_2<4, 37,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIb : Z23Form_2<4, 37,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+ def QVFPERM : AForm_1<4, 6,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFPERMs : AForm_1<4, 6,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
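+  // The 12-bit qvgpci immediate is the packed four-field lane selector that
+  // LowerVECTOR_SHUFFLE builds when lowering a shuffle to a qvgpci/qvfperm
+  // pair.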
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def QVGPCI : Z23Form_3<4, 133,
+ (outs qfrc:$FRT), (ins u12imm:$idx),
+ "qvgpci $FRT, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+ // Compare Instruction
+ let isCodeGenOnly = 1 in
+ def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+ def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+ def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+ def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+ def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVFLOGICAL : XForm_20<4, 4,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ def QVFLOGICALb : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ let isCodeGenOnly = 1 in
+ def QVFLOGICALs : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+ // Load indexed instructions
+ let mayLoad = 1 in {
+ def QVLFDX : XForm_1_memOp<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (load xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFDXb : XForm_1_memOp<31, 583,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFDXA : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFDUX : XForm_1<31, 615,
+ (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+ let RC = 1 in
+ def QVLFDUXA : XForm_1<31, 615,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSX : XForm_1_memOp<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVLFSXb : XForm_1<31, 519,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFSXs : XForm_1_memOp<31, 519,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+ let RC = 1 in
+ def QVLFSXA : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSUX : XForm_1<31, 551,
+ (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+ let RC = 1 in
+ def QVLFSUXA : XForm_1<31, 551,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDX : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDXA : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDUX : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDUXA : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSX : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLFCSXs : XForm_1<31, 7,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFCSXA : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSUX : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCSUXA : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWAX : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWAXA : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWZX : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWZXA : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+ }
+
+ def QVLPCLDX : XForm_1<31, 582,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCLSX : XForm_1<31, 518,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLPCLSXint : XForm_11<31, 518,
+ (outs qfrc:$FRT), (ins G8RC:$src),
+ "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+ def QVLPCRDX : XForm_1<31, 70,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCRSX : XForm_1<31, 6,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def QVSTFDX : XForm_8_memOp<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+ [(store qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFDXb : XForm_8_memOp<31, 711,
+ (outs), (ins qbrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFDXA : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFDUXA : XForm_8<31, 743,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDXI : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDXIA : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUXI : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDUXIA : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSX : XForm_8_memOp<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFSXs : XForm_8_memOp<31, 647,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+ let RC = 1 in
+ def QVSTFSXA : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+ let isCodeGenOnly = 1 in
+ def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFSUXA : XForm_8<31, 679,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSXI : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSXIA : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUXI : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSUXIA : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDX : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXA : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSX : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVSTFCSXs : XForm_8<31, 135,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFCSXA : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUX : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXA : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUX : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXA : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDXI : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXIA : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSXI : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSXIA : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUXI : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXIA : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUXI : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXIA : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFIWX : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFIWXA : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+ }
+}
+
+} // hasSideEffects = 0
+}
+
+def : InstAlias<"qvfclr $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
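+// A note on the immediates above (an observation drawn from the alias list
+// itself): the final QVFLOGICALb operand is a 4-bit truth table over the two
+// boolean inputs, e.g. AND = 1, ANDC = 4, XOR = 6, OR = 7, NOR = 8, EQV = 9,
+// ORC = 13, NAND = 14; 0 clears all lanes and 15 sets them.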
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (extractelt v4f64:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (extractelt v4f64:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (extractelt v4f32:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERM $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERMs $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
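+// A reading of the two variable-index patterns above: RLDICR shifts the
+// index left by two (index * 4), and QVLPCLSXint converts that offset into a
+// permute control word for QVFPERM that moves the selected element into lane
+// 0, from which the sub_64 extract then reads it.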
+
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+ (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+ (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
+ (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+ (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+ (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+ (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+ (QVFNMSUB $A, $C, $B)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+ (QVFNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (QVFNMSUBSs $A, $C, $B)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (QVFNMSUBSs $A, $C, $B)>;
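+// (qvfnmsub computes -(A*C - B), which equals both (-A)*C + B and
+// A*(-C) + B, so the two commuted fneg forms above map to the same
+// instruction.)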
+
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMSUB $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+ (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+ (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+ (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+ (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+ (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+ (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+ (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+ (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+ (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+ (QVLFIWZX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+ (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+ (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+ (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+ (QVLPCRSX xoaddr:$src)>;
+
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+ (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+ (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+ (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+ (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+ (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+ (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+ (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+ (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+ (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+ (QVSTFSXA $T, xoaddr:$dst)>;
+
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+ (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+ (QVGPCI imm:$idx)>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+ (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+ (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+ (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+ (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+ (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+ (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+ (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+def : Pat<(v4f64 (fpextend v4f32:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+ (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation, in which positive encodes true.
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+ (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+ (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+ (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+ (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+ (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+ (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we
+// need to explicitly OR in a NaN test on the second operand.
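+// Worked example (reading QVFSELb as choosing its final operand where the
+// mask lane is true):
+//   fminnum(x, NaN): qvfcmplt is false, but the NaN test on the second
+//   operand is true, so the ORed mask is true and x ($FRA) is chosen.
+//   fminnum(NaN, y): both the compare and the NaN test are false, so the
+//   mask is false and y ($FRB) is chosen.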
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+}
--- /dev/null
+//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The QPX vector registers overlay the scalar floating-point registers, and
+// any scalar floating-point loads splat their value across all vector lanes.
+// Thus, if we have a scalar load followed by a splat, we can remove the splat
+// (i.e. replace the load with a load-and-splat pseudo instruction).
+//
+// This pass must run after anything that might do store-to-load forwarding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-qpx-load-splat"
+
+STATISTIC(NumSimplified, "Number of QPX load splats simplified");
+
+namespace {
+ struct PPCQPXLoadSplat : public MachineFunctionPass {
+ static char ID;
+ PPCQPXLoadSplat() : MachineFunctionPass(ID) {
+ initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return "PowerPC QPX Load Splat Simplification";
+ }
+ };
+ char PPCQPXLoadSplat::ID = 0;
+}
+
+INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
+ "PowerPC QPX Load Splat Simplification",
+ false, false)
+
+FunctionPass *llvm::createPPCQPXLoadSplatPass() {
+ return new PPCQPXLoadSplat();
+}
+
+bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ bool MadeChange = false;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+ SmallVector<MachineInstr *, 4> Splats;
+
+ for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
+ MachineInstr *MI = &*MBBI;
+
+ if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
+ Splats.clear();
+ continue;
+ }
+
+ // We're looking for a sequence like this:
+ // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
+ // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
+
+ for (auto SI = Splats.begin(); SI != Splats.end();) {
+ MachineInstr *SMI = *SI;
+ Register SplatReg = SMI->getOperand(0).getReg();
+ Register SrcReg = SMI->getOperand(1).getReg();
+
+ if (MI->modifiesRegister(SrcReg, TRI)) {
+ switch (MI->getOpcode()) {
+ default:
+ SI = Splats.erase(SI);
+ continue;
+ case PPC::LFS:
+ case PPC::LFD:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ case PPC::LFSUX:
+ case PPC::LFDUX:
+ case PPC::LFSX:
+ case PPC::LFDX:
+ case PPC::LFIWAX:
+ case PPC::LFIWZX:
+ if (SplatReg != SrcReg) {
+ // We need to change the load to define the scalar subregister of
+ // the QPX splat source register.
+ unsigned SubRegIndex =
+ TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
+ Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
+
+ // Substitute both the explicit defined register, and also the
+ // implicit def of the containing QPX register.
+ MI->getOperand(0).setReg(SplatSubReg);
+ MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
+ }
+
+ SI = Splats.erase(SI);
+
+ // If SMI is directly after MI, then MBBI's base iterator is
+ // pointing at SMI. Adjust MBBI around the call to erase SMI to
+ // avoid invalidating MBBI.
+ ++MBBI;
+ SMI->eraseFromParent();
+ --MBBI;
+
+ ++NumSimplified;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // If this instruction defines the splat register, then we cannot move
+ // the previous definition above it. If it reads from the splat
+ // register, then it must already be alive from some previous
+ // definition, and if the splat register is different from the source
+ // register, then this definition must not be the load for which we're
+ // searching.
+ if (MI->modifiesRegister(SplatReg, TRI) ||
+ (SrcReg != SplatReg &&
+ MI->readsRegister(SplatReg, TRI))) {
+ SI = Splats.erase(SI);
+ continue;
+ }
+
+ ++SI;
+ }
+
+ if (MI->getOpcode() != PPC::QVESPLATI &&
+ MI->getOpcode() != PPC::QVESPLATIs &&
+ MI->getOpcode() != PPC::QVESPLATIb)
+ continue;
+ if (MI->getOperand(2).getImm() != 0)
+ continue;
+
+ // If there are other uses of the scalar value after this, replacing
+ // those uses might be non-trivial.
+ if (!MI->getOperand(1).isKill())
+ continue;
+
+ Splats.push_back(MI);
+ }
+ }
+
+ return MadeChange;
+}
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
+ case PPC::QFRCRegClassID:
+ case PPC::QSRCRegClassID:
+ case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
let HWEncoding{4-0} = num;
}
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+ def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
// Register class for single precision scalars in VSX registers
def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+ (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+ // These are actually stored as floating-point values where a positive
+ // number is true and anything else (including NaN) is false.
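+  // For illustration (the canonical values the QPX compare instructions
+  // produce): a true lane holds 1.0 and a false lane holds -1.0, so a sign
+  // test on any lane recovers the boolean.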
+ let Size = 256;
+}
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
let CompleteModel = 1;
- // Do not support SPE (Signal Processing Engine), prefixed instructions on
- // Power 9, PC relative mem ops, or instructions introduced in ISA 3.1.
- let UnsupportedFeatures = [HasSPE, PrefixInstrs, PCRelativeMemops, IsISA3_1];
+ // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
+ // Engine), prefixed instructions on Power 9, PC relative mem ops, or
+ // instructions introduced in ISA 3.1.
+ let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
+ IsISA3_1];
}
let SchedModel = P9Model in {
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+ cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+ cl::Hidden);
+
static cl::opt<bool>
EnableMachinePipeliner("ppc-enable-pipeliner",
cl::desc("Enable Machine Pipeliner for PPC"),
HasAltivec = false;
HasSPE = false;
HasFPU = false;
+ HasQPX = false;
HasVSX = false;
NeedsTwoConstNR = false;
HasP8Vector = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
HasDirectMove = false;
+ IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
HasFusion = false;
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
- if (HasSPE && (HasAltivec || HasVSX || HasFPU))
+ if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
report_fatal_error(
"SPE and traditional floating point cannot both be enabled.\n", false);
if (!HasSPE)
HasFPU = true;
+  // QPX requires a 32-byte aligned stack. Note that we need to do this even
+  // when we're compiling for a BG/Q system with QPX disabled, because
+  // external functions will assume this alignment.
+ IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();
// Determine endianness.
bool HasAltivec;
bool HasFPU;
bool HasSPE;
+ bool HasQPX;
bool HasVSX;
bool NeedsTwoConstNR;
bool HasP8Vector;
POPCNTDKind HasPOPCNTD;
+  /// When targeting QPX on a stock PPC64 Linux system, where the stack has
+  /// not been realigned to 32 bytes, we need to keep the default 16-byte
+  /// stack alignment.
+ bool IsQPXStackUnaligned;
+
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
bool hasAltivec() const { return HasAltivec; }
bool hasSPE() const { return HasSPE; }
bool hasFPU() const { return HasFPU; }
+ bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
+ bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
Align getPlatformStackAlignment() const {
+ if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+ return Align(32);
+
return Align(16);
}
const Triple &getTargetTriple() const { return TargetTriple; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
cl::desc("Disable VSX Swap Removal for PPC"));
static cl::
+opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
+ cl::desc("Disable QPX load splat simplification"));
+
+static cl::
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for PPC"));
initializePPCReduceCRLogicalsPass(PR);
initializePPCBSelPass(PR);
initializePPCBranchCoalescingPass(PR);
+ initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
addPass(createPPCLowerMASSVEntriesPass());
-
- // If explicitly requested, add explicit data prefetch intrinsics.
+
+ // For the BG/Q (or if explicitly requested), add explicit data prefetch
+ // intrinsics.
+ bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
+ getOptLevel() != CodeGenOpt::None;
if (EnablePrefetch.getNumOccurrences() > 0)
+ UsePrefetching = EnablePrefetch;
+ if (UsePrefetching)
addPass(createLoopDataPrefetchPass());
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
}
void PPCPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(&IfConverterID);
+
+ // This optimization must happen after anything that might do store-to-load
+ // forwarding. Here we're after RA (and, thus, when spills are inserted)
+ // but before post-RA scheduling.
+ if (!DisableQPXLoadSplat)
+ addPass(createPPCQPXLoadSplatPass());
+ }
}
void PPCPassConfig::addPreEmitPass() {
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass
+// This is currently only used for the data prefetch pass, which is enabled by
+// default only on the BG/Q.
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Type *VTy =
+ VectorType::get(IC.Builder.getFloatTy(),
+ cast<VectorType>(II.getType())->getElementCount());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = IC.Builder.CreateLoad(VTy, Ptr);
+ return new FPExtInst(Load, II.getType());
+ }
+ break;
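+  // For illustration (an IR sketch; value names are arbitrary): with a
+  // pointer known to be 16-byte aligned,
+  //   %v = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %p)
+  // becomes a plain vector load followed by an extension:
+  //   %l = load <4 x float>, <4 x float>* %pc
+  //   %v = fpext <4 x float> %l to <4 x double>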
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(0), Align(32), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
+ Value *Ptr = IC.Builder.CreateBitCast(
+ II.getArgOperand(0), PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, "", false, Align(32));
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Type *VTy = VectorType::get(
+ IC.Builder.getFloatTy(),
+ cast<VectorType>(II.getArgOperand(0)->getType())->getElementCount());
+ Value *TOp = IC.Builder.CreateFPTrunc(II.getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr, false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(1), Align(32), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(32));
+ }
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
- // On the A2, always unroll aggressively.
+  // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+  // on combining the loads generated for consecutive accesses, and failure to
+  // do so is particularly expensive. Aggressive interleaving makes such
+  // combining much more likely (compared to only using concatenation
+  // unrolling).
if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
+ if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
}
unsigned PPCTTIImpl::getPrefetchDistance() const {
+  // This seems like a reasonable default for the BG/Q (this pass is enabled
+  // by default only on the BG/Q).
return 300;
}
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- // PPC, for both Altivec/VSX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
return Cost;
+ } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+ // Floating point scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return Cost;
+
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
bool IsVSXType = ST->hasVSX() &&
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+ bool IsQPXType = ST->hasQPX() &&
+ (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
// VSX has 32b/64b load instructions. Legalization can handle loading of
// 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
// for Altivec types using the VSX instructions, but that's more expensive
// than using the permutation-based load sequence. On the P8, that's no
// longer true.
- if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
+ if (Opcode == Instruction::Load &&
+ ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
*Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
- // PPC, for both Altivec/VSX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). For each result vector, we need one shuffle per incoming
// vector (except that the first shuffle can take two incoming vectors
// For PowerPC, we need to deal with alignment of stack arguments -
// they are mostly aligned to 8 bytes, but vectors and i128 arrays
// are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
- // For that reason, we compute current offset from stack pointer (which is
- // always properly aligned), and offset for the first vararg, then subtract
- // them.
+ // and QPX vectors are aligned to 32 bytes. For that reason, we
+ // compute current offset from stack pointer (which is always properly
+ // aligned), and offset for the first vararg, then subtract them.
unsigned VAArgBase;
Triple TargetTriple(F.getParent()->getTargetTriple());
// Parameter save area starts at 48 bytes from frame pointer for ABIv1,
// and 32 bytes for ABIv2. This is usually determined by target
// endianness, but in theory could be overridden by function attribute.
+ // For simplicity, we ignore it here (it'd only matter for QPX vectors).
if (TargetTriple.getArch() == Triple::ppc64)
VAArgBase = 48;
else
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
@X = external global [16000 x double], align 32
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
}
+define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: test_l_qv4float
+; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+}
+
+define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: test_l_qv8float
+; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+}
+
+define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: test_l_qv4double
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+}
+
+define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
+entry:
+ %r = load <8 x double>, <8 x double>* %p, align 8
+ ret <8 x double> %r
+
+; CHECK-LABEL: test_l_qv8double
+; CHECK: cost of 4 for instruction: %r = load <8 x double>, <8 x double>* %p, align 8
+}
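+
+; A note on the CHECK values above (an interpretation of the cost model): for
+; the a2q functions, an unaligned QPX vector load is costed as one register
+; load plus one permutation to realign it (cost 2), and the <8 x ...> types
+; legalize to two registers (cost 4).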
+
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
entry:
store <16 x i8> %v, <16 x i8>* %p, align 1
; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
}
+define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_qv4float
+; CHECK: cost of 7 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
+}
+
+define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_qv8float
+; CHECK: cost of 15 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
+}
+
+define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_qv4double
+; CHECK: cost of 7 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
+}
+
+define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
+entry:
+ store <8 x double> %v, <8 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_qv8double
+; CHECK: cost of 15 for instruction: store <8 x double> %v, <8 x double>* %p, align 8
+}
+
attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind "target-cpu"="a2q" }
attributes #2 = { nounwind "target-cpu"="pwr8" }
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -enable-misched < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
;
; PR14315: misched should not move the physreg copy of %t below the calls.
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0, !1}
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.ident = !{!0}
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @bar(i8* %a) nounwind;
+define i32 @foo() nounwind {
+ %p = alloca i8, i8 115
+ store i8 0, i8* %p
+ %r = call i32 @bar(i8* %p)
+ ret i32 %r
+}
+
+; Without QPX, the allocated stack frame is 240 bytes, but with QPX
+; (because we require 32-byte alignment), it is 256 bytes.
+; CHECK-A2: @foo
+; CHECK-A2: stdu 1, -240(1)
+; CHECK-A2Q: @foo
+; CHECK-A2Q: stdu 1, -256(1)
+; CHECK-BGQ: @foo
+; CHECK-BGQ: stdu 1, -256(1)
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 -mattr=+qpx | FileCheck %s
+
+define void @foo() {
+entry:
+ ret void
+}
+
+; CHECK: @foo
+
; Function Attrs: nounwind argmemonly
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind argmemonly }
attributes #2 = { nounwind }
-; RUN: llc -verify-machineinstrs < %s -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
define i32 @zytest(i32 %a) nounwind {
entry:
}
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
ret i64 %2
}
- attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
ret i64 %cond
}
- attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
ret i64 %xor
}
- attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s --check-prefix=QPX
declare float @fabsf(float)
; CHECK-NOT: xsmindp
; CHECK: blr
+; QPX-LABEL: test1v:
+; QPX: mtctr
+; QPX-NOT: bl fminf
+; QPX: blr
+
define void @test1a(float %f, float* %fp) {
entry:
br label %loop_body
; CHECK-NOT: xsmaxdp
; CHECK: blr
+; QPX-LABEL: test2v:
+; QPX: mtctr
+; QPX-NOT: bl fmax
+; QPX: blr
+
define void @test2a(float %f, float* %fp) {
entry:
br label %loop_body
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s --check-prefixes=CHECK,CHECK-A2Q
; Verify that we do NOT generate the mtctr instruction for loop trip counts < 4
; The latency of the mtctr is only justified if there are more than 4 comparisons that are removed as a result.
}
; Function Attrs: norecurse nounwind
+; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
+; a2q should use mtctr, but pwr8 should not use mtctr.
define signext i32 @testTripCount2NonSmallLoop() {
; CHECK-LABEL: testTripCount2NonSmallLoop:
+; CHECK-A2Q: mtctr
; CHECK-PWR8-NOT: mtctr
; CHECK: blr
ret i32 %conv
}
+; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
+; a2q should use mtctr, but pwr8 should not use mtctr.
define signext i32 @testTripCount5() {
; CHECK-LABEL: testTripCount5:
; CHECK-PWR8-NOT: mtctr
+; CHECK-A2Q: mtctr
entry:
%.prea = load i32, i32* @a, align 4
; that were both inputs to the inline asm and also early-clobber outputs).
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712 = type { %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32 }
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%"class.Foam::messageStream.6" = type <{ %"class.Foam::string.5", i32, i32, i32, [4 x i8] }>
%"class.Foam::string.5" = type { %"class.std::basic_string.4" }
declare void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii() #0
-attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
; RUN: llc -verify-machineinstrs -O0 -relocation-model=pic < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%"class.std::__1::__tree_node.130.151" = type { %"class.std::__1::__tree_node_base.base.128.149", %"class.boost::serialization::extended_type_info.129.150"* }
%"class.std::__1::__tree_node_base.base.128.149" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8 }>
ret float %25
}
-attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
define linkonce_odr double @test1(ppc_fp128 %input) {
entry:
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
%"class.std::__1::__shared_count" = type { i32 (...)**, i64 }
declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { nobuiltin nounwind }
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%struct.BG_CoordinateMapping_t = type { [4 x i8] }
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
-; RUN: llc -enable-ppc-prefetching=true -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
; Function Attrs: nounwind
define void @foo(double* %x, double* nocapture readonly %y) #0 {
-; RUN: llc -verify-machineinstrs -enable-ppc-prefetching=true -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
; Function Attrs: nounwind
define void @foo(double* nocapture %a, double* nocapture readonly %b) #0 {
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
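+; The dcbt check needs no explicit prefetching flag: on the bgq triple,
+; software data prefetching is enabled by default, so the CHECK-BGQ run
+; additionally expects a dcbt in the loop body.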
; CHECK-LABEL: @foo
+; CHECK-BGQ-DAG: dcbt 4, 5
; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
for.body7: ; preds = %for.body, %for.body7
%i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
- tail call void bitcast (void (...)* @bar to void ()*)() #0
+ tail call void bitcast (void (...)* @bar to void ()*)() #2
%inc9 = add nuw nsw i32 %i3.017, 1
%exitcond = icmp eq i32 %inc9, 1024
br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
}
-declare void @bar(...)
+declare void @bar(...) #1
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { "target-cpu"="a2q" }
+attributes #2 = { nounwind }
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
-attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck %s -check-prefix=FIXPOINT
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
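+; These functions check that the machine combiner reassociates chains of
+; dependent floating-point operations into balanced trees to shorten the
+; critical path, e.g. ((x0 + x1) + x2) + x3 into (x0 + x1) + (x2 + x3).
+; The QPX (qvfadds/qvfmadds) and VSX (xvaddsp/xvmaddasp) lowerings are
+; checked side by side.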
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK: # %bb.0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK: # %bb.0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK: # %bb.0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK: # %bb.0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
; CHECK-LABEL: reassociate_mamaa_double:
; CHECK: # %bb.0:
+; CHECK-QPX-DAG: fmadd [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG: fmadd [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX: fadd 1, [[REG0]], [[REG1]]
; CHECK-PWR-DAG: xsmaddadp 1, 6, 5
; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
; CHECK-PWR: xsadddp 1, 2, 1
define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
; CHECK-LABEL: reassociate_mamaa_vec:
; CHECK: # %bb.0:
+; CHECK-QPX-DAG: qvfmadds [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG: qvfmadds [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR-DAG: xvmaddasp [[REG0:[0-9]+]], 39, 38
; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
; CHECK-LABEL: reassociate_mamama_double:
; CHECK: # %bb.0:
+; CHECK-QPX: fmadd [[REG0:[0-9]+]], 2, 1, 7
+; CHECK-QPX-DAG: fmul [[REG1:[0-9]+]], 4, 3
+; CHECK-QPX-DAG: fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
+; CHECK-QPX-DAG: fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
+; CHECK-QPX: fadd 1, [[REG2]], [[REG3]]
; CHECK-PWR: xsmaddadp 7, 2, 1
; CHECK-PWR-DAG: xsmuldp [[REG0:[0-9]+]], 4, 3
; CHECK-PWR-DAG: xsmaddadp 7, 6, 5
declare void @bar(double) #1
-attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }
-; RUN: opt -ee-instrument < %s | opt -inline | llc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: opt -ee-instrument < %s | opt -inline | llc | FileCheck %s
; The run-line mimics how Clang might run the instrumentation passes.
target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
define void @leaf_function() #0 {
; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR7
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
+; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s -check-prefix=A2Q
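+; a2q has no Altivec/VSX, so its inlined memcpy is expanded with scalar
+; ld/std pairs rather than the lxvw4x/stxvw4x sequences checked for
+; pwr7/pwr8, and its inlined memset can use 32-byte qvstfdx stores; the
+; A2Q-NOT lines verify that the library calls are still elided.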
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; PWR8: lxvw4x
; PWR8: stxvw4x
; PWR8: blr
+
+; A2Q-LABEL: @foo1
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
}
; Function Attrs: nounwind
; PWR8: lxvw4x
; PWR8: stxvw4x
; PWR8: blr
+
+; A2Q-LABEL: @foo2
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
}
; Function Attrs: nounwind
; PWR8-NOT: bl memset
; PWR8: stxvw4x
; PWR8: blr
+
+; A2Q-LABEL: @bar1
+; A2Q-NOT: bl memset
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
}
; Function Attrs: nounwind
; PWR8-NOT: bl memset
; PWR8: stxvw4x
; PWR8: blr
+
+; A2Q-LABEL: @bar2
+; A2Q-NOT: bl memset
+; A2Q: qvstfdx
+; A2Q: blr
}
; Function Attrs: nounwind
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
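+; A 64-byte memset with 32-byte alignment should be inlined as two 32-byte
+; qvstfdx stores on a2q, and a 32-byte memset as two 16-byte stxvw4x stores
+; on pwr7. At -O0 the vector expansion is not expected, hence the
+; CHECK-O0-NOT lines.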
+; Function Attrs: nounwind
+define void @test_qpx() unnamed_addr #0 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* align 32 null, i8 %1, i64 64, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_qpx
+; CHECK: qvstfdx
+; CHECK: qvstfdx
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_qpx
+; CHECK-O0-NOT: qvstfdx
+; CHECK-O0: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
+
+; Function Attrs: nounwind
+define void @test_vsx() unnamed_addr #2 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_vsx
+; CHECK: stxvw4x
+; CHECK: stxvw4x
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_vsx
+; CHECK-O0-NOT: stxvw4x
+; CHECK-O0: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-cpu"="pwr7" }
+
; RUN: llc -verify-machineinstrs < %s -enable-misched -pre-RA-sched=source -scheditins=false \
-; RUN: -disable-ifcvt-triangle-false -disable-post-ra -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
;
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
; %val1 is a load live out of %entry. It should be hoisted
; above the add.
; RUN: llc < %s -enable-misched -verify-machineinstrs
; PR14302
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
@b = external global [16000 x double], align 32
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
declare zeroext i1 @ri1()
declare void @se1()
; The target features are left in this test because it is important that the
; caller has -pcrelative-memops while the callee has +pcrelative-memops.
-attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
-attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
-attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-spe" }
+attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" }
attributes #3 = { nounwind }
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+popcntd < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOWPC
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
+; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s
define i64 @_cntb64(i64 %x) nounwind readnone {
%cnt = tail call i64 @llvm.ppc.popcntb(i64 %x)
; STOP-AFTER-BRANCH-COALESCING-NOT: "ppc-branch-coalescing" pass is not registered.
; STOP-AFTER-BRANCH-COALESCING: Branch Coalescing
+
+; Test pass name: ppc-qpx-load-splat.
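+; The -stop-before/-stop-after runs verify both that the pass is registered
+; under this name and where it sits in the codegen pipeline; the "pass is
+; not registered" -NOT patterns guard against llc rejecting the name.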
+; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-QPX-LOAD-SPLAT
+; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: -ppc-qpx-load-splat
+; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
+; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: PowerPC QPX Load Splat Simplification
+
+; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-QPX-LOAD-SPLAT
+; STOP-AFTER-QPX-LOAD-SPLAT: -ppc-qpx-load-splat
+; STOP-AFTER-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
+; STOP-AFTER-QPX-LOAD-SPLAT: PowerPC QPX Load Splat Simplification
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
-; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
-; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
+; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
+; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -code-model=small | FileCheck %s -check-prefix=SCM
; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
attributes #0 = { noinline nounwind }
attributes #1 = { nounwind }
+; vector <4 x i1> test
+
+define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
+define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
+ tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
+ ret void
+
+; <4 x i1> is 32-byte aligned; if the subtarget does not support QPX, we
+; cannot pass %b and %c in QPX registers, so sibling-call optimization (SCO)
+; cannot be performed on caller_v4i1_reorder and a normal call (bl) is
+; emitted instead of a tail branch (b).
+
+; CHECK-SCO-LABEL: caller_v4i1_reorder:
+; CHECK-SCO: bl callee_v4i1
+
+; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
+; CHECK-SCO-HASQPX: b callee_v4i1
+}
+
define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
declare fastcc void @bar([2 x i64], [2 x i64]) unnamed_addr #1 align 2
attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.ident = !{!0}
ret void
}
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%t1 = type { %t2*, %t3* }
%t2 = type <{ %t3*, i32, [4 x i8] }>
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @s452(i32 %inp1) nounwind {
+entry:
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %entry
+ %conv.4 = sitofp i32 %inp1 to double
+ %conv.5 = sitofp i32 %inp1 to double
+ %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
+ %v = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
+ %vv = fmul <2 x double> %v, %v
+ %add7.4 = fadd <2 x double> %vv, %vv
+ store <2 x double> %add7.4, <2 x double>* undef, align 16
+ br i1 undef, label %for.end, label %for.body4
+
+for.end: ; preds = %for.body4
+ unreachable
+; CHECK-LABEL: @s452
+; CHECK: lfiwax [[REG1:[0-9]+]],
+; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
+; FIXME: We could 'promote' this to a vector earlier and remove this splat.
+; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
+; CHECK: qvfmul
+; CHECK: qvfadd
+; CHECK: qvesplati {{[0-9]+}},
+; FIXME: We can use qvstfcdx here instead of two stores.
+; CHECK: stfd
+; CHECK: stfd
+}
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
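+; A build_vector of four scalars is lowered as a pair of qvfperm merges:
+; qvgpci materializes permute-control values from immediates (275 and 101
+; here), one control interleaves the (1,2) and (3,4) register pairs, and
+; the second control combines the two partial results. The same sequence
+; is expected for both f64 and f32 elements.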
+define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
+ %v1 = insertelement <4 x double> undef, double %f1, i32 0
+ %v2 = insertelement <4 x double> %v1, double %f2, i32 1
+ %v3 = insertelement <4 x double> %v2, double %f3, i32 2
+ %v4 = insertelement <4 x double> %v3, double %f4, i32 3
+ ret <4 x double> %v4
+
+; CHECK-LABEL: @foo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
+ %v1 = insertelement <4 x float> undef, float %f1, i32 0
+ %v2 = insertelement <4 x float> %v1, float %f2, i32 1
+ %v3 = insertelement <4 x float> %v2, float %f3, i32 2
+ %v4 = insertelement <4 x float> %v3, float %f4, i32 3
+ ret <4 x float> %v4
+
+; CHECK-LABEL: @goo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+declare <4 x double> @foo(<4 x double> %p)
+
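+; Both vector arguments are live across a call. QPX registers overlay the
+; FPRs, and only the scalar (bottom) halves of the callee-saved FPRs are
+; preserved, so full four-wide values must be spilled and reloaded around
+; the calls; the qvstfdx/qvlfdx pairs below check exactly that.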
+define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
+entry:
+ %v = call <4 x double> @foo(<4 x double> %p)
+ %w = call <4 x double> @foo(<4 x double> %q)
+ %x = fadd <4 x double> %v, %w
+ ret <4 x double> %x
+
+; CHECK-LABEL: @bar
+; CHECK: qvstfdx 2,
+; CHECK: bl foo
+; CHECK: qvstfdx 1,
+; CHECK: qvlfdx 1,
+; CHECK: bl foo
+; CHECK: qvlfdx [[REG:[0-9]+]],
+; CHECK: qvfadd 1, [[REG]], 1
+}
+
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
+
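+; A scalar double load splatted to <4 x double> should fold into a single
+; load-and-splat (lxvdsx), with vmr copying the value into the second
+; register of the legalized pair; the float variants fold into
+; lfiwzx + xxspltw instead.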
+; Function Attrs: norecurse nounwind readonly
+define <4 x double> @foo(double* nocapture readonly %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvdsx v2, 0, r3
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: blr
+entry:
+ %0 = load double, double* %a, align 8
+ %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
+ %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %shuffle.i
+}
+
+define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
+; CHECK-LABEL: foox:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi r4, r4, 3
+; CHECK-NEXT: lxvdsx v2, r3, r4
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: blr
+entry:
+ %p = getelementptr double, double* %a, i64 %idx
+ %0 = load double, double* %p, align 8
+ %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
+ %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %shuffle.i
+}
+
+define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
+; CHECK-LABEL: fooxu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi r4, r4, 3
+; CHECK-NEXT: add r6, r3, r4
+; CHECK-NEXT: std r6, 0(r5)
+; CHECK-NEXT: lxvdsx v2, r3, r4
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: blr
+entry:
+ %p = getelementptr double, double* %a, i64 %idx
+ %0 = load double, double* %p, align 8
+ %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
+ %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
+ store double* %p, double** %pptr, align 8
+ ret <4 x double> %shuffle.i
+}
+
+define <4 x float> @foof(float* nocapture readonly %a) #0 {
+; CHECK-LABEL: foof:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxspltw v2, vs0, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = load float, float* %a, align 4
+ %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
+ %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %shuffle.i
+}
+
+define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
+; CHECK-LABEL: foofx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi r4, r4, 2
+; CHECK-NEXT: lfiwzx f0, r3, r4
+; CHECK-NEXT: xxspltw v2, vs0, 1
+; CHECK-NEXT: blr
+entry:
+ %p = getelementptr float, float* %a, i64 %idx
+ %0 = load float, float* %p, align 4
+ %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
+ %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %shuffle.i
+}
+
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
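+; QPX vector loads require 32-byte alignment. An under-aligned
+; <4 x double> load is expanded into two aligned qvlfdx loads (the second
+; offset by 31 so it lands in the following aligned block), a qvlpcldx
+; that derives a permutation control from the low address bits, and a
+; qvfperm that splices the two halves together. The 32-byte-aligned load
+; in @bar needs only a single qvlfdx.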
+define <4 x double> @foo(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x double> @bar(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>, <4 x double>* %p, align 32
+ ret <4 x double> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfdx
+
--- /dev/null
+; RUN: llc -verify-machineinstrs -stop-after=finalize-isel < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
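+; These tests stop after finalize-isel and match the MIR directly: with
+; the reassoc/nsz fast-math flags, each fmul/fadd (or fsub, plus fneg for
+; the negated forms) pair is expected to be selected into the
+; corresponding QPX fused multiply-add pseudo, carrying an implicit use
+; of the rounding-mode register $rm.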
+define <2 x double> @test_qvfmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
+; CHECK: test_qvfmadd
+; CHECK: QVFMADD %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <2 x double> %2, %1
+ %5 = fadd reassoc nsz <2 x double> %4, %0
+ ret <2 x double> %5
+}
+
+define <4 x float> @test_qvfmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
+; CHECK: test_qvfmadds
+; CHECK: QVFMADDSs %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <4 x float> %2, %1
+ %5 = fadd reassoc nsz <4 x float> %4, %0
+ ret <4 x float> %5
+}
+
+define <2 x double> @test_qvfnmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
+; CHECK: test_qvfnmadd
+; CHECK: QVFNMADD %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <2 x double> %2, %1
+ %5 = fadd reassoc nsz <2 x double> %4, %0
+ %6 = fneg reassoc nsz <2 x double> %5
+ ret <2 x double> %6
+}
+
+define <4 x float> @test_qvfnmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
+; CHECK: test_qvfnmadds
+; CHECK: QVFNMADDSs %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <4 x float> %2, %1
+ %5 = fadd reassoc nsz <4 x float> %4, %0
+ %6 = fneg reassoc nsz <4 x float> %5
+ ret <4 x float> %6
+}
+
+define <2 x double> @test_qvfmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
+; CHECK: test_qvfmsub
+; CHECK: QVFMSUB %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <2 x double> %2, %1
+ %5 = fsub reassoc nsz <2 x double> %4, %0
+ ret <2 x double> %5
+}
+
+define <4 x float> @test_qvfmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
+; CHECK: test_qvfmsubs
+; CHECK: QVFMSUBSs %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <4 x float> %2, %1
+ %5 = fsub reassoc nsz <4 x float> %4, %0
+ ret <4 x float> %5
+}
+
+define <2 x double> @test_qvfnmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
+; CHECK: test_qvfnmsub
+; CHECK: QVFNMSUB %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <2 x double> %2, %1
+ %5 = fsub reassoc nsz <2 x double> %4, %0
+ %6 = fneg reassoc nsz <2 x double> %5
+ ret <2 x double> %6
+}
+
+define <4 x float> @test_qvfnmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
+; CHECK: test_qvfnmsubs
+; CHECK: QVFNMSUBSs %2, %1, %0, implicit $rm
+;
+ %4 = fmul reassoc nsz <4 x float> %2, %1
+ %5 = fsub reassoc nsz <4 x float> %4, %0
+ %6 = fneg reassoc nsz <4 x float> %5
+ ret <4 x float> %6
+}
+
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: foo_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT: qvfrsqrte 3, 2
+; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT: qvlfdx 0, 0, 3
+; CHECK-NEXT: qvfmul 4, 3, 3
+; CHECK-NEXT: qvfmsub 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
+; CHECK-NEXT: qvfmul 3, 3, 4
+; CHECK-NEXT: qvfmul 4, 3, 3
+; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
+; CHECK-NEXT: qvfmul 0, 3, 0
+; CHECK-NEXT: qvfmul 1, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %x = call ninf afn reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %r = fdiv arcp reassoc <4 x double> %a, %x
+ ret <4 x double> %r
+}
+
+define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: foo_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 5, 2, 3
+; CHECK-NEXT: qvesplati 3, 2, 1
+; CHECK-NEXT: qvesplati 4, 2, 2
+; CHECK-NEXT: fsqrt 2, 2
+; CHECK-NEXT: fsqrt 5, 5
+; CHECK-NEXT: fsqrt 4, 4
+; CHECK-NEXT: fsqrt 3, 3
+; CHECK-NEXT: qvesplati 6, 1, 3
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: fdiv 2, 1, 2
+; CHECK-NEXT: fdiv 5, 6, 5
+; CHECK-NEXT: qvesplati 6, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fdiv 4, 6, 4
+; CHECK-NEXT: fdiv 1, 1, 3
+; CHECK-NEXT: qvfperm 3, 4, 5, 0
+; CHECK-NEXT: qvfperm 0, 2, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 3, 1
+; CHECK-NEXT: blr
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %r = fdiv <4 x double> %a, %x
+ ret <4 x double> %r
+}
+
+define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: foof_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT: qvfrsqrtes 3, 2
+; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
+; CHECK-NEXT: qvlfsx 0, 0, 3
+; CHECK-NEXT: qvfmuls 4, 3, 3
+; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
+; CHECK-NEXT: qvfmuls 0, 3, 0
+; CHECK-NEXT: qvfmul 1, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %y = fpext <4 x float> %x to <4 x double>
+ %r = fdiv arcp reassoc nsz <4 x double> %a, %y
+ ret <4 x double> %r
+}
+
+define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: foof_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 2, 3
+; CHECK-NEXT: qvesplati 3, 2, 2
+; CHECK-NEXT: fsqrts 4, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: fsqrts 0, 0
+; CHECK-NEXT: fsqrts 3, 3
+; CHECK-NEXT: fsqrts 2, 2
+; CHECK-NEXT: qvgpci 5, 275
+; CHECK-NEXT: qvgpci 6, 101
+; CHECK-NEXT: qvfperm 0, 3, 0, 5
+; CHECK-NEXT: qvesplati 3, 1, 2
+; CHECK-NEXT: qvfperm 2, 4, 2, 5
+; CHECK-NEXT: qvfperm 0, 2, 0, 6
+; CHECK-NEXT: qvesplati 2, 1, 3
+; CHECK-NEXT: qvesplati 4, 0, 3
+; CHECK-NEXT: fdiv 2, 2, 4
+; CHECK-NEXT: qvesplati 4, 0, 2
+; CHECK-NEXT: fdiv 3, 3, 4
+; CHECK-NEXT: qvesplati 4, 1, 1
+; CHECK-NEXT: fdiv 1, 1, 0
+; CHECK-NEXT: qvesplati 0, 0, 1
+; CHECK-NEXT: fdiv 0, 4, 0
+; CHECK-NEXT: qvfperm 2, 3, 2, 5
+; CHECK-NEXT: qvfperm 0, 1, 0, 5
+; CHECK-NEXT: qvfperm 1, 0, 2, 6
+; CHECK-NEXT: blr
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %y = fpext <4 x float> %x to <4 x double>
+ %r = fdiv <4 x double> %a, %y
+ ret <4 x double> %r
+}
+
+define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: food_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT: qvfrsqrte 3, 2
+; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
+; CHECK-NEXT: qvlfdx 0, 0, 3
+; CHECK-NEXT: qvfmul 4, 3, 3
+; CHECK-NEXT: qvfmsub 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
+; CHECK-NEXT: qvfmul 3, 3, 4
+; CHECK-NEXT: qvfmul 4, 3, 3
+; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
+; CHECK-NEXT: qvfmul 0, 3, 0
+; CHECK-NEXT: qvfrsp 0, 0
+; CHECK-NEXT: qvfmuls 1, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %x = call afn ninf reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %y = fptrunc <4 x double> %x to <4 x float>
+ %r = fdiv arcp reassoc <4 x float> %a, %y
+ ret <4 x float> %r
+}
+
+define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: food_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 2, 3
+; CHECK-NEXT: qvesplati 3, 2, 2
+; CHECK-NEXT: fsqrt 4, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: fsqrt 0, 0
+; CHECK-NEXT: fsqrt 3, 3
+; CHECK-NEXT: fsqrt 2, 2
+; CHECK-NEXT: qvgpci 5, 275
+; CHECK-NEXT: qvgpci 6, 101
+; CHECK-NEXT: qvfperm 0, 3, 0, 5
+; CHECK-NEXT: qvesplati 3, 1, 2
+; CHECK-NEXT: qvfperm 2, 4, 2, 5
+; CHECK-NEXT: qvfperm 0, 2, 0, 6
+; CHECK-NEXT: qvesplati 2, 1, 3
+; CHECK-NEXT: qvfrsp 0, 0
+; CHECK-NEXT: qvesplati 4, 0, 3
+; CHECK-NEXT: fdivs 2, 2, 4
+; CHECK-NEXT: qvesplati 4, 0, 2
+; CHECK-NEXT: fdivs 3, 3, 4
+; CHECK-NEXT: qvesplati 4, 1, 1
+; CHECK-NEXT: fdivs 1, 1, 0
+; CHECK-NEXT: qvesplati 0, 0, 1
+; CHECK-NEXT: fdivs 0, 4, 0
+; CHECK-NEXT: qvfperm 2, 3, 2, 5
+; CHECK-NEXT: qvfperm 0, 1, 0, 5
+; CHECK-NEXT: qvfperm 1, 0, 2, 6
+; CHECK-NEXT: blr
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %y = fptrunc <4 x double> %x to <4 x float>
+ %r = fdiv <4 x float> %a, %y
+ ret <4 x float> %r
+}
+
+define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: goo_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
+; CHECK-NEXT: qvfrsqrtes 3, 2
+; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
+; CHECK-NEXT: qvlfsx 0, 0, 3
+; CHECK-NEXT: qvfmuls 4, 3, 3
+; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
+; CHECK-NEXT: qvfmuls 0, 3, 0
+; CHECK-NEXT: qvfmuls 1, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv arcp reassoc nsz <4 x float> %a, %x
+ ret <4 x float> %r
+}
+
+define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: goo_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 5, 2, 3
+; CHECK-NEXT: qvesplati 3, 2, 1
+; CHECK-NEXT: qvesplati 4, 2, 2
+; CHECK-NEXT: fsqrts 2, 2
+; CHECK-NEXT: fsqrts 5, 5
+; CHECK-NEXT: fsqrts 4, 4
+; CHECK-NEXT: fsqrts 3, 3
+; CHECK-NEXT: qvesplati 6, 1, 3
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: fdivs 2, 1, 2
+; CHECK-NEXT: fdivs 5, 6, 5
+; CHECK-NEXT: qvesplati 6, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fdivs 4, 6, 4
+; CHECK-NEXT: fdivs 1, 1, 3
+; CHECK-NEXT: qvfperm 3, 4, 5, 0
+; CHECK-NEXT: qvfperm 0, 2, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 3, 1
+; CHECK-NEXT: blr
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv <4 x float> %a, %x
+ ret <4 x float> %r
+}
+
+define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: foo2_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; CHECK-NEXT: qvfre 3, 2
+; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l
+; CHECK-NEXT: qvlfdx 0, 0, 3
+; CHECK-NEXT: qvfmadd 0, 2, 3, 0
+; CHECK-NEXT: qvfnmsub 0, 3, 0, 3
+; CHECK-NEXT: qvfmul 3, 1, 0
+; CHECK-NEXT: qvfnmsub 1, 2, 3, 1
+; CHECK-NEXT: qvfmadd 1, 0, 1, 3
+; CHECK-NEXT: blr
+entry:
+ %r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
+ ret <4 x double> %r
+}
+
+define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: foo2_safe:
+; CHECK: # %bb.0:
+; CHECK-NEXT: qvesplati 3, 2, 3
+; CHECK-NEXT: qvesplati 4, 1, 3
+; CHECK-NEXT: qvesplati 5, 2, 2
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: fdiv 3, 4, 3
+; CHECK-NEXT: qvesplati 4, 1, 2
+; CHECK-NEXT: fdiv 4, 4, 5
+; CHECK-NEXT: fdiv 5, 1, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fdiv 1, 1, 2
+; CHECK-NEXT: qvfperm 2, 4, 3, 0
+; CHECK-NEXT: qvfperm 0, 5, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 2, 1
+; CHECK-NEXT: blr
+ %r = fdiv <4 x double> %a, %b
+ ret <4 x double> %r
+}
+
+define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: goo2_fmf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvfres 0, 2
+; CHECK-NEXT: qvfmuls 3, 1, 0
+; CHECK-NEXT: qvfnmsubs 1, 2, 3, 1
+; CHECK-NEXT: qvfmadds 1, 0, 1, 3
+; CHECK-NEXT: blr
+entry:
+ %r = fdiv arcp reassoc ninf <4 x float> %a, %b
+ ret <4 x float> %r
+}
+
+define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: goo2_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 3, 2, 3
+; CHECK-NEXT: qvesplati 4, 1, 3
+; CHECK-NEXT: qvesplati 5, 2, 2
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: fdivs 3, 4, 3
+; CHECK-NEXT: qvesplati 4, 1, 2
+; CHECK-NEXT: fdivs 4, 4, 5
+; CHECK-NEXT: fdivs 5, 1, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fdivs 1, 1, 2
+; CHECK-NEXT: qvfperm 2, 4, 3, 0
+; CHECK-NEXT: qvfperm 0, 5, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 2, 1
+; CHECK-NEXT: blr
+entry:
+ %r = fdiv <4 x float> %a, %b
+ ret <4 x float> %r
+}
+
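+; The fast-math rsqrt expansions below must still produce the correct
+; result for inputs the Newton-Raphson iteration mishandles. With IEEE
+; denormals (attribute #0), the guard compares |a| against a
+; smallest-normal threshold (qvfabs + qvfcmplt); with flushed denormals
+; (attribute #1), a plain compare against zero suffices (qvfcmpeq). In
+; both cases qvfsel picks between the guarded constant and the refined
+; estimate.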
+define <4 x double> @foo3_fmf_denorm_on(<4 x double> %a) #0 {
+; CHECK-LABEL: foo3_fmf_denorm_on:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI12_0@toc@ha
+; CHECK-NEXT: qvfrsqrte 0, 1
+; CHECK-NEXT: addi 3, 3, .LCPI12_0@toc@l
+; CHECK-NEXT: qvlfdx 2, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI12_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI12_1@toc@l
+; CHECK-NEXT: qvfmul 3, 0, 0
+; CHECK-NEXT: qvfmsub 4, 1, 2, 1
+; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
+; CHECK-NEXT: qvfmul 0, 0, 3
+; CHECK-NEXT: qvfmul 3, 0, 0
+; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
+; CHECK-NEXT: qvfmul 0, 0, 2
+; CHECK-NEXT: qvlfdx 2, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI12_2@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI12_2@toc@l
+; CHECK-NEXT: qvlfdx 3, 0, 3
+; CHECK-NEXT: qvfmul 0, 0, 1
+; CHECK-NEXT: qvfabs 1, 1
+; CHECK-NEXT: qvfcmplt 1, 1, 2
+; CHECK-NEXT: qvfsel 1, 1, 3, 0
+; CHECK-NEXT: blr
+entry:
+ %r = call reassoc ninf afn <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+}
+
+define <4 x double> @foo3_fmf_denorm_off(<4 x double> %a) #1 {
+; CHECK-LABEL: foo3_fmf_denorm_off:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI13_0@toc@ha
+; CHECK-NEXT: qvfrsqrte 0, 1
+; CHECK-NEXT: addi 3, 3, .LCPI13_0@toc@l
+; CHECK-NEXT: qvlfdx 2, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI13_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI13_1@toc@l
+; CHECK-NEXT: qvfmul 3, 0, 0
+; CHECK-NEXT: qvfmsub 4, 1, 2, 1
+; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
+; CHECK-NEXT: qvfmul 0, 0, 3
+; CHECK-NEXT: qvfmul 3, 0, 0
+; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
+; CHECK-NEXT: qvfmul 0, 0, 2
+; CHECK-NEXT: qvlfdx 2, 0, 3
+; CHECK-NEXT: qvfmul 0, 0, 1
+; CHECK-NEXT: qvfcmpeq 1, 1, 2
+; CHECK-NEXT: qvfsel 1, 1, 2, 0
+; CHECK-NEXT: blr
+entry:
+ %r = call afn reassoc ninf <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+}
+
+define <4 x double> @foo3_safe_denorm_on(<4 x double> %a) #0 {
+; CHECK-LABEL: foo3_safe_denorm_on:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 2, 1, 3
+; CHECK-NEXT: qvesplati 3, 1, 2
+; CHECK-NEXT: fsqrt 4, 1
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fsqrt 2, 2
+; CHECK-NEXT: fsqrt 3, 3
+; CHECK-NEXT: fsqrt 1, 1
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: qvfperm 2, 3, 2, 0
+; CHECK-NEXT: qvfperm 0, 4, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 2, 1
+; CHECK-NEXT: blr
+entry:
+ %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+}
+
+define <4 x double> @foo3_safe_denorm_off(<4 x double> %a) #1 {
+; CHECK-LABEL: foo3_safe_denorm_off:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 2, 1, 3
+; CHECK-NEXT: qvesplati 3, 1, 2
+; CHECK-NEXT: fsqrt 4, 1
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fsqrt 2, 2
+; CHECK-NEXT: fsqrt 3, 3
+; CHECK-NEXT: fsqrt 1, 1
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: qvfperm 2, 3, 2, 0
+; CHECK-NEXT: qvfperm 0, 4, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 2, 1
+; CHECK-NEXT: blr
+entry:
+ %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+}
+
+define <4 x float> @goo3_fmf_denorm_on(<4 x float> %a) #0 {
+; CHECK-LABEL: goo3_fmf_denorm_on:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI16_1@toc@ha
+; CHECK-NEXT: qvfrsqrtes 2, 1
+; CHECK-NEXT: addi 3, 3, .LCPI16_1@toc@l
+; CHECK-NEXT: qvlfsx 0, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l
+; CHECK-NEXT: qvfmuls 4, 2, 2
+; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
+; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
+; CHECK-NEXT: qvlfsx 3, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l
+; CHECK-NEXT: qvlfsx 4, 0, 3
+; CHECK-NEXT: qvfmuls 0, 2, 0
+; CHECK-NEXT: qvfabs 2, 1
+; CHECK-NEXT: qvfmuls 0, 0, 1
+; CHECK-NEXT: qvfcmplt 1, 2, 3
+; CHECK-NEXT: qvfsel 1, 1, 4, 0
+; CHECK-NEXT: blr
+entry:
+ %r = call reassoc afn ninf nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+}
+
+define <4 x float> @goo3_fmf_denorm_off(<4 x float> %a) #1 {
+; CHECK-LABEL: goo3_fmf_denorm_off:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI17_1@toc@ha
+; CHECK-NEXT: qvfrsqrtes 2, 1
+; CHECK-NEXT: addi 3, 3, .LCPI17_1@toc@l
+; CHECK-NEXT: qvlfsx 0, 0, 3
+; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
+; CHECK-NEXT: qvfmuls 4, 2, 2
+; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
+; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
+; CHECK-NEXT: qvlfsx 3, 0, 3
+; CHECK-NEXT: qvfmuls 0, 2, 0
+; CHECK-NEXT: qvfmuls 0, 0, 1
+; CHECK-NEXT: qvfcmpeq 1, 1, 3
+; CHECK-NEXT: qvfsel 1, 1, 3, 0
+; CHECK-NEXT: blr
+entry:
+ %r = call reassoc ninf afn nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+}
+
+define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
+; CHECK-LABEL: goo3_safe:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 2, 1, 3
+; CHECK-NEXT: qvesplati 3, 1, 2
+; CHECK-NEXT: fsqrts 4, 1
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: fsqrts 2, 2
+; CHECK-NEXT: fsqrts 3, 3
+; CHECK-NEXT: fsqrts 1, 1
+; CHECK-NEXT: qvgpci 0, 275
+; CHECK-NEXT: qvfperm 2, 3, 2, 0
+; CHECK-NEXT: qvfperm 0, 4, 1, 0
+; CHECK-NEXT: qvgpci 1, 101
+; CHECK-NEXT: qvfperm 1, 0, 2, 1
+; CHECK-NEXT: blr
+entry:
+ %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+}
+
+attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
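+; QPX has round-to-integer instructions for the directed roundings:
+; qvfrim (floor), qvfrip (ceil), and qvfriz (trunc). nearbyint is not
+; matched to qvfrin, which rounds ties away from zero rather than using
+; the current rounding mode; the CHECK-NOT lines verify that this holds
+; even with -enable-unsafe-fp-math.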
+define <4 x float> @test1(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test1:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test2(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test2:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test3(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test3:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test3:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test4(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test4:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test4:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test5(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test5:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test6(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test6:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test9(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test9:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test10(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test10:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
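+; An under-aligned <4 x float> load is expanded like the f64 case: two
+; aligned qvlfsx loads (the second offset by 15), a qvlpclsx permute
+; control, and a qvfperm splice; the 16-byte-aligned load in @bar is a
+; single qvlfsx.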
+define <4 x float> @foo(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x float> @bar(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>, <4 x float>* %p, align 16
+ ret <4 x float> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfsx
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
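+; QPX has no vector integer registers: a <4 x i1> lives in a QPX register
+; as a floating-point mask, the form qvfcmp* and qvfsel consume. Building
+; one from scalar i1 values therefore round-trips through memory: store
+; the bits (stw), reload as integers (qvlfiwzx), convert to floating
+; point (qvfcfidu), and compare against a constant (qvfcmpeq) to form the
+; mask, as @test2 checks.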
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test2
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; FIXME: The following two lines are missing a CHECK prefix and are not
+; currently verified:
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
+entry:
+ %q = load <4 x i1>, <4 x i1>* %t, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvfand 1, 1, [[REG4]]
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
+ ret <3 x float> %r
+
+; CHECK-LABEL: @test9
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
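+; Unlike the permute-based unaligned-load expansion, an under-aligned
+; <4 x float> store is simply scalarized into four stfs; the properly
+; aligned store in @bar is a single qvstfsx.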
+define void @foo(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: blr
+
+define void @bar(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 16
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfsx
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test2
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; FIXME: The following two lines are missing a CHECK prefix and are not
+; currently verified:
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
+entry:
+ %q = load <4 x i1>, <4 x i1>* %t, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvfand 1, 1, [[REG4]]
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
+ ret <3 x double> %r
+
+; CHECK-LABEL: @test9
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @gsl_sf_legendre_Pl_deriv_array(<4 x i32> %inp1, <4 x double> %inp2) #0 {
+entry:
+ br label %vector.body198
+
+vector.body198:                                   ; preds = %entry
+ %0 = icmp ne <4 x i32> %inp1, zeroinitializer
+ %1 = select <4 x i1> %0, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
+ %2 = fmul <4 x double> %inp2, %1
+ %3 = fmul <4 x double> %inp2, %2
+ %4 = fmul <4 x double> %3, %inp2
+ store <4 x double> %4, <4 x double>* undef, align 8
+ br label %return
+
+; CHECK-LABEL: @gsl_sf_legendre_Pl_deriv_array
+; CHECK: qvlfiwzx
+; CHECK: qvfcfidu
+; CHECK: qvfcmpeq
+; CHECK: qvfsel
+; CHECK: qvfmul
+
+return:                                           ; preds = %vector.body198
+ ret void
+}
+
+attributes #0 = { nounwind }
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+define void @foo(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: blr
+
+define void @bar(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 32
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfdx
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+ br label %vector.body
+
+; CHECK-LABEL: @foo
+; Make sure that the offset constants we use are all even; only the last one
+; (2047) should be odd.
+; CHECK-DAG: li {{[0-9]+}}, 1056
+; CHECK-DAG: li {{[0-9]+}}, 1088
+; CHECK-DAG: li {{[0-9]+}}, 1152
+; CHECK-DAG: li {{[0-9]+}}, 1216
+; CHECK-DAG: li {{[0-9]+}}, 1280
+; CHECK-DAG: li {{[0-9]+}}, 1344
+; CHECK-DAG: li {{[0-9]+}}, 1408
+; CHECK-DAG: li {{[0-9]+}}, 1472
+; CHECK-DAG: li {{[0-9]+}}, 1536
+; CHECK-DAG: li {{[0-9]+}}, 1600
+; CHECK-DAG: li {{[0-9]+}}, 1568
+; CHECK-DAG: li {{[0-9]+}}, 1664
+; CHECK-DAG: li {{[0-9]+}}, 1632
+; CHECK-DAG: li {{[0-9]+}}, 1728
+; CHECK-DAG: li {{[0-9]+}}, 1696
+; CHECK-DAG: li {{[0-9]+}}, 1792
+; CHECK-DAG: li {{[0-9]+}}, 1760
+; CHECK-DAG: li {{[0-9]+}}, 1856
+; CHECK-DAG: li {{[0-9]+}}, 1824
+; CHECK-DAG: li {{[0-9]+}}, 1920
+; CHECK-DAG: li {{[0-9]+}}, 1888
+; CHECK-DAG: li {{[0-9]+}}, 1984
+; CHECK-DAG: li {{[0-9]+}}, 1952
+; CHECK-DAG: li {{[0-9]+}}, 2016
+; CHECK-DAG: li {{[0-9]+}}, 1024
+; CHECK-DAG: li {{[0-9]+}}, 1120
+; CHECK-DAG: li {{[0-9]+}}, 1184
+; CHECK-DAG: li {{[0-9]+}}, 1248
+; CHECK-DAG: li {{[0-9]+}}, 1312
+; CHECK-DAG: li {{[0-9]+}}, 1376
+; CHECK-DAG: li {{[0-9]+}}, 1440
+; CHECK-DAG: li {{[0-9]+}}, 1504
+; CHECK-DAG: li {{[0-9]+}}, 2047
+; CHECK: blr
+
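+; The vector loop below is manually unrolled 16x; %index is always a multiple
+; of 64, so the 'or' index increments are equivalent to adds.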
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
+ %0 = shl i64 %index, 1
+ %1 = getelementptr inbounds double, double* %b, i64 %0
+ %2 = bitcast double* %1 to <8 x double>*
+ %wide.vec = load <8 x double>, <8 x double>* %2, align 8
+ %strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %4 = getelementptr inbounds double, double* %a, i64 %index
+ %5 = bitcast double* %4 to <4 x double>*
+ store <4 x double> %3, <4 x double>* %5, align 8
+ %index.next = or i64 %index, 4
+ %6 = shl i64 %index.next, 1
+ %7 = getelementptr inbounds double, double* %b, i64 %6
+ %8 = bitcast double* %7 to <8 x double>*
+ %wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
+ %strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %10 = getelementptr inbounds double, double* %a, i64 %index.next
+ %11 = bitcast double* %10 to <4 x double>*
+ store <4 x double> %9, <4 x double>* %11, align 8
+ %index.next.1 = or i64 %index, 8
+ %12 = shl i64 %index.next.1, 1
+ %13 = getelementptr inbounds double, double* %b, i64 %12
+ %14 = bitcast double* %13 to <8 x double>*
+ %wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
+ %strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %16 = getelementptr inbounds double, double* %a, i64 %index.next.1
+ %17 = bitcast double* %16 to <4 x double>*
+ store <4 x double> %15, <4 x double>* %17, align 8
+ %index.next.2 = or i64 %index, 12
+ %18 = shl i64 %index.next.2, 1
+ %19 = getelementptr inbounds double, double* %b, i64 %18
+ %20 = bitcast double* %19 to <8 x double>*
+ %wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
+ %strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %22 = getelementptr inbounds double, double* %a, i64 %index.next.2
+ %23 = bitcast double* %22 to <4 x double>*
+ store <4 x double> %21, <4 x double>* %23, align 8
+ %index.next.3 = or i64 %index, 16
+ %24 = shl i64 %index.next.3, 1
+ %25 = getelementptr inbounds double, double* %b, i64 %24
+ %26 = bitcast double* %25 to <8 x double>*
+ %wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
+ %strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %28 = getelementptr inbounds double, double* %a, i64 %index.next.3
+ %29 = bitcast double* %28 to <4 x double>*
+ store <4 x double> %27, <4 x double>* %29, align 8
+ %index.next.4 = or i64 %index, 20
+ %30 = shl i64 %index.next.4, 1
+ %31 = getelementptr inbounds double, double* %b, i64 %30
+ %32 = bitcast double* %31 to <8 x double>*
+ %wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
+ %strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %34 = getelementptr inbounds double, double* %a, i64 %index.next.4
+ %35 = bitcast double* %34 to <4 x double>*
+ store <4 x double> %33, <4 x double>* %35, align 8
+ %index.next.5 = or i64 %index, 24
+ %36 = shl i64 %index.next.5, 1
+ %37 = getelementptr inbounds double, double* %b, i64 %36
+ %38 = bitcast double* %37 to <8 x double>*
+ %wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
+ %strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %40 = getelementptr inbounds double, double* %a, i64 %index.next.5
+ %41 = bitcast double* %40 to <4 x double>*
+ store <4 x double> %39, <4 x double>* %41, align 8
+ %index.next.6 = or i64 %index, 28
+ %42 = shl i64 %index.next.6, 1
+ %43 = getelementptr inbounds double, double* %b, i64 %42
+ %44 = bitcast double* %43 to <8 x double>*
+ %wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
+ %strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %46 = getelementptr inbounds double, double* %a, i64 %index.next.6
+ %47 = bitcast double* %46 to <4 x double>*
+ store <4 x double> %45, <4 x double>* %47, align 8
+ %index.next.7 = or i64 %index, 32
+ %48 = shl i64 %index.next.7, 1
+ %49 = getelementptr inbounds double, double* %b, i64 %48
+ %50 = bitcast double* %49 to <8 x double>*
+ %wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
+ %strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %52 = getelementptr inbounds double, double* %a, i64 %index.next.7
+ %53 = bitcast double* %52 to <4 x double>*
+ store <4 x double> %51, <4 x double>* %53, align 8
+ %index.next.8 = or i64 %index, 36
+ %54 = shl i64 %index.next.8, 1
+ %55 = getelementptr inbounds double, double* %b, i64 %54
+ %56 = bitcast double* %55 to <8 x double>*
+ %wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
+ %strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %58 = getelementptr inbounds double, double* %a, i64 %index.next.8
+ %59 = bitcast double* %58 to <4 x double>*
+ store <4 x double> %57, <4 x double>* %59, align 8
+ %index.next.9 = or i64 %index, 40
+ %60 = shl i64 %index.next.9, 1
+ %61 = getelementptr inbounds double, double* %b, i64 %60
+ %62 = bitcast double* %61 to <8 x double>*
+ %wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
+ %strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %64 = getelementptr inbounds double, double* %a, i64 %index.next.9
+ %65 = bitcast double* %64 to <4 x double>*
+ store <4 x double> %63, <4 x double>* %65, align 8
+ %index.next.10 = or i64 %index, 44
+ %66 = shl i64 %index.next.10, 1
+ %67 = getelementptr inbounds double, double* %b, i64 %66
+ %68 = bitcast double* %67 to <8 x double>*
+ %wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
+ %strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %70 = getelementptr inbounds double, double* %a, i64 %index.next.10
+ %71 = bitcast double* %70 to <4 x double>*
+ store <4 x double> %69, <4 x double>* %71, align 8
+ %index.next.11 = or i64 %index, 48
+ %72 = shl i64 %index.next.11, 1
+ %73 = getelementptr inbounds double, double* %b, i64 %72
+ %74 = bitcast double* %73 to <8 x double>*
+ %wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
+ %strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %76 = getelementptr inbounds double, double* %a, i64 %index.next.11
+ %77 = bitcast double* %76 to <4 x double>*
+ store <4 x double> %75, <4 x double>* %77, align 8
+ %index.next.12 = or i64 %index, 52
+ %78 = shl i64 %index.next.12, 1
+ %79 = getelementptr inbounds double, double* %b, i64 %78
+ %80 = bitcast double* %79 to <8 x double>*
+ %wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
+ %strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %82 = getelementptr inbounds double, double* %a, i64 %index.next.12
+ %83 = bitcast double* %82 to <4 x double>*
+ store <4 x double> %81, <4 x double>* %83, align 8
+ %index.next.13 = or i64 %index, 56
+ %84 = shl i64 %index.next.13, 1
+ %85 = getelementptr inbounds double, double* %b, i64 %84
+ %86 = bitcast double* %85 to <8 x double>*
+ %wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
+ %strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %88 = getelementptr inbounds double, double* %a, i64 %index.next.13
+ %89 = bitcast double* %88 to <4 x double>*
+ store <4 x double> %87, <4 x double>* %89, align 8
+ %index.next.14 = or i64 %index, 60
+ %90 = shl i64 %index.next.14, 1
+ %91 = getelementptr inbounds double, double* %b, i64 %90
+ %92 = bitcast double* %91 to <8 x double>*
+ %wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
+ %strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %94 = getelementptr inbounds double, double* %a, i64 %index.next.14
+ %95 = bitcast double* %94 to <4 x double>*
+ store <4 x double> %93, <4 x double>* %95, align 8
+ %index.next.15 = add nsw i64 %index, 64
+ %96 = icmp eq i64 %index.next.15, 1600
+ br i1 %96, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
--- /dev/null
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 32
+ ret <4 x double> %r
+; CHECK: qvlfdx
+; CHECK: blr
+}
+
+define <4 x double> @bar(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
+ %s = load <4 x double>, <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+; CHECK: qvlpcldx
+; CHECK: qvlfdx
+; CHECK: qvfperm
+; CHECK: blr
+}
+
+define <4 x double> @bar1(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar2(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar3(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar4(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %c = getelementptr <4 x double>, <4 x double>* %b, i32 1
+ %t = load <4 x double>, <4 x double>* %c, align 8
+ %u = fadd <4 x double> %r, %s
+ %v = fadd <4 x double> %u, %t
+ ret <4 x double> %v
+}
+
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
define void @test() align 2 {
entry:
ret i64 %cond
}
- attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
-; RUN: llc -verify-machineinstrs < %s -enable-misched -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -enable-misched -enable-aa-sched-mi -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -enable-misched -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -enable-misched -enable-aa-sched-mi -mcpu=a2 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
@aa = external global [256 x [256 x double]], align 32
@bb = external global [256 x [256 x double]], align 32
; CHECK: blr
}
+define <4 x double> @testqv4doubleslt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleult(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesle(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleule(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleeq(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleeq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleuge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesgt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleugt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublene(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+ ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublene
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floateq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+ ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1slt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1slt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ult(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sle(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ule(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1eq(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1eq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1uge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1uge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sgt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ugt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
+; CHECK: .LBB[[BB1]]:
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB2]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ne(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+ %cmp1 = fcmp oeq float %c3, %c4
+ %cmp3tmp = fcmp oeq float %c1, %c2
+ %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+ %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+ ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: qvfmr 1, 6
+; CHECK: blr
+}
+
attributes #0 = { nounwind readnone "target-cpu"="pwr7" }
+attributes #1 = { nounwind readnone "target-cpu"="a2q" }
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux < %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s
; Check that llc does not crash due to an illegal APInt operation
ret i32 %call2.i.sink
}
- attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
...
---
ret i32 %call2.i.sink
}
- attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
...
---
ret void
}
-attributes #0 = { norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
}
+define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
+; CHECK-LABEL: test_l_qv4float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 4, 15
+; CHECK-NEXT: qvlpclsx 0, 0, 3
+; CHECK-NEXT: qvlfsx 1, 3, 4
+; CHECK-NEXT: qvlfsx 2, 0, 3
+; CHECK-NEXT: qvfperm 1, 2, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+}
+
+define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
+; CHECK-LABEL: test_l_qv8float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 4, 31
+; CHECK-NEXT: qvlpclsx 1, 0, 3
+; CHECK-NEXT: qvlfsx 0, 3, 4
+; CHECK-NEXT: li 4, 16
+; CHECK-NEXT: qvlfsx 3, 3, 4
+; CHECK-NEXT: qvlfsx 4, 0, 3
+; CHECK-NEXT: qvfperm 2, 3, 0, 1
+; CHECK-NEXT: qvfperm 1, 4, 3, 1
+; CHECK-NEXT: blr
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+}
+
+define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
+; CHECK-LABEL: test_l_qv4double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 4, 31
+; CHECK-NEXT: qvlpcldx 0, 0, 3
+; CHECK-NEXT: qvlfdx 1, 3, 4
+; CHECK-NEXT: qvlfdx 2, 0, 3
+; CHECK-NEXT: qvfperm 1, 2, 1, 0
+; CHECK-NEXT: blr
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+}
+
+define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
+; CHECK-LABEL: test_l_qv8double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 4, 63
+; CHECK-NEXT: qvlpcldx 1, 0, 3
+; CHECK-NEXT: qvlfdx 0, 3, 4
+; CHECK-NEXT: li 4, 32
+; CHECK-NEXT: qvlfdx 3, 3, 4
+; CHECK-NEXT: qvlfdx 4, 0, 3
+; CHECK-NEXT: qvfperm 2, 3, 0, 1
+; CHECK-NEXT: qvfperm 1, 4, 3, 1
+; CHECK-NEXT: blr
+entry:
+ %r = load <8 x double>, <8 x double>* %p, align 8
+ ret <8 x double> %r
+
+}
+
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
; CHECK-LABEL: test_s_v16i8:
; CHECK: # %bb.0: # %entry
}
+define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
+; CHECK-LABEL: test_s_qv4float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 1, 3
+; CHECK-NEXT: stfs 1, 0(3)
+; CHECK-NEXT: stfs 0, 12(3)
+; CHECK-NEXT: qvesplati 0, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: stfs 0, 8(3)
+; CHECK-NEXT: stfs 1, 4(3)
+; CHECK-NEXT: blr
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+}
+
+define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
+; CHECK-LABEL: test_s_qv8float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 2, 3
+; CHECK-NEXT: stfs 2, 16(3)
+; CHECK-NEXT: stfs 0, 28(3)
+; CHECK-NEXT: qvesplati 0, 2, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: stfs 1, 0(3)
+; CHECK-NEXT: stfs 0, 24(3)
+; CHECK-NEXT: qvesplati 0, 1, 3
+; CHECK-NEXT: stfs 2, 20(3)
+; CHECK-NEXT: qvesplati 2, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: stfs 0, 12(3)
+; CHECK-NEXT: stfs 2, 8(3)
+; CHECK-NEXT: stfs 1, 4(3)
+; CHECK-NEXT: blr
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+}
+
+define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
+; CHECK-LABEL: test_s_qv4double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 1, 3
+; CHECK-NEXT: stfd 1, 0(3)
+; CHECK-NEXT: stfd 0, 24(3)
+; CHECK-NEXT: qvesplati 0, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: stfd 0, 16(3)
+; CHECK-NEXT: stfd 1, 8(3)
+; CHECK-NEXT: blr
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+}
+
+define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
+; CHECK-LABEL: test_s_qv8double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: qvesplati 0, 2, 3
+; CHECK-NEXT: stfd 2, 32(3)
+; CHECK-NEXT: stfd 0, 56(3)
+; CHECK-NEXT: qvesplati 0, 2, 2
+; CHECK-NEXT: qvesplati 2, 2, 1
+; CHECK-NEXT: stfd 1, 0(3)
+; CHECK-NEXT: stfd 0, 48(3)
+; CHECK-NEXT: qvesplati 0, 1, 3
+; CHECK-NEXT: stfd 2, 40(3)
+; CHECK-NEXT: qvesplati 2, 1, 2
+; CHECK-NEXT: qvesplati 1, 1, 1
+; CHECK-NEXT: stfd 0, 24(3)
+; CHECK-NEXT: stfd 2, 16(3)
+; CHECK-NEXT: stfd 1, 8(3)
+; CHECK-NEXT: blr
+entry:
+ store <8 x double> %v, <8 x double>* %p, align 8
+ ret void
+
+}
+
attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind "target-cpu"="a2q" }
attributes #2 = { nounwind "target-cpu"="pwr8" }
declare void @__cxa_call_unexpected(i8*) local_unnamed_addr
-attributes #0 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
; CHECK: store <2 x i64> zeroinitializer, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to <2 x i64>*), align 8
; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+; Check QPX vector argument.
+define i32 @bar3() "target-features"="+qpx" {
+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i32 2, <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>)
+ ret i32 %1
+}
+
+; That one is even stranger: the parameter save area starts at offset 48 from
+; the (32-byte aligned) stack pointer, while the vector parameter sits 96 bytes
+; from the stack pointer, so its offset from the parameter save area is
+; misaligned.
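+; Working through the numbers from the checks below: the vector's offset into
+; the parameter save area is 96 - 48 = 48; the variadic region begins 8 bytes
+; into that area (after the fixed i32), so its shadow lands at 48 - 8 = 40 in
+; __msan_va_arg_tls, and the overflow size is 40 + 32 (the <4 x i64> shadow)
+; = 72.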
+; CHECK-LABEL: @bar3
+; CHECK: store i32 0, i32* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 4) to i32*), align 8
+; CHECK: store i32 0, i32* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 12) to i32*), align 8
+; CHECK: store <4 x i64> zeroinitializer, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 40) to <4 x i64>*), align 8
+; CHECK: store {{.*}} 72, {{.*}} @__msan_va_arg_overflow_size_tls
+
; Check i64 array.
define i32 @bar4() {
%1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
--- /dev/null
+# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s
+
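+# Each line of bytes below is one big-endian 32-bit instruction word.
+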
+# CHECK: qvfabs 3, 5
+0x10 0x60 0x2a 0x10
+
+# CHECK: qvfadd 3, 4, 5
+0x10 0x64 0x28 0x2a
+
+# CHECK: qvfadds 3, 4, 5
+0x00 0x64 0x28 0x2a
+
+# CHECK: qvfandc 3, 4, 5
+0x10 0x64 0x2a 0x08
+
+# CHECK: qvfand 3, 4, 5
+0x10 0x64 0x28 0x88
+
+# CHECK: qvfcfid 3, 5
+0x10 0x60 0x2e 0x9c
+
+# CHECK: qvfcfids 3, 5
+0x00 0x60 0x2e 0x9c
+
+# CHECK: qvfcfidu 3, 5
+0x10 0x60 0x2f 0x9c
+
+# CHECK: qvfcfidus 3, 5
+0x00 0x60 0x2f 0x9c
+
+# CHECK: qvfclr 3
+0x10 0x63 0x18 0x08
+
+# CHECK: qvfcpsgn 3, 4, 5
+0x10 0x64 0x28 0x10
+
+# CHECK: qvfctfb 3, 4
+0x10 0x64 0x22 0x88
+
+# CHECK: qvfctid 3, 5
+0x10 0x60 0x2e 0x5c
+
+# CHECK: qvfctidu 3, 5
+0x10 0x60 0x2f 0x5c
+
+# CHECK: qvfctiduz 3, 5
+0x10 0x60 0x2f 0x5e
+
+# CHECK: qvfctidz 3, 5
+0x10 0x60 0x2e 0x5e
+
+# CHECK: qvfctiw 3, 5
+0x10 0x60 0x28 0x1c
+
+# CHECK: qvfctiwu 3, 5
+0x10 0x60 0x29 0x1c
+
+# CHECK: qvfctiwuz 3, 5
+0x10 0x60 0x29 0x1e
+
+# CHECK: qvfctiwz 3, 5
+0x10 0x60 0x28 0x1e
+
+# CHECK: qvfequ 3, 4, 5
+0x10 0x64 0x2c 0x88
+
+# CHECK: qvflogical 3, 4, 5, 12
+0x10 0x64 0x2e 0x08
+
+# CHECK: qvfmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xba
+
+# CHECK: qvfmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xba
+
+# CHECK: qvfmr 3, 5
+0x10 0x60 0x28 0x90
+
+# CHECK: qvfmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xb8
+
+# CHECK: qvfmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xb8
+
+# CHECK: qvfmul 3, 4, 6
+0x10 0x64 0x01 0xb2
+
+# CHECK: qvfmuls 3, 4, 6
+0x00 0x64 0x01 0xb2
+
+# CHECK: qvfnabs 3, 5
+0x10 0x60 0x29 0x10
+
+# CHECK: qvfnand 3, 4, 5
+0x10 0x64 0x2f 0x08
+
+# CHECK: qvfneg 3, 5
+0x10 0x60 0x28 0x50
+
+# CHECK: qvfnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xbe
+
+# CHECK: qvfnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xbe
+
+# CHECK: qvfnmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xbc
+
+# CHECK: qvfnmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xbc
+
+# CHECK: qvfnor 3, 4, 5
+0x10 0x64 0x2c 0x08
+
+# CHECK: qvfnot 3, 4
+0x10 0x64 0x25 0x08
+
+# CHECK: qvforc 3, 4, 5
+0x10 0x64 0x2e 0x88
+
+# CHECK: qvfor 3, 4, 5
+0x10 0x64 0x2b 0x88
+
+# CHECK: qvfperm 3, 4, 5, 6
+0x10 0x64 0x29 0x8c
+
+# CHECK: qvfre 3, 5
+0x10 0x60 0x28 0x30
+
+# CHECK: qvfres 3, 5
+0x00 0x60 0x28 0x30
+
+# CHECK: qvfrim 3, 5
+0x10 0x60 0x2b 0xd0
+
+# CHECK: qvfrin 3, 5
+0x10 0x60 0x2b 0x10
+
+# CHECK: qvfrip 3, 5
+0x10 0x60 0x2b 0x90
+
+# CHECK: qvfriz 3, 5
+0x10 0x60 0x2b 0x50
+
+# CHECK: qvfrsp 3, 5
+0x10 0x60 0x28 0x18
+
+# CHECK: qvfrsqrte 3, 5
+0x10 0x60 0x28 0x34
+
+# CHECK: qvfrsqrtes 3, 5
+0x00 0x60 0x28 0x34
+
+# CHECK: qvfsel 3, 4, 6, 5
+0x10 0x64 0x29 0xae
+
+# CHECK: qvfset 3
+0x10 0x63 0x1f 0x88
+
+# CHECK: qvfsub 3, 4, 5
+0x10 0x64 0x28 0x28
+
+# CHECK: qvfsubs 3, 4, 5
+0x00 0x64 0x28 0x28
+
+# CHECK: qvfxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x92
+
+# CHECK: qvfxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x92
+
+# CHECK: qvfxmul 3, 4, 6
+0x10 0x64 0x01 0xa2
+
+# CHECK: qvfxmuls 3, 4, 6
+0x00 0x64 0x01 0xa2
+
+# CHECK: qvfxor 3, 4, 5
+0x10 0x64 0x2b 0x08
+
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x86
+
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x86
+
+# CHECK: qvfxxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x82
+
+# CHECK: qvfxxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x82
+
+# CHECK: qvfxxnpmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x96
+
+# CHECK: qvfxxnpmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x96
+
+# CHECK: qvlfcduxa 3, 9, 11
+0x7c 0x69 0x58 0xcf
+
+# CHECK: qvlfcdux 3, 9, 11
+0x7c 0x69 0x58 0xce
+
+# CHECK: qvlfcdxa 3, 10, 11
+0x7c 0x6a 0x58 0x8f
+
+# CHECK: qvlfcdx 3, 10, 11
+0x7c 0x6a 0x58 0x8e
+
+# CHECK: qvlfcsuxa 3, 9, 11
+0x7c 0x69 0x58 0x4f
+
+# CHECK: qvlfcsux 3, 9, 11
+0x7c 0x69 0x58 0x4e
+
+# CHECK: qvlfcsxa 3, 10, 11
+0x7c 0x6a 0x58 0x0f
+
+# CHECK: qvlfcsx 3, 10, 11
+0x7c 0x6a 0x58 0x0e
+
+# CHECK: qvlfduxa 3, 9, 11
+0x7c 0x69 0x5c 0xcf
+
+# CHECK: qvlfdux 3, 9, 11
+0x7c 0x69 0x5c 0xce
+
+# CHECK: qvlfdxa 3, 10, 11
+0x7c 0x6a 0x5c 0x8f
+
+# CHECK: qvlfdx 3, 10, 11
+0x7c 0x6a 0x5c 0x8e
+
+# CHECK: qvlfiwaxa 3, 10, 11
+0x7c 0x6a 0x5e 0xcf
+
+# CHECK: qvlfiwax 3, 10, 11
+0x7c 0x6a 0x5e 0xce
+
+# CHECK: qvlfiwzxa 3, 10, 11
+0x7c 0x6a 0x5e 0x8f
+
+# CHECK: qvlfiwzx 3, 10, 11
+0x7c 0x6a 0x5e 0x8e
+
+# CHECK: qvlfsuxa 3, 9, 11
+0x7c 0x69 0x5c 0x4f
+
+# CHECK: qvlfsux 3, 9, 11
+0x7c 0x69 0x5c 0x4e
+
+# CHECK: qvlfsxa 3, 10, 11
+0x7c 0x6a 0x5c 0x0f
+
+# CHECK: qvlfsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0e
+
+# CHECK: qvlpcldx 3, 10, 11
+0x7c 0x6a 0x5c 0x8c
+
+# CHECK: qvlpclsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0c
+
+# CHECK: qvlpcrdx 3, 10, 11
+0x7c 0x6a 0x58 0x8c
+
+# CHECK: qvlpcrsx 3, 10, 11
+0x7c 0x6a 0x58 0x0c
+
+# CHECK: qvstfcduxa 2, 9, 11
+0x7c 0x49 0x59 0xcf
+
+# CHECK: qvstfcduxia 2, 9, 11
+0x7c 0x49 0x59 0xcb
+
+# CHECK: qvstfcduxi 2, 9, 11
+0x7c 0x49 0x59 0xca
+
+# CHECK: qvstfcdux 2, 9, 11
+0x7c 0x49 0x59 0xce
+
+# CHECK: qvstfcdxa 2, 10, 11
+0x7c 0x4a 0x59 0x8f
+
+# CHECK: qvstfcdxia 2, 10, 11
+0x7c 0x4a 0x59 0x8b
+
+# CHECK: qvstfcdxi 2, 10, 11
+0x7c 0x4a 0x59 0x8a
+
+# CHECK: qvstfcdx 2, 10, 11
+0x7c 0x4a 0x59 0x8e
+
+# CHECK: qvstfcsuxa 2, 9, 11
+0x7c 0x49 0x59 0x4f
+
+# CHECK: qvstfcsuxia 2, 9, 11
+0x7c 0x49 0x59 0x4b
+
+# CHECK: qvstfcsuxi 2, 9, 11
+0x7c 0x49 0x59 0x4a
+
+# CHECK: qvstfcsux 2, 9, 11
+0x7c 0x49 0x59 0x4e
+
+# CHECK: qvstfcsxa 2, 10, 11
+0x7c 0x4a 0x59 0x0f
+
+# CHECK: qvstfcsxia 2, 10, 11
+0x7c 0x4a 0x59 0x0b
+
+# CHECK: qvstfcsxi 2, 10, 11
+0x7c 0x4a 0x59 0x0a
+
+# CHECK: qvstfcsx 2, 10, 11
+0x7c 0x4a 0x59 0x0e
+
+# CHECK: qvstfduxa 2, 9, 11
+0x7c 0x49 0x5d 0xcf
+
+# CHECK: qvstfduxia 2, 9, 11
+0x7c 0x49 0x5d 0xcb
+
+# CHECK: qvstfduxi 2, 9, 11
+0x7c 0x49 0x5d 0xca
+
+# CHECK: qvstfdux 2, 9, 11
+0x7c 0x49 0x5d 0xce
+
+# CHECK: qvstfdxa 2, 10, 11
+0x7c 0x4a 0x5d 0x8f
+
+# CHECK: qvstfdxia 2, 10, 11
+0x7c 0x4a 0x5d 0x8b
+
+# CHECK: qvstfdxi 2, 10, 11
+0x7c 0x4a 0x5d 0x8a
+
+# CHECK: qvstfdx 2, 10, 11
+0x7c 0x4a 0x5d 0x8e
+
+# CHECK: qvstfiwxa 2, 10, 11
+0x7c 0x4a 0x5f 0x8f
+
+# CHECK: qvstfiwx 2, 10, 11
+0x7c 0x4a 0x5f 0x8e
+
+# CHECK: qvstfsuxa 2, 9, 11
+0x7c 0x49 0x5d 0x4f
+
+# CHECK: qvstfsuxia 2, 9, 11
+0x7c 0x49 0x5d 0x4b
+
+# CHECK: qvstfsuxi 2, 9, 11
+0x7c 0x49 0x5d 0x4a
+
+# CHECK: qvstfsux 2, 9, 11
+0x7c 0x49 0x5d 0x4e
+
+# CHECK: qvstfsxa 2, 10, 11
+0x7c 0x4a 0x5d 0x0f
+
+# CHECK: qvstfsxia 2, 10, 11
+0x7c 0x4a 0x5d 0x0b
+
+# CHECK: qvstfsxi 2, 10, 11
+0x7c 0x4a 0x5d 0x0a
+
+# CHECK: qvstfsx 2, 10, 11
+0x7c 0x4a 0x5d 0x0e
+
--- /dev/null
+# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s
+
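+# Both symbolic (%qN) and plain numeric register operands are accepted; the
+# first two checks show they produce the same encoding.
+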
+# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10]
+ qvfabs %q3, %q5
+
+# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10]
+ qvfabs 3, 5
+# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a]
+ qvfadd 3, 4, 5
+# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a]
+ qvfadds 3, 4, 5
+# CHECK: qvfandc 3, 4, 5 # encoding: [0x10,0x64,0x2a,0x08]
+ qvfandc 3, 4, 5
+# CHECK: qvfand 3, 4, 5 # encoding: [0x10,0x64,0x28,0x88]
+ qvfand 3, 4, 5
+# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c]
+ qvfcfid 3, 5
+# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c]
+ qvfcfids 3, 5
+# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c]
+ qvfcfidu 3, 5
+# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c]
+ qvfcfidus 3, 5
+# CHECK: qvfclr 3 # encoding: [0x10,0x63,0x18,0x08]
+ qvfclr 3
+# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10]
+ qvfcpsgn 3, 4, 5
+# CHECK: qvfctfb 3, 4 # encoding: [0x10,0x64,0x22,0x88]
+ qvfctfb 3, 4
+# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c]
+ qvfctid 3, 5
+# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c]
+ qvfctidu 3, 5
+# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e]
+ qvfctiduz 3, 5
+# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e]
+ qvfctidz 3, 5
+# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c]
+ qvfctiw 3, 5
+# CHECK: qvfctiwu 3, 5 # encoding: [0x10,0x60,0x29,0x1c]
+ qvfctiwu 3, 5
+# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e]
+ qvfctiwuz 3, 5
+# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e]
+ qvfctiwz 3, 5
+# CHECK: qvfequ 3, 4, 5 # encoding: [0x10,0x64,0x2c,0x88]
+ qvfequ 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08]
+ qvflogical 3, 4, 5, 12
+# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba]
+ qvfmadd 3, 4, 6, 5
+# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba]
+ qvfmadds 3, 4, 6, 5
+# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90]
+ qvfmr 3, 5
+# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8]
+ qvfmsub 3, 4, 6, 5
+# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8]
+ qvfmsubs 3, 4, 6, 5
+# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2]
+ qvfmul 3, 4, 6
+# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2]
+ qvfmuls 3, 4, 6
+# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10]
+ qvfnabs 3, 5
+# CHECK: qvfnand 3, 4, 5 # encoding: [0x10,0x64,0x2f,0x08]
+ qvfnand 3, 4, 5
+# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50]
+ qvfneg 3, 5
+# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe]
+ qvfnmadd 3, 4, 6, 5
+# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe]
+ qvfnmadds 3, 4, 6, 5
+# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc]
+ qvfnmsub 3, 4, 6, 5
+# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc]
+ qvfnmsubs 3, 4, 6, 5
+# CHECK: qvfnor 3, 4, 5 # encoding: [0x10,0x64,0x2c,0x08]
+ qvfnor 3, 4, 5
+# CHECK: qvfnot 3, 4 # encoding: [0x10,0x64,0x25,0x08]
+ qvfnot 3, 4
+# CHECK: qvforc 3, 4, 5 # encoding: [0x10,0x64,0x2e,0x88]
+ qvforc 3, 4, 5
+# CHECK: qvfor 3, 4, 5 # encoding: [0x10,0x64,0x2b,0x88]
+ qvfor 3, 4, 5
+# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c]
+ qvfperm 3, 4, 5, 6
+# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30]
+ qvfre 3, 5
+# CHECK: qvfres 3, 5 # encoding: [0x00,0x60,0x28,0x30]
+ qvfres 3, 5
+# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0]
+ qvfrim 3, 5
+# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10]
+ qvfrin 3, 5
+# CHECK: qvfrip 3, 5 # encoding: [0x10,0x60,0x2b,0x90]
+ qvfrip 3, 5
+# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50]
+ qvfriz 3, 5
+# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18]
+ qvfrsp 3, 5
+# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34]
+ qvfrsqrte 3, 5
+# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34]
+ qvfrsqrtes 3, 5
+# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae]
+ qvfsel 3, 4, 6, 5
+# CHECK: qvfset 3 # encoding: [0x10,0x63,0x1f,0x88]
+ qvfset 3
+# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28]
+ qvfsub 3, 4, 5
+# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28]
+ qvfsubs 3, 4, 5
+# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92]
+ qvfxmadd 3, 4, 6, 5
+# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92]
+ qvfxmadds 3, 4, 6, 5
+# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2]
+ qvfxmul 3, 4, 6
+# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2]
+ qvfxmuls 3, 4, 6
+# CHECK: qvfxor 3, 4, 5 # encoding: [0x10,0x64,0x2b,0x08]
+ qvfxor 3, 4, 5
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86]
+ qvfxxcpnmadd 3, 4, 6, 5
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86]
+ qvfxxcpnmadds 3, 4, 6, 5
+# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82]
+ qvfxxmadd 3, 4, 6, 5
+# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82]
+ qvfxxmadds 3, 4, 6, 5
+# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96]
+ qvfxxnpmadd 3, 4, 6, 5
+# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96]
+ qvfxxnpmadds 3, 4, 6, 5
+# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf]
+ qvlfcduxa 3, 9, 11
+# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce]
+ qvlfcdux 3, 9, 11
+# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f]
+ qvlfcdxa 3, 10, 11
+# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e]
+ qvlfcdx 3, 10, 11
+# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f]
+ qvlfcsuxa 3, 9, 11
+# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e]
+ qvlfcsux 3, 9, 11
+# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f]
+ qvlfcsxa 3, 10, 11
+# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e]
+ qvlfcsx 3, 10, 11
+# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf]
+ qvlfduxa 3, 9, 11
+# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce]
+ qvlfdux 3, 9, 11
+# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f]
+ qvlfdxa 3, 10, 11
+# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e]
+ qvlfdx 3, 10, 11
+# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf]
+ qvlfiwaxa 3, 10, 11
+# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce]
+ qvlfiwax 3, 10, 11
+# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f]
+ qvlfiwzxa 3, 10, 11
+# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e]
+ qvlfiwzx 3, 10, 11
+# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f]
+ qvlfsuxa 3, 9, 11
+# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e]
+ qvlfsux 3, 9, 11
+# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f]
+ qvlfsxa 3, 10, 11
+# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e]
+ qvlfsx 3, 10, 11
+# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c]
+ qvlpcldx 3, 10, 11
+# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c]
+ qvlpclsx 3, 10, 11
+# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c]
+ qvlpcrdx 3, 10, 11
+# CHECK: qvlpcrsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0c]
+ qvlpcrsx 3, 10, 11
+# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf]
+ qvstfcduxa 2, 9, 11
+# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb]
+ qvstfcduxia 2, 9, 11
+# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca]
+ qvstfcduxi 2, 9, 11
+# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce]
+ qvstfcdux 2, 9, 11
+# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f]
+ qvstfcdxa 2, 10, 11
+# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b]
+ qvstfcdxia 2, 10, 11
+# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a]
+ qvstfcdxi 2, 10, 11
+# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e]
+ qvstfcdx 2, 10, 11
+# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f]
+ qvstfcsuxa 2, 9, 11
+# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b]
+ qvstfcsuxia 2, 9, 11
+# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a]
+ qvstfcsuxi 2, 9, 11
+# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e]
+ qvstfcsux 2, 9, 11
+# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f]
+ qvstfcsxa 2, 10, 11
+# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b]
+ qvstfcsxia 2, 10, 11
+# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a]
+ qvstfcsxi 2, 10, 11
+# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e]
+ qvstfcsx 2, 10, 11
+# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf]
+ qvstfduxa 2, 9, 11
+# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb]
+ qvstfduxia 2, 9, 11
+# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca]
+ qvstfduxi 2, 9, 11
+# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce]
+ qvstfdux 2, 9, 11
+# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f]
+ qvstfdxa 2, 10, 11
+# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b]
+ qvstfdxia 2, 10, 11
+# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a]
+ qvstfdxi 2, 10, 11
+# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e]
+ qvstfdx 2, 10, 11
+# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f]
+ qvstfiwxa 2, 10, 11
+# CHECK: qvstfiwx 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8e]
+ qvstfiwx 2, 10, 11
+# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f]
+ qvstfsuxa 2, 9, 11
+# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b]
+ qvstfsuxia 2, 9, 11
+# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a]
+ qvstfsuxi 2, 9, 11
+# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e]
+ qvstfsux 2, 9, 11
+# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f]
+ qvstfsxa 2, 10, 11
+# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b]
+ qvstfsxia 2, 10, 11
+# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a]
+ qvstfsxi 2, 10, 11
+# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e]
+ qvstfsx 2, 10, 11
+
; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
define void @test(i32 signext %n) {
; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dse -enable-dse-memoryssa -enable-dse-partial-store-merging=false < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%"struct.std::complex" = type { { float, float } }
; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
%"struct.std::complex" = type { { float, float } }
; RUN: opt -passes="function(ee-instrument),function(ee-instrument),cgscc(inline),function(post-inline-ee-instrument),function(post-inline-ee-instrument)" -S < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
define void @leaf_function() #0 {
entry:
--- /dev/null
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.ppc.qpx.qvlfs(i8*) #1
+
+define <4 x double> @test1(<4 x float>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.qpx.qvlfs
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ %v0e = fpext <4 x float> %v0 to <4 x double>
+ %a = fadd <4 x double> %v0e, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1a(<4 x float>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.qpx.qvlfs
+; CHECK-NOT: load <4 x double>
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ %v0e = fpext <4 x float> %v0 to <4 x double>
+ %a = fadd <4 x double> %v0e, %vl
+ ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfs(<4 x double>, i8*) #0
+
+define <4 x float> @test2(<4 x float>* %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ ret <4 x float> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.qpx.qvstfs
+; CHECK: ret <4 x float>
+}
+
+define <4 x float> @test2a(<4 x float>* align 16 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ ret <4 x float> %v0
+
+; CHECK-LABEL: @test2a
+; CHECK: fptrunc <4 x double> %d to <4 x float>
+; CHECK-NOT: @llvm.ppc.qpx.qvstfs
+; CHECK-NOT: store <4 x double>
+; CHECK: ret <4 x float>
+}
+
+declare <4 x double> @llvm.ppc.qpx.qvlfd(i8*) #1
+
+define <4 x double> @test1l(<4 x double>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1ln(<4 x double>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1ln
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1la(<4 x double>* align 32 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfd(<4 x double>, i8*) #0
+
+define <4 x double> @test2l(<4 x double>* %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2ln(<4 x double>* align 16 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2ln
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2la(<4 x double>* align 32 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2la
+; CHECK-NOT: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
-; RUN: opt -mcpu=a2 -loop-data-prefetch -mtriple=powerpc64le-unknown-linux -enable-ppc-prefetching -S < %s | FileCheck %s
-; RUN: opt -mcpu=a2 -passes=loop-data-prefetch -mtriple=powerpc64le-unknown-linux -enable-ppc-prefetching -S < %s | FileCheck %s
+; RUN: opt -mcpu=a2 -loop-data-prefetch -S < %s | FileCheck %s
+; RUN: opt -mcpu=a2 -passes=loop-data-prefetch -S < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
define void @foo(double* nocapture %a, double* nocapture readonly %b) {
entry:
; RUN: opt -loop-simplify -S %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
define fastcc void @do_update_md([3 x float]* nocapture readonly %x) #0 {
entry:
; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
define void @test1() nounwind {
; Ensure that we don't crash when the trip count == -1.
--- /dev/null
+; RUN: opt -S -basic-aa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %mul = fmul double %0, 2.000000e+00
+ %mul3 = fmul double %0, %mul
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 %indvars.iv
+ %1 = load double, double* %arrayidx5, align 8
+ %mul6 = fmul double %1, 3.000000e+00
+ %mul9 = fmul double %1, %mul6
+ %add = fadd double %mul3, %mul9
+ %mul12 = fmul double %0, 4.000000e+00
+ %mul15 = fmul double %mul12, %1
+ %add16 = fadd double %mul15, %add
+ %add17 = fadd double %add16, 1.000000e+00
+ %arrayidx19 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add17, double* %arrayidx19, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
; Function Attrs: nounwind
define zeroext i32 @test() #0 {
; Function Attrs: nounwind
declare void @llvm.va_end(i8*) #2
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
; RUN: opt -S -ipsccp < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux"
+target triple = "powerpc64-bgq-linux"
define void @test(i32 signext %n) {
EXPECT_EQ(Triple::Linux, T.getOS());
EXPECT_EQ(Triple::Musl, T.getEnvironment());
+ T = Triple("powerpc-bgp-linux");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("powerpc-bgp-cnk");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::CNK, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("ppc-bgp-linux");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("ppc32-bgp-linux");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("powerpc64-bgq-linux");
+ EXPECT_EQ(Triple::ppc64, T.getArch());
+ EXPECT_EQ(Triple::BGQ, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("ppc64-bgq-linux");
+ EXPECT_EQ(Triple::ppc64, T.getArch());
+ EXPECT_EQ(Triple::BGQ, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+
T = Triple("powerpc-ibm-aix");
EXPECT_EQ(Triple::ppc, T.getArch());
EXPECT_EQ(Triple::IBM, T.getVendor());
"PPCMachineScheduler.cpp",
"PPCMacroFusion.cpp",
"PPCPreEmitPeephole.cpp",
+ "PPCQPXLoadSplat.cpp",
"PPCReduceCRLogicals.cpp",
"PPCRegisterInfo.cpp",
"PPCSubtarget.cpp",
#if KMP_OS_WINDOWS
#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
+#elif KMP_OS_CNK
+#define KMP_INIT_WAIT 16U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
+#if KMP_OS_CNK
+ // The flushing thread needs to yield here; this prevents a
+ // busy-waiting thread from saturating the pipeline. flush is
+ // often used in loops like this:
+ // while (!flag) {
+ // #pragma omp flush(flag)
+ // }
+ // and adding the yield here is good for at least a 10x speedup
+ // when running >2 threads per core (on the NAS LU benchmark).
+ __kmp_yield();
+#endif
#else
#error Unknown or unsupported architecture
#endif
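
The preceding comment motivates the yield with a flush busy-wait loop. As a
rough, self-contained C/OpenMP sketch of that pattern (an illustrative example,
not part of this patch): one thread publishes a flag, another spins on it, and
each iteration's flush is where the runtime-level yield above takes effect.

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int flag = 0; /* shared completion flag */
    #pragma omp parallel num_threads(2) shared(flag)
      {
        if (omp_get_thread_num() == 0) {
          flag = 1; /* producer: publish the flag */
    #pragma omp flush(flag)
        } else {
          /* consumer: spin until the update becomes visible; every flush
             goes through the runtime, which is where the yield happens */
          while (!flag) {
    #pragma omp flush(flag)
          }
          printf("flag observed\n");
        }
      }
      return 0;
    }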
#define KMP_LOCK_ACQUIRED_NEXT 0
#ifndef KMP_USE_FUTEX
#define KMP_USE_FUTEX \
- (KMP_OS_LINUX && \
+ (KMP_OS_LINUX && !KMP_OS_CNK && \
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
#endif
#if KMP_USE_FUTEX
#error Unknown compiler
#endif
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD)
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_CNK
#define KMP_AFFINITY_SUPPORTED 1
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
#define KMP_GROUP_AFFINITY 1
#define KMP_OS_OPENBSD 0
#define KMP_OS_DARWIN 0
#define KMP_OS_WINDOWS 0
+#define KMP_OS_CNK 0
#define KMP_OS_HURD 0
#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */
#define KMP_OS_OPENBSD 1
#endif
+#if (defined __bgq__)
+#undef KMP_OS_CNK
+#define KMP_OS_CNK 1
+#endif
+
#if (defined __GNU__)
#undef KMP_OS_HURD
#define KMP_OS_HURD 1
add 12, 0, 12
neg 12, 12
-// We need to make sure that the stack frame stays aligned (to 16 bytes).
+// We need to make sure that the stack frame stays aligned (to 16 bytes, except
+// under the BG/Q CNK, where it must be aligned to 32 bytes).
+# if KMP_OS_CNK
+ li 0, -32
+# else
li 0, -16
+# endif
and 12, 0, 12
// Establish the local stack frame.
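
A side note on the masking idiom above: loading -16 (or -32) and AND-ing it
into the stack pointer rounds the pointer down to a 16- (or 32-) byte
boundary, because in two's complement -32 has its low five bits clear. A
minimal standalone C sketch of the same arithmetic (illustrative only):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void) {
      uint64_t sp  = 0x7fffffffd123ULL;  /* example stack pointer value */
      uint64_t a16 = sp & (uint64_t)-16; /* clear low 4 bits: 16-byte align */
      uint64_t a32 = sp & (uint64_t)-32; /* clear low 5 bits: 32-byte align */
      printf("%" PRIx64 " -> %" PRIx64 " (16B), %" PRIx64 " (32B)\n",
             sp, a16, a32);
      return 0;
    }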
#include <sys/times.h>
#include <unistd.h>
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK
#include <sys/sysinfo.h>
#if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems on
os=
basic_machine=$1
;;
+ -bluegene*)
+ os=-cnk
+ ;;
-sim | -cisco | -oki | -wec | -winbond)
os=
basic_machine=$1
basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
os=-linux
;;
+ bluegene*)
+ basic_machine=powerpc-ibm
+ os=-cnk
+ ;;
c54x-*)
basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
;;
# Each alternative MUST end in a * to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* | -plan9* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-sunos*)
vendor=sun
;;
- -aix*)
+ -cnk*|-aix*)
vendor=ibm
;;
-beos*)
os=
basic_machine=$1
;;
+ -bluegene*)
+ os=-cnk
+ ;;
-sim | -cisco | -oki | -wec | -winbond)
os=
basic_machine=$1
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
+ bluegene*)
+ basic_machine=powerpc-ibm
+ os=-cnk
+ ;;
c54x-*)
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
# Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-sunos*)
vendor=sun
;;
- -aix*)
+ -cnk*|-aix*)
vendor=ibm
;;
-beos*)