According to Power ISA V3.0 document, the first source operand of mtvsrdd is constant 0 if r0 is specified. So the corresponding register constraint should be g8rc_nox0.
This bug caused wrong output generated by 401.bzip2 when -mcpu=power9 and fdo are specified.
Differential Revision: https://reviews.llvm.org/D32880
llvm-svn: 302834
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned G80Regs[] = {
+ PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
static const unsigned QFRegs[] = {
PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
return decodeRegisterClass(Inst, RegNo, G8Regs);
}
+static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G80Regs);
+}
+
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
[]>, Requires<[In64BitMode]>;
--- /dev/null
+; RUN: llc -mcpu=pwr9 -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \
+; RUN: < %s | FileCheck %s
+
+; This test case checks r0 is used as constant 0 in instruction mtvsrdd.
+
+define <2 x i64> @const0(i64 %a) {
+ %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
+ %vecinit1 = insertelement <2 x i64> %vecinit, i64 0, i32 1
+ ret <2 x i64> %vecinit1
+; CHECK-LABEL: const0
+; CHECK: mtvsrdd v2, 0, r3
+}
+
+define <2 x i64> @noconst0(i64* %a, i64* %b) {
+ %1 = load i64, i64* %a, align 8
+ %2 = load i64, i64* %b, align 8
+ %vecinit = insertelement <2 x i64> undef, i64 %2, i32 0
+ %vecinit1 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
+ ret <2 x i64> %vecinit1
+; CHECK-LABEL: noconst0
+; CHECK: mtvsrdd v2, {{r[0-9]+}}, {{r[0-9]+}}
+}
--- /dev/null
+# RUN: llvm-mc --disassemble %s -triple powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck %s
+
+# CHECK: mtvsrdd 6, 0, 3
+0x66 0x1b 0xc0 0x7c