unsigned LoReg, HiReg;
bool IsSigned = Opcode == ISD::SMUL_LOHI;
bool UseMULX = !IsSigned && Subtarget->hasBMI2();
+ bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
- Opc = UseMULX ? X86::MULX32rr :
+ Opc = UseMULXHi ? X86::MULX32Hrr :
+ UseMULX ? X86::MULX32rr :
IsSigned ? X86::IMUL32r : X86::MUL32r;
- MOpc = UseMULX ? X86::MULX32rm :
+ MOpc = UseMULXHi ? X86::MULX32Hrm :
+ UseMULX ? X86::MULX32rm :
IsSigned ? X86::IMUL32m : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
- Opc = UseMULX ? X86::MULX64rr :
+ Opc = UseMULXHi ? X86::MULX64Hrr :
+ UseMULX ? X86::MULX64rr :
IsSigned ? X86::IMUL64r : X86::MUL64r;
- MOpc = UseMULX ? X86::MULX64rm :
+ MOpc = UseMULXHi ? X86::MULX64Hrm :
+ UseMULX ? X86::MULX64rm :
IsSigned ? X86::IMUL64m : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- if (UseMULX) {
+ if (UseMULXHi) {
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ Chain = SDValue(CNode, 1);
+ } else if (UseMULX) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
- if (UseMULX) {
+ if (UseMULXHi) {
+ SDVTList VTs = CurDAG->getVTList(NVT);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ } else if (UseMULX) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+
[]>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
+
+ // Pseudo instructions to be used when the low result isn't used. The
+ // instruction is defined to keep the high if both destinations are the same.
+ def Hrr : PseudoI<(outs RC:$dst), (ins RC:$src),
+ []>, Sched<[sched]>;
+
+ let mayLoad = 1 in
+ def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
+ []>, Sched<[sched.Folded]>;
}
}
"LEA has segment specified!");
break;
+ case X86::MULX32Hrr:
+ case X86::MULX32Hrm:
+ case X86::MULX64Hrr:
+ case X86::MULX64Hrm: {
+ // Turn into regular MULX by duplicating the destination.
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
+ case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
+ case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
+ case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ // Duplicate the destination.
+ unsigned DestReg = OutMI.getOperand(0).getReg();
+ OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
+ break;
+ }
+
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
case X86::VMOVZPQILo2PQIrr:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O3-CUR-NEXT: mulxq %rax, %rcx, %rax
+; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-CUR-NEXT: shrq $3, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_udiv1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-O3-EX-NEXT: mulxq (%rdi), %rcx, %rax
+; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
; CHECK-O3-EX-NEXT: shrq $3, %rax
; CHECK-O3-EX-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rax, %rdx
-; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rdx
-; CHECK-O3-NEXT: shrq $3, %rdx
-; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rcx
+; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rcx
+; CHECK-O3-NEXT: shrq $3, %rcx
+; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rcx
; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O3-NEXT: subq %rcx, %rax
; CHECK-O3-NEXT: retq
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdx
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O0-NEXT: mulxq %rax, %rcx, %rax
+; CHECK-O0-NEXT: mulxq %rax, %rax, %rax
; CHECK-O0-NEXT: shrq $3, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rcx
-; CHECK-O3-CUR-NEXT: shrq $3, %rcx
-; CHECK-O3-CUR-NEXT: movq %rcx, (%rdi)
+; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
+; CHECK-O3-CUR-NEXT: shrq $3, %rax
+; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_udiv1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rcx
-; CHECK-O3-EX-NEXT: shrq $3, %rcx
-; CHECK-O3-EX-NEXT: movq %rcx, (%rdi)
+; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
+; CHECK-O3-EX-NEXT: shrq $3, %rax
+; CHECK-O3-EX-NEXT: movq %rax, (%rdi)
; CHECK-O3-EX-NEXT: retq
%prev = load atomic i64, i64* %p unordered, align 8
%val = udiv i64 %prev, 15
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rax, %rdx
-; CHECK-O0-NEXT: mulxq %rcx, %rdx, %rcx
+; CHECK-O0-NEXT: mulxq %rcx, %rcx, %rcx
; CHECK-O0-NEXT: shrq $3, %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rdx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O3-NEXT: mulxq %rax, %rax, %rcx
-; CHECK-O3-NEXT: shrq $3, %rcx
-; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rax
+; CHECK-O3-NEXT: mulxq %rax, %rax, %rax
+; CHECK-O3-NEXT: shrq $3, %rax
+; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI-NEXT: movl %eax, %edx
-; X86-BMI-NEXT: mulxl %esi, %edx, %ebx
+; X86-BMI-NEXT: mulxl %esi, %ebx, %ebx
; X86-BMI-NEXT: movl %ecx, %edx
; X86-BMI-NEXT: mulxl %esi, %esi, %ebp
; X86-BMI-NEXT: addl %ebx, %esi
; X64-BMI-LABEL: foo:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: movq %rdi, %rdx
-; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax
+; X64-BMI-NEXT: mulxq %rsi, %rax, %rax
; X64-BMI-NEXT: retq
%tmp0 = zext i64 %x to i128
%tmp1 = zext i64 %y to i128
; HSW: # %bb.0: # %bb
; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
; HSW-NEXT: movq %rdi, %rdx
-; HSW-NEXT: mulxq %rax, %rax, %rcx
-; HSW-NEXT: shrq $42, %rcx
-; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; HSW-NEXT: mulxq %rax, %rax, %rax
+; HSW-NEXT: shrq $42, %rax
+; HSW-NEXT: imulq $281474977, %rax, %rax # imm = 0x10C6F7A1
; HSW-NEXT: shrq $20, %rax
; HSW-NEXT: leal (%rax,%rax,4), %eax
; HSW-NEXT: addl $5, %eax
; ZN: # %bb.0: # %bb
; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
; ZN-NEXT: movq %rdi, %rdx
-; ZN-NEXT: mulxq %rax, %rax, %rcx
-; ZN-NEXT: shrq $42, %rcx
-; ZN-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; ZN-NEXT: mulxq %rax, %rax, %rax
+; ZN-NEXT: shrq $42, %rax
+; ZN-NEXT: imulq $281474977, %rax, %rax # imm = 0x10C6F7A1
; ZN-NEXT: shrq $20, %rax
; ZN-NEXT: leal 5(%rax,%rax,4), %eax
; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF