SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned LoReg, Opc;
+ unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL;
- Opc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX;
+ ROpc = X86::MUL16r;
+ MOpc = X86::MUL16m;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX;
+ ROpc = X86::MUL32r;
+ MOpc = X86::MUL32m;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX;
+ ROpc = X86::MUL64r;
+ MOpc = X86::MUL64m;
break;
- case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
- case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
- case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!FoldedLoad) {
+ FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
- // i16/i32/i64 use an instruction that produces a low and high result even
- // though only the low result is used.
- SDVTList VTs;
- if (NVT == MVT::i8)
- VTs = CurDAG->getVTList(NVT, MVT::i32);
- else
- VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ MachineSDNode *CNode;
+ if (FoldedLoad) {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
+
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
+ } else {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ }
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
CurDAG->RemoveDeadNode(Node);
define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
; SDAG-LABEL: smuloi8_load:
; SDAG: ## %bb.0:
-; SDAG-NEXT: movb (%rdi), %al
-; SDAG-NEXT: imulb %sil
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: imulb (%rdi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
; SDAG-LABEL: smuloi8_load2:
; SDAG: ## %bb.0:
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movb (%rsi), %cl
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
-; SDAG-NEXT: imulb %cl
+; SDAG-NEXT: imulb (%rsi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
; SDAG-LABEL: umuloi8_load:
; SDAG: ## %bb.0:
-; SDAG-NEXT: movb (%rdi), %al
-; SDAG-NEXT: mulb %sil
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: mulb (%rdi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
; SDAG-LABEL: umuloi8_load2:
; SDAG: ## %bb.0:
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movb (%rsi), %cl
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
-; SDAG-NEXT: mulb %cl
+; SDAG-NEXT: mulb (%rsi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
; SDAG-LABEL: umuloi16_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movzwl (%rdi), %eax
-; SDAG-NEXT: mulw %si
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
+; SDAG-NEXT: mulw (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movw %ax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movzwl (%rsi), %edx
; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
-; SDAG-NEXT: mulw %dx
+; SDAG-NEXT: mulw (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movw %ax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-LABEL: umuloi32_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movl (%rdi), %eax
-; SDAG-NEXT: mull %esi
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: mull (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movl (%rsi), %edx
-; SDAG-NEXT: mull %edx
+; SDAG-NEXT: mull (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-LABEL: umuloi64_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movq (%rdi), %rax
-; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: mulq (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movq %rdi, %rax
-; SDAG-NEXT: movq (%rsi), %rdx
-; SDAG-NEXT: mulq %rdx
+; SDAG-NEXT: mulq (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax