ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
- unsigned Opc = X86ISD::SHLD;
+ unsigned Opc = ISD::FSHL;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
- if (ShAmt0.getOpcode() == ISD::SUB ||
- ShAmt0.getOpcode() == ISD::XOR) {
- Opc = X86ISD::SHRD;
+ if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) {
+ Opc = ISD::FSHR;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
std::swap(ShMsk0, ShMsk1);
}
- // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
- // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
- // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
+ auto GetFunnelShift = [&DAG, &DL, VT, Opc](SDValue Op0, SDValue Op1,
+ SDValue Amt) {
+ if (Opc == ISD::FSHR)
+ std::swap(Op0, Op1);
+ return DAG.getNode(Opc, DL, VT, Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Amt));
+ };
+
+ // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> FSHR( Y, X, C )
+ // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C )
+ // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> FSHR( Y, X, C )
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
if ((SumC->getAPIntValue() == Bits ||
(SumC->getAPIntValue() == 0 && ShMsk1)) &&
ShAmt1Op1 == ShAmt0)
- return DAG.getNode(Opc, DL, VT, Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1, ShAmt0);
}
} else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
- return DAG.getNode(Opc, DL, VT,
- N0.getOperand(0), N1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL,
- MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1, ShAmt0);
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
- unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
+ unsigned InnerShift = (ISD::FSHL == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
- return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
}
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
- return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
}
}
}
define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind {
; X32-LABEL: bar:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: andb $15, %cl
; X32-NEXT: shldw %cl, %dx, %ax
; X32-NEXT: retl
;
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andb $15, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldw %cl, %di, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind {
; X32-LABEL: bu:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: andb $15, %cl
; X32-NEXT: shrdw %cl, %dx, %ax
; X32-NEXT: retl
;
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andb $15, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrdw %cl, %di, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind {
; X86-LABEL: test5:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: andb $15, %cl
; X86-NEXT: shldw %cl, %dx, %ax
; X86-NEXT: retl
;
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andb $15, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldw %cl, %si, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
define i16 @test7(i16 %A, i16 %B, i8 %C) nounwind {
; X86-LABEL: test7:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: andb $15, %cl
; X86-NEXT: shrdw %cl, %dx, %ax
; X86-NEXT: retl
;
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andb $15, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrdw %cl, %si, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax