!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
- // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
SDValue Z = Node->getOperand(2);
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Z.getValueType();
- SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
- SDValue Zero = DAG.getConstant(0, DL, ShVT);
-
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
SDValue ShAmt;
if (isPowerOf2_32(EltSizeInBits)) {
- SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ // Z % BW -> Z & (BW - 1)
ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
} else {
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
}
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
- SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
- SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
-
- // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
- // and that is undefined. We must compare and select to avoid UB.
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
-
- // For fshl, 0-shift returns the 1st arg (X).
- // For fshr, 0-shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
- Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ SDValue ShX, ShY;
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
+ SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
+ }
+ Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
return true;
}
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %edi
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-SLOW-NEXT: andb $15, %dl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movb $16, %cl
-; X86-SLOW-NEXT: subb %dl, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb %dl, %dl
-; X86-SLOW-NEXT: je .LBB1_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: .LBB1_2:
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: andb $15, %cl
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: shrl %eax
+; X86-SLOW-NEXT: xorb $15, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i16:
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movzwl %si, %eax
-; X64-SLOW-NEXT: andb $15, %dl
-; X64-SLOW-NEXT: movl %edi, %esi
; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shll %cl, %esi
-; X64-SLOW-NEXT: movb $16, %cl
-; X64-SLOW-NEXT: subb %dl, %cl
+; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: andb $15, %cl
+; X64-SLOW-NEXT: shll %cl, %edi
+; X64-SLOW-NEXT: xorb $15, %cl
+; X64-SLOW-NEXT: shrl %eax
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shrl %cl, %eax
-; X64-SLOW-NEXT: orl %esi, %eax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmovel %edi, %eax
+; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %edi
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: andb $31, %dl
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb %dl, %dl
-; X86-SLOW-NEXT: je .LBB2_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: .LBB2_2:
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: shrl %eax
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i32:
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl %esi, %eax
-; X64-SLOW-NEXT: movl %edi, %esi
; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shll %cl, %esi
-; X64-SLOW-NEXT: andb $31, %dl
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negb %cl
+; X64-SLOW-NEXT: movl %esi, %eax
+; X64-SLOW-NEXT: shll %cl, %edi
+; X64-SLOW-NEXT: shrl %eax
+; X64-SLOW-NEXT: andb $31, %cl
+; X64-SLOW-NEXT: xorb $31, %cl
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shrl %cl, %eax
-; X64-SLOW-NEXT: orl %esi, %eax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmovel %edi, %eax
+; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: andl $63, %ebx
-; X86-SLOW-NEXT: movb $64, %dh
-; X86-SLOW-NEXT: subb %bl, %dh
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movb %dh, %dl
-; X86-SLOW-NEXT: andb $31, %dl
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: testb %dl, %dl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: je .LBB5_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %eax, %ebp
-; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB5_2:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: movb %bl, %ch
-; X86-SLOW-NEXT: andb $31, %ch
+; X86-SLOW-NEXT: movb $64, %ch
+; X86-SLOW-NEXT: subb %bl, %ch
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: negb %cl
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
+; X86-SLOW-NEXT: addl %eax, %eax
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: shrl %edi
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: je .LBB5_4
-; X86-SLOW-NEXT: # %bb.3:
-; X86-SLOW-NEXT: orl %edi, %eax
-; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: je .LBB5_6
+; X86-SLOW-NEXT: jne .LBB5_1
+; X86-SLOW-NEXT: # %bb.2:
+; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: jmp .LBB5_3
+; X86-SLOW-NEXT: .LBB5_1:
+; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: .LBB5_3:
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: testb $32, %ch
+; X86-SLOW-NEXT: jne .LBB5_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: orl %edx, %eax
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: jmp .LBB5_6
+; X86-SLOW-NEXT: .LBB5_4:
+; X86-SLOW-NEXT: movl %edi, %ecx
; X86-SLOW-NEXT: xorl %edi, %edi
; X86-SLOW-NEXT: .LBB5_6:
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB5_7
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: jne .LBB5_10
-; X86-SLOW-NEXT: jmp .LBB5_11
-; X86-SLOW-NEXT: .LBB5_7:
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB5_11
-; X86-SLOW-NEXT: .LBB5_10:
-; X86-SLOW-NEXT: orl %esi, %ebp
-; X86-SLOW-NEXT: orl %ecx, %edi
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: .LBB5_11:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: addl $8, %esp
+; X86-SLOW-NEXT: je .LBB5_8
+; X86-SLOW-NEXT: # %bb.7:
+; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: orl %ecx, %esi
+; X86-SLOW-NEXT: movl %ebp, %edx
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: .LBB5_8:
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movq %rdx, %rcx
; X64-SLOW-NEXT: movq %rsi, %rax
-; X64-SLOW-NEXT: movq %rdi, %rsi
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: andb $63, %dl
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negb %cl
+; X64-SLOW-NEXT: shlq %cl, %rdi
+; X64-SLOW-NEXT: shrq %rax
+; X64-SLOW-NEXT: andb $63, %cl
+; X64-SLOW-NEXT: xorb $63, %cl
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT: shrq %cl, %rax
-; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmoveq %rdi, %rax
+; X64-SLOW-NEXT: orq %rdi, %rax
; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
ret i64 %tmp
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %edi
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-SLOW-NEXT: andb $15, %dl
-; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movb $16, %cl
-; X86-SLOW-NEXT: subb %dl, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb %dl, %dl
-; X86-SLOW-NEXT: je .LBB1_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, %eax
-; X86-SLOW-NEXT: .LBB1_2:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: andb $15, %cl
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: addl %eax, %eax
+; X86-SLOW-NEXT: xorb $15, %cl
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i16:
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movzwl %si, %eax
-; X64-SLOW-NEXT: andb $15, %dl
; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shrl %cl, %eax
-; X64-SLOW-NEXT: movb $16, %cl
-; X64-SLOW-NEXT: subb %dl, %cl
-; X64-SLOW-NEXT: shll %cl, %edi
-; X64-SLOW-NEXT: orl %edi, %eax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmovel %esi, %eax
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: movzwl %si, %edx
+; X64-SLOW-NEXT: andb $15, %cl
+; X64-SLOW-NEXT: shrl %cl, %edx
+; X64-SLOW-NEXT: leal (%rdi,%rdi), %eax
+; X64-SLOW-NEXT: xorb $15, %cl
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shll %cl, %eax
+; X64-SLOW-NEXT: orl %edx, %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %edi
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: andb $31, %dl
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb %dl, %dl
-; X86-SLOW-NEXT: je .LBB2_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, %eax
-; X86-SLOW-NEXT: .LBB2_2:
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: addl %eax, %eax
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i32:
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl %edi, %eax
-; X64-SLOW-NEXT: movl %esi, %edi
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shrl %cl, %edi
-; X64-SLOW-NEXT: andb $31, %dl
; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negb %cl
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: shrl %cl, %esi
+; X64-SLOW-NEXT: leal (%rdi,%rdi), %eax
+; X64-SLOW-NEXT: andb $31, %cl
+; X64-SLOW-NEXT: xorb $31, %cl
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shll %cl, %eax
-; X64-SLOW-NEXT: orl %edi, %eax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmovel %esi, %eax
+; X64-SLOW-NEXT: orl %esi, %eax
; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: subl $8, %esp
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: andl $63, %ebx
-; X86-SLOW-NEXT: movb $64, %al
-; X86-SLOW-NEXT: subb %bl, %al
-; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: movb %al, %ch
-; X86-SLOW-NEXT: andb $31, %ch
+; X86-SLOW-NEXT: movb $64, %ch
+; X86-SLOW-NEXT: subb %bl, %ch
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: je .LBB5_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %edi, %edx
-; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB5_2:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: movb %bl, %ah
-; X86-SLOW-NEXT: andb $31, %ah
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %ebp, %edi
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: testb %ah, %ah
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: je .LBB5_4
-; X86-SLOW-NEXT: # %bb.3:
-; X86-SLOW-NEXT: orl %edx, %edi
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: .LBB5_4:
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: xorb $31, %cl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: leal (%edi,%edi), %ebp
+; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: movb %bl, %cl
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: je .LBB5_6
-; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: jne .LBB5_1
+; X86-SLOW-NEXT: # %bb.2:
+; X86-SLOW-NEXT: orl %eax, %ebp
+; X86-SLOW-NEXT: jmp .LBB5_3
+; X86-SLOW-NEXT: .LBB5_1:
; X86-SLOW-NEXT: movl %edi, %ebp
; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: .LBB5_3:
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: testb $32, %ch
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: jne .LBB5_4
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: orl %esi, %ecx
+; X86-SLOW-NEXT: jmp .LBB5_6
+; X86-SLOW-NEXT: .LBB5_4:
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: movl $0, (%esp) # 4-byte Folded Spill
; X86-SLOW-NEXT: .LBB5_6:
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: jne .LBB5_7
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: jne .LBB5_10
-; X86-SLOW-NEXT: jmp .LBB5_11
-; X86-SLOW-NEXT: .LBB5_7:
-; X86-SLOW-NEXT: movl %esi, %eax
-; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB5_11
-; X86-SLOW-NEXT: .LBB5_10:
-; X86-SLOW-NEXT: orl %ebp, %esi
-; X86-SLOW-NEXT: orl %edi, %eax
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: .LBB5_11:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: je .LBB5_8
+; X86-SLOW-NEXT: # %bb.7:
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: orl %ebp, %eax
+; X86-SLOW-NEXT: orl %edi, %ecx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: .LBB5_8:
; X86-SLOW-NEXT: addl $8, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: movq %rsi, %rdi
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: shrq %cl, %rdi
-; X64-SLOW-NEXT: andb $63, %dl
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: negb %cl
+; X64-SLOW-NEXT: movq %rdx, %rcx
+; X64-SLOW-NEXT: shrq %cl, %rsi
+; X64-SLOW-NEXT: leaq (%rdi,%rdi), %rax
+; X64-SLOW-NEXT: andb $63, %cl
+; X64-SLOW-NEXT: xorb $63, %cl
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT: shlq %cl, %rax
-; X64-SLOW-NEXT: orq %rdi, %rax
-; X64-SLOW-NEXT: testb %dl, %dl
-; X64-SLOW-NEXT: cmoveq %rsi, %rax
+; X64-SLOW-NEXT: orq %rsi, %rax
; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
ret i64 %tmp