// If we are shifting by (N - X) where N is a multiple of the bit width
// (N % Size == 0), the shift amount equals -X modulo Size, so emit a NEG
// of X instead of a SUB from a constant.
} else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
- Add0C->getAPIntValue() != 0 &&
- Add0C->getAPIntValue().urem(Size) == 0) {
+ Add0C->getZExtValue() != 0) {
+ EVT SubVT = ShiftAmt.getValueType();
+ SDValue X;
+ if (Add0C->getZExtValue() % Size == 0)
+ X = Add1;
+ else if (ShiftAmt.hasOneUse() && Size == 64 &&
+ Add0C->getZExtValue() % 32 == 0) {
+ // We have a 64-bit shift by (n*32 - x); since -(x + n*32) is congruent
+ // to (n*32 - x) modulo 64 (they differ by 2*n*32, a multiple of 64),
+ // we can shift by the negated sum instead. This is mainly beneficial
+ // if we already compute (x + n*32) elsewhere.
+ if (Add1.getOpcode() == ISD::TRUNCATE) {
+ Add1 = Add1.getOperand(0);
+ SubVT = Add1.getValueType();
+ }
+ X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1,
+ CurDAG->getZExtOrTrunc(Add0, DL, SubVT));
+ insertDAGNode(*CurDAG, OrigShiftAmt, X);
+ } else
+ return false;
// Insert a negate op.
// TODO: This isn't guaranteed to replace the sub if there is a logic cone
// that uses it that's not a shift.
- EVT SubVT = ShiftAmt.getValueType();
SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
- SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+ SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
NewShiftAmt = Neg;
// Insert these operands into a valid topological order so they can
; X64-NOBMI2-LABEL: t0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rdi, %rax
-; X64-NOBMI2-NEXT: movb $32, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
+; X64-NOBMI2-NEXT: negq %rcx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: t0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $32, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
+; X64-BMI2-NEXT: addq $32, %rsi
+; X64-BMI2-NEXT: negq %rsi
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
;
; X32-NOBMI2-LABEL: t0:
; X64-NOBMI2-LABEL: t4:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rdi, %rax
-; X64-NOBMI2-NEXT: movb $96, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: leaq 96(%rsi), %rcx
+; X64-NOBMI2-NEXT: negq %rcx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: t4:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $96, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
+; X64-BMI2-NEXT: addq $96, %rsi
+; X64-BMI2-NEXT: negq %rsi
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
;
; X32-NOBMI2-LABEL: t4:
define i64 @t5_cse(i64 %val, i64 %shamt, i64*%dst) nounwind {
; X64-NOBMI2-LABEL: t5_cse:
; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
-; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
+; X64-NOBMI2-NEXT: addq $32, %rcx
; X64-NOBMI2-NEXT: movq %rcx, (%rdx)
-; X64-NOBMI2-NEXT: movb $32, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: negq %rcx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: t5_cse:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: leaq 32(%rsi), %rax
-; X64-BMI2-NEXT: movq %rax, (%rdx)
-; X64-BMI2-NEXT: movb $32, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
+; X64-BMI2-NEXT: addq $32, %rsi
+; X64-BMI2-NEXT: movq %rsi, (%rdx)
+; X64-BMI2-NEXT: negq %rsi
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
;
; X32-NOBMI2-LABEL: t5_cse: