}
unsigned NextOp = N0.getOpcode();
- // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+
+ // fold (rot* (rot* x, c2), c1)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
- CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
- SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
- SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
- return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
- CombinedShiftNorm);
- }
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N1, BitsizeC});
+ SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N0.getOperand(1), BitsizeC});
+ if (Norm1 && Norm2)
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
}
}
return SDValue();
ret i32 %4
}
-; FIXME: Failure to modulo the inner rotation before adding the results
+; Ensure we normalize the inner rotation before adding the results.
define i5 @rotl_merge_i5(i5 %x) {
; CHECK-LABEL: rotl_merge_i5:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal (,%rdi,4), %ecx
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: andb $24, %al
+; CHECK-NEXT: shrb $3, %al
+; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: retq
%r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1)
%r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1)