From faa35fc873700b6d3e61e652c887b82d1cea7c0c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 3 May 2022 17:16:17 +0100
Subject: [PATCH] [DAG] Fix issue with rot(rot(x,c1),c2) -> rot(x,c1+c2) fold
 with unnormalized rotation amounts

Don't assume the rotation amounts have been correctly normalized - do it
as part of the constant folding. Also, the normalization should be
performed with UREM not SREM.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++++++++++++++---------
 llvm/test/CodeGen/X86/combine-rotates.ll      |  8 ++++++--
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e483c3a..679466b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8746,7 +8746,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
   }
 
   unsigned NextOp = N0.getOpcode();
-  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+
+  // fold (rot* (rot* x, c2), c1)
+  // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8754,14 +8756,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
       EVT ShiftVT = C1->getValueType(0);
       bool SameSide = (N->getOpcode() == NextOp);
       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
-      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
-              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
-        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
-        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
-            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
-        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
-                           CombinedShiftNorm);
-      }
+      SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+      SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+                                                 {N1, BitsizeC});
+      SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+                                                 {N0.getOperand(1), BitsizeC});
+      if (Norm1 && Norm2)
+        if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+                CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+          SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+              ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+          return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+                             CombinedShiftNorm);
+        }
     }
   }
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index ba775fc..dee22d1 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -435,12 +435,16 @@ define i32 @fuzz9935() {
   ret i32 %4
 }
 
-; FIXME: Failure to modulo the inner rotation before adding the results
+; Ensure we normalize the inner rotation before adding the results.
 define i5 @rotl_merge_i5(i5 %x) {
 ; CHECK-LABEL: rotl_merge_i5:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (,%rdi,4), %ecx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    andb $24, %al
+; CHECK-NEXT:    shrb $3, %al
+; CHECK-NEXT:    orb %cl, %al
 ; CHECK-NEXT:    retq
   %r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1)
   %r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1)
-- 
2.7.4
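
As a quick sanity check of the arithmetic this patch fixes, here is a minimal
standalone C++ sketch. It is not the DAGCombiner code itself; the function
name combineRotateAmounts and the i5 values are illustrative assumptions taken
from the rotl_merge_i5 test above.

    // Sketch: merge two same-direction rotate amounts for a Bits-wide integer,
    // mirroring rot(rot(x, C2), C1). Each raw amount is reduced with an
    // unsigned remainder first, since the incoming constants may not be
    // normalized to [0, Bits).
    #include <cassert>
    #include <cstdint>

    uint64_t combineRotateAmounts(uint64_t C1, uint64_t C2, unsigned Bits) {
      uint64_t Norm1 = C1 % Bits; // normalize outer amount (UREM-style)
      uint64_t Norm2 = C2 % Bits; // normalize inner amount (UREM-style)
      return (Norm1 + Norm2) % Bits;
    }

    int main() {
      // i5 test case: fshl by -1 is an unsigned rotate amount of 31, and
      // 31 % 5 == 1, so rotl(rotl(x, 31), 1) should fold to rotl(x, 2).
      assert(combineRotateAmounts(1, 31, 5) == 2);

      // Summing the raw amounts in 5-bit arithmetic instead gives
      // (31 + 1) % 32 == 0, i.e. no rotation at all - the old miscompile.
      assert((31u + 1u) % 32u == 0u);
      return 0;
    }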