return SDValue();
}
+/**
+ * Try to untangle a "diamond" carry propagation pattern so that the carry
+ * flows along a single chain. On success the result has the shape:
+ *   (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * This usually increases the number of operations, but once the carry is
+ * linearized other transforms can kick in and optimize the DAG.
+ *
+ * A typical input looks like:
+ *
+ *   (uaddo A, B)
+ *     /       \
+ *  Carry      Sum
+ *    |          \
+ *    | (addcarry *, 0, Z)
+ *    |       /
+ *     \   Carry
+ *      |   /
+ * (addcarry X, *, *)
+ *
+ * Numerous variations exist; the goal is to identify A, B, X and Z and to
+ * rebuild the computation with a single path for carry propagation.
+ *
+ * Carry0 is the candidate that provides Z, Carry1 the candidate uaddo. N
+ * supplies the debug location and the value types of the final node.
+ * Returns an empty SDValue when nothing matches.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+                                      SDValue X, SDValue Carry0, SDValue Carry1,
+                                      SDNode *N) {
+  // Both candidates must be result #1 of their node, and Carry1 has to be
+  // produced by a uaddo.
+  const bool BothAreResultOne =
+      Carry0.getResNo() == 1 && Carry1.getResNo() == 1;
+  if (!BothAreResultOne || Carry1.getOpcode() != ISD::UADDO)
+    return SDValue();
+
+  // Locate Z. Carry0 is either (addcarry Y, 0, Z) or its equivalent
+  // (uaddo Y, 1), which stands for Z=true.
+  SDValue Z;
+  switch (Carry0.getOpcode()) {
+  case ISD::ADDCARRY:
+    if (!isNullConstant(Carry0.getOperand(1)))
+      return SDValue();
+    Z = Carry0.getOperand(2);
+    break;
+  case ISD::UADDO: {
+    if (!isOneConstant(Carry0.getOperand(1)))
+      return SDValue();
+    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+    break;
+  }
+  default:
+    // We couldn't find a suitable Z.
+    return SDValue();
+  }
+
+  // Work out which of the two nodes consumes the other one's sum; that
+  // decides which operands play the roles of A and B.
+  const SDValue Sum0 = Carry0.getValue(0);
+  const SDValue Sum1 = Carry1.getValue(0);
+  SDValue A;
+  SDValue B;
+  if (Carry0.getOperand(0) == Sum1) {
+    //   (uaddo A, B)
+    //        |
+    //       Sum
+    //        |
+    //   (addcarry *, 0, Z)
+    A = Carry1.getOperand(0);
+    B = Carry1.getOperand(1);
+  } else if (Carry1.getOperand(0) == Sum0) {
+    //   (addcarry A, 0, Z)
+    //        |
+    //       Sum
+    //        |
+    //   (uaddo *, B)
+    A = Carry0.getOperand(0);
+    B = Carry1.getOperand(1);
+  } else if (Carry1.getOperand(1) == Sum0) {
+    // Same as the previous case with the sum as the uaddo's second operand.
+    A = Carry1.getOperand(0);
+    B = Carry0.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // Fold A, B and Z into one addcarry, then feed only its carry into X.
+  SDLoc DL(N);
+  SDValue Linearized =
+      DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+  Combiner.AddToWorklist(Linearized.getNode());
+  SDValue Zero = DAG.getConstant(0, DL, X.getValueType());
+  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, Zero,
+                     Linearized.getValue(1));
+}
+
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// Iff the flag result is dead:
* When one of the addcarry argument is itself a carry, we may be facing
* a diamond carry propagation. In which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
- *
- * We are trying to get:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
*/
if (auto Y = getAsCarry(TLI, N1)) {
- /**
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
- */
- if (Y.getOpcode() == ISD::UADDO &&
- CarryIn.getResNo() == 1 &&
- CarryIn.getOpcode() == ISD::ADDCARRY &&
- isNullConstant(CarryIn.getOperand(1)) &&
- CarryIn.getOperand(0) == Y.getValue(0)) {
- auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
- Y.getOperand(0), Y.getOperand(1),
- CarryIn.getOperand(2));
- AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
- DAG.getConstant(0, SDLoc(N), N0.getValueType()),
- NewY.getValue(1));
- }
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
}
return SDValue();
define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
; CHECK-LABEL: addcarry_to_subcarry:
; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: notq %rsi
-; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movb $1, %cl
+; CHECK-NEXT: addb $-1, %cl
; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: addq %rsi, %rcx
-; CHECK-NEXT: setb %al
-; CHECK-NEXT: addq $1, %rcx
-; CHECK-NEXT: adcq %rdi, %rax
+; CHECK-NEXT: adcq %rsi, %rcx
+; CHECK-NEXT: adcq $0, %rax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movzbl %cl, %edx
; CHECK-NEXT: addq %rsi, %rax
define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
; CHECK-LABEL: sub:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq (%rsi), %r10
+; CHECK-NEXT: movq 8(%rsi), %rdi
+; CHECK-NEXT: movq %r10, %r11
+; CHECK-NEXT: subq %rdx, %r11
; CHECK-NEXT: notq %rdx
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: addq (%rsi), %rdx
-; CHECK-NEXT: setb %dil
-; CHECK-NEXT: addq $1, %rdx
-; CHECK-NEXT: adcq 8(%rsi), %rdi
-; CHECK-NEXT: setb %r10b
-; CHECK-NEXT: movzbl %r10b, %r10d
+; CHECK-NEXT: movb $1, %bl
+; CHECK-NEXT: addb $-1, %bl
+; CHECK-NEXT: adcq %r10, %rdx
+; CHECK-NEXT: adcq $0, %rdi
+; CHECK-NEXT: setb %dl
+; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: notq %rcx
; CHECK-NEXT: addq %rdi, %rcx
-; CHECK-NEXT: adcq 16(%rsi), %r10
-; CHECK-NEXT: setb %dil
-; CHECK-NEXT: movzbl %dil, %edi
+; CHECK-NEXT: adcq 16(%rsi), %rdx
+; CHECK-NEXT: setb %bl
+; CHECK-NEXT: movzbl %bl, %edi
; CHECK-NEXT: notq %r8
-; CHECK-NEXT: addq %r10, %r8
+; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: adcq 24(%rsi), %rdi
; CHECK-NEXT: notq %r9
; CHECK-NEXT: addq %rdi, %r9
-; CHECK-NEXT: movq %rdx, (%rax)
+; CHECK-NEXT: movq %r11, (%rax)
; CHECK-NEXT: movq %rcx, 8(%rax)
; CHECK-NEXT: movq %r8, 16(%rax)
; CHECK-NEXT: movq %r9, 24(%rax)
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%0 = extractvalue %S %arg.b, 0