}
bool foldLoadStoreIntoMemOperand(SDNode *Node);
- bool matchBEXTRFromAnd(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
return true;
}
-// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
- MVT NVT = Node->getSimpleValueType(0);
- SDLoc dl(Node);
-
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- // If we have TBM we can use an immediate for the control. If we have BMI
- // we should only do this if the BEXTR instruction is implemented well.
- // Otherwise moving the control into a register makes this more costly.
- // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
- // hoisting the move immediate would make it worthwhile with a less optimal
- // BEXTR?
- if (!Subtarget->hasTBM() &&
- !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
- return false;
-
- // Must have a shift right.
- if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
- return false;
-
- // Shift can't have additional users.
- if (!N0->hasOneUse())
- return false;
-
- // Only supported for 32 and 64 bits.
- if (NVT != MVT::i32 && NVT != MVT::i64)
- return false;
-
- // Shift amount and RHS of and must be constant.
- ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
- if (!MaskCst || !ShiftCst)
- return false;
-
- // And RHS must be a mask.
- uint64_t Mask = MaskCst->getZExtValue();
- if (!isMask_64(Mask))
- return false;
-
- uint64_t Shift = ShiftCst->getZExtValue();
- uint64_t MaskSize = countPopulation(Mask);
-
- // Don't interfere with something that can be handled by extracting AH.
- // TODO: If we are able to fold a load, BEXTR might still be better than AH.
- if (Shift == 8 && MaskSize == 8)
- return false;
-
- // Make sure we are only using bits that were in the original value, not
- // shifted in.
- if (Shift + MaskSize > NVT.getSizeInBits())
- return false;
-
- // Create a BEXTR node and run it through selection.
- SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
- SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
- N0->getOperand(0), C);
- ReplaceNode(Node, New.getNode());
- SelectCode(New.getNode());
- return true;
-}
-
// Emit a PCMISTR(I/M) instruction.
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
bool MayFoldLoad, const SDLoc &dl,
break;
case ISD::AND:
- if (matchBEXTRFromAnd(Node))
- return;
if (AndImmShrink && shrinkAndImmediate(Node))
return;
return SDValue();
}
+/// Decide whether a BEXTR should be formed for a value of type \p VT on
+/// \p Subtarget.
+///
+/// \returns true only when the subtarget gate below passes and \p VT is i32,
+/// or i64 on a 64-bit subtarget.
+static bool hasBEXTR(const X86Subtarget &Subtarget, EVT VT) {
+  // With TBM the control can be encoded as an immediate. With BMI alone the
+  // control has to be moved into a register first, which makes the sequence
+  // more costly, so only accept BMI when BEXTR is implemented well.
+  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of
+  // LICM hoisting the move immediate would make it worthwhile with a less
+  // optimal BEXTR?
+  const bool ProfitableBEXTR =
+      Subtarget.hasTBM() || (Subtarget.hasBMI() && Subtarget.hasFastBEXTR());
+  if (!ProfitableBEXTR)
+    return false;
+
+  // Only 32-bit values, and 64-bit values in 64-bit mode, are accepted.
+  if (VT == MVT::i32)
+    return true;
+  return VT == MVT::i64 && Subtarget.is64Bit();
+}
+
+// Try to turn (X >> C1) & C2 into a BEXTR/BEXTRI.
+//
+// Node is the AND being combined. On success the freshly built X86ISD::BEXTR
+// node is returned; if any precondition below fails an empty SDValue is
+// returned and nothing is created.
+static SDValue combineAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  EVT VT = Node->getValueType(0);
+
+  // Nothing to do unless BEXTR is acceptable for this type on this subtarget.
+  if (!hasBEXTR(Subtarget, VT))
+    return SDValue();
+
+  SDValue ShiftedVal = Node->getOperand(0);
+  SDValue MaskVal = Node->getOperand(1);
+
+  // The first operand has to be a right shift (logical or arithmetic) whose
+  // only user is this AND.
+  const unsigned ShiftOpc = ShiftedVal->getOpcode();
+  const bool IsRightShift = ShiftOpc == ISD::SRL || ShiftOpc == ISD::SRA;
+  if (!IsRightShift || !ShiftedVal->hasOneUse())
+    return SDValue();
+
+  // Both the AND mask and the shift amount must be constants.
+  auto *MaskC = dyn_cast<ConstantSDNode>(MaskVal);
+  auto *ShAmtC = dyn_cast<ConstantSDNode>(ShiftedVal->getOperand(1));
+  if (MaskC == nullptr || ShAmtC == nullptr)
+    return SDValue();
+
+  // The AND constant has to pass isMask_64.
+  const uint64_t MaskBits = MaskC->getZExtValue();
+  if (!isMask_64(MaskBits))
+    return SDValue();
+
+  const uint64_t ShAmt = ShAmtC->getZExtValue();
+  const uint64_t FieldWidth = countPopulation(MaskBits);
+
+  // Leave the (shift 8, width 8) case alone; it can be handled by extracting
+  // AH.
+  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
+  if (ShAmt == 8 && FieldWidth == 8)
+    return SDValue();
+
+  // The extracted field must lie entirely within the original value so that
+  // no shifted-in bits are used.
+  if (ShAmt + FieldWidth > VT.getSizeInBits())
+    return SDValue();
+
+  // Build the control constant (shift amount in the low bits, field width
+  // shifted left by 8) and then the BEXTR node itself.
+  SDLoc dl(Node);
+  SDValue Control = DAG.getConstant(ShAmt | (FieldWidth << 8), dl, VT);
+  return DAG.getNode(X86ISD::BEXTR, dl, VT, ShiftedVal->getOperand(0),
+                     Control);
+}
+
// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
// Turn it into series of XORs and a setnp.
static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ if (SDValue R = combineAndIntoBEXTR(N, DAG, Subtarget))
+ return R;
+
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: shrl $4, %edi
-; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = lshr i32 %a, 4
; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrl $4, %edi
-; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = lshr i64 %a, 4